//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
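//
// For example, the first definition below,
//   reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// says that RAX is save-on-call under both the Java and C calling
// conventions, is spilled as an integer, and has hardware encoding 0;
// RAX_H names the upper half of the 64-bit register so that long and
// pointer values occupy an adjacent pair of 32-bit slots.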

// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
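// (Any REX prefix makes byte-register encodings 4-7 address SPL/BPL/SIL/DIL
// instead of AH/CH/DH/BH; registers with encodings 8-15 additionally need
// the REX.B/X/R extension bits.)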

// RBX, RSI, and RDI were previously set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned back on as SOE
// registers.

reg_def RAX  (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP  (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
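//
// The chunk0 ordering below follows this heuristic: the call-clobbered
// temporaries R10 and R11 come first, and RSP, which is never available
// for allocation, comes last.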

alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No XMM register is preserved across function calls;
//              XMM0-XMM7 might hold parameters.
// Windows ABI: XMM6-XMM15 are preserved across function calls;
//              XMM0-XMM3 might hold parameters.
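//
// Word a covers bits [31:0], word b bits [63:32], and so on up to word p
// at bits [511:480]: words a-d form the 128-bit XMM view, a-h the 256-bit
// YMM view, and a-p the full 512-bit ZMM view.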

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

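// Three forms of reg_class are used below: a fixed register list, a
// %{ ... %} body that returns a RegMask computed at startup, and
// reg_class_dynamic, which chooses between two fixed classes based on a
// runtime predicate.
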
// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
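
// The mask returned here (and by the classes below) is a RegMask declared in
// this file's source blocks and computed once at VM startup (presumably in
// reg_mask_init()), which lets unavailable registers, e.g. the APX extended
// GPRs, be removed from the class at runtime.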

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// The flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
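// A dynamic register class resolves at runtime: it uses the first (EVEX)
// class when the predicate holds and falls back to the second (legacy)
// class otherwise.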
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Class for pre-EVEX double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
// Class for EVEX double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Class for pre-EVEX 32-bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
// Class for EVEX 32-bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre-EVEX 64-bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for EVEX 64-bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre-EVEX 128-bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for EVEX 128-bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre-EVEX 256-bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for EVEX 256-bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for EVEX 512-bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512-bit vector registers (XMM0-XMM15 only)
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
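// Returns true if each bound of the CastLL's long type either fits in a
// signed 32-bit immediate or is unbounded (min_jlong/max_jlong), so any
// bound that actually needs checking can be encoded as an imm32 operand.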
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
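// With compressed oops, r12 is reserved to hold the heap base used for
// narrow-oop decoding, so it must stay out of the allocatable masks.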
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
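  // APX extended general-purpose registers (r16-r31); they are stripped
  // from the masks below unless UseAPX is enabled.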
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() &&
         (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
}
1629
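// vzeroupper encodes as a fixed 3-byte instruction (C5 F8 77).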
1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
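  // (E8 opcode plus a 32-bit relative displacement)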
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
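  // (a 10-byte movabs loading the inline-cache value into rax, followed by
  // a 5-byte call)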
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
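  // (10-byte movabs of the entry point into r10 plus a 3-byte indirect call)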
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1683 }
1684
1685 // This could be in MacroAssembler but it's fairly C2 specific
1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1687 Label exit;
1688 __ jccb(Assembler::noParity, exit);
1689 __ pushf();
1690 //
1691 // comiss/ucomiss instructions set ZF,PF,CF flags and
1692 // zero OF,AF,SF for NaN values.
1693 // Fixup flags by zeroing ZF,PF so that compare of NaN
1694 // values returns 'less than' result (CF is set).
1695 // Leave the rest of flags unchanged.
1696 //
1697 // 7 6 5 4 3 2 1 0
1698 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1699 // 0 0 1 0 1 0 1 1 (0x2B)
1700 //
1701 __ andq(Address(rsp, 0), 0xffffff2b);
1702 __ popf();
1703 __ bind(exit);
1704 }
1705
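// Materialize a three-way compare result in 'dst': -1 if the operands are
// unordered (PF=1) or the first is less (CF=1), 0 if equal (ZF=1), and
// 1 if greater.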
1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1707 Label done;
1708 __ movl(dst, -1);
1709 __ jcc(Assembler::parity, done);
1710 __ jcc(Assembler::below, done);
1711 __ setcc(Assembler::notEqual, dst);
1712 __ bind(done);
1713 }
1714
1715 // Math.min() # Math.max()
1716 // --------------------------
1717 // ucomis[s/d] #
1718 // ja -> b # a
1719 // jp -> NaN # NaN
1720 // jb -> a # b
1721 // je #
1722 // |-jz -> a | b # a & b
1723 // | -> a #
1724 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1725 XMMRegister a, XMMRegister b,
1726 XMMRegister xmmt, Register rt,
1727 bool min, bool single) {
1728
1729 Label nan, zero, below, above, done;
1730
1731 if (single)
1732 __ ucomiss(a, b);
1733 else
1734 __ ucomisd(a, b);
1735
1736 if (dst->encoding() != (min ? b : a)->encoding())
1737 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1738 else
1739 __ jccb(Assembler::above, done);
1740
1741 __ jccb(Assembler::parity, nan); // PF=1
1742 __ jccb(Assembler::below, below); // CF=1
1743
1744 // equal
1745 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1746 if (single) {
1747 __ ucomiss(a, xmmt);
1748 __ jccb(Assembler::equal, zero);
1749
1750 __ movflt(dst, a);
1751 __ jmp(done);
1752 }
1753 else {
1754 __ ucomisd(a, xmmt);
1755 __ jccb(Assembler::equal, zero);
1756
1757 __ movdbl(dst, a);
1758 __ jmp(done);
1759 }
1760
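  // a == b == +/-0.0: ucomis* cannot distinguish the signed zeros, so
  // combine the bit patterns instead. OR yields -0.0 if either input is
  // -0.0 (correct for min); AND yields +0.0 unless both are -0.0 (correct
  // for max).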
1761 __ bind(zero);
1762 if (min)
1763 __ vpor(dst, a, b, Assembler::AVX_128bit);
1764 else
1765 __ vpand(dst, a, b, Assembler::AVX_128bit);
1766
1767 __ jmp(done);
1768
1769 __ bind(above);
1770 if (single)
1771 __ movflt(dst, min ? b : a);
1772 else
1773 __ movdbl(dst, min ? b : a);
1774
1775 __ jmp(done);
1776
1777 __ bind(nan);
1778 if (single) {
1779 __ movl(rt, 0x7fc00000); // Float.NaN
1780 __ movdl(dst, rt);
1781 }
1782 else {
1783 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1784 __ movdq(dst, rt);
1785 }
1786 __ jmp(done);
1787
1788 __ bind(below);
1789 if (single)
1790 __ movflt(dst, min ? a : b);
1791 else
1792 __ movdbl(dst, min ? a : b);
1793
1794 __ bind(done);
1795 }
1796
1797 //=============================================================================
1798 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1799
1800 int ConstantTable::calculate_table_base_offset() const {
1801 return 0; // absolute addressing, no offset
1802 }
1803
1804 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1805 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1806 ShouldNotReachHere();
1807 }
1808
1809 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1810 // Empty encoding
1811 }
1812
1813 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1814 return 0;
1815 }
1816
1817 #ifndef PRODUCT
1818 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1819 st->print("# MachConstantBaseNode (empty encoding)");
1820 }
1821 #endif
1822
1823
1824 //=============================================================================
1825 #ifndef PRODUCT
1826 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1827 Compile* C = ra_->C;
1828
1829 int framesize = C->output()->frame_size_in_bytes();
1830 int bangsize = C->output()->bang_size_in_bytes();
1831 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1832 // Remove wordSize for return addr which is already pushed.
1833 framesize -= wordSize;
1834
1835 if (C->output()->need_stack_bang(bangsize)) {
1836 framesize -= wordSize;
1837 st->print("# stack bang (%d bytes)", bangsize);
1838 st->print("\n\t");
1839 st->print("pushq rbp\t# Save rbp");
1840 if (PreserveFramePointer) {
1841 st->print("\n\t");
1842 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1843 }
1844 if (framesize) {
1845 st->print("\n\t");
1846 st->print("subq rsp, #%d\t# Create frame",framesize);
1847 }
1848 } else {
1849 st->print("subq rsp, #%d\t# Create frame",framesize);
1850 st->print("\n\t");
1851 framesize -= wordSize;
1852 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1853 if (PreserveFramePointer) {
1854 st->print("\n\t");
1855 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1856 if (framesize > 0) {
1857 st->print("\n\t");
1858 st->print("addq rbp, #%d", framesize);
1859 }
1860 }
1861 }
1862
1863 if (VerifyStackAtCalls) {
1864 st->print("\n\t");
1865 framesize -= wordSize;
1866 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1867 #ifdef ASSERT
1868 st->print("\n\t");
1869 st->print("# stack alignment check");
1870 #endif
1871 }
1872 if (C->stub_function() != nullptr) {
1873 st->print("\n\t");
1874 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1875 st->print("\n\t");
1876 st->print("je fast_entry\t");
1877 st->print("\n\t");
1878 st->print("call #nmethod_entry_barrier_stub\t");
1879 st->print("\n\tfast_entry:");
1880 }
1881 st->cr();
1882 }
1883 #endif
1884
1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1886 Compile* C = ra_->C;
1887
1888 __ verified_entry(C);
1889
1890 if (ra_->C->stub_function() == nullptr) {
1891 __ entry_barrier();
1892 }
1893
1894 if (!Compile::current()->output()->in_scratch_emit_size()) {
1895 __ bind(*_verified_entry);
1896 }
1897
1898 C->output()->set_frame_complete(__ offset());
1899
1900 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because code that uses the
    // constant table might be emitted before MachConstantBaseNode.
1903 ConstantTable& constant_table = C->output()->constant_table();
1904 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1905 }
1906 }
1907
1908
1909 int MachPrologNode::reloc() const
1910 {
1911 return 0; // a large enough number
1912 }
1913
1914 //=============================================================================
1915 #ifndef PRODUCT
1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1917 {
1918 Compile* C = ra_->C;
1919 if (generate_vzeroupper(C)) {
1920 st->print("vzeroupper");
1921 st->cr(); st->print("\t");
1922 }
1923
1924 int framesize = C->output()->frame_size_in_bytes();
1925 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the word for the return address, which is already pushed,
  // and RBP.
1928 framesize -= 2*wordSize;
1929
1930 if (framesize) {
1931 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1932 st->print("\t");
1933 }
1934
1935 st->print_cr("popq rbp");
1936 if (do_polling() && C->is_method_compilation()) {
1937 st->print("\t");
1938 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1939 "ja #safepoint_stub\t"
1940 "# Safepoint: poll for GC");
1941 }
1942 }
1943 #endif
1944
1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1946 {
1947 Compile* C = ra_->C;
1948
1949 if (generate_vzeroupper(C)) {
1950 // Clear upper bits of YMM registers when current compiled code uses
1951 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1952 __ vzeroupper();
1953 }
1954
1955 // Subtract two words to account for return address and rbp
1956 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
1957 __ remove_frame(initial_framesize, C->needs_stack_repair());
1958
1959 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1960 __ reserved_stack_check();
1961 }
1962
1963 if (do_polling() && C->is_method_compilation()) {
1964 Label dummy_label;
1965 Label* code_stub = &dummy_label;
1966 if (!C->output()->in_scratch_emit_size()) {
1967 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1968 C->output()->add_stub(stub);
1969 code_stub = &stub->entry();
1970 }
1971 __ relocate(relocInfo::poll_return_type);
1972 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1973 }
1974 }
1975
1976 int MachEpilogNode::reloc() const
1977 {
1978 return 2; // a large enough number
1979 }
1980
1981 const Pipeline* MachEpilogNode::pipeline() const
1982 {
1983 return MachNode::pipeline_class();
1984 }
1985
1986 //=============================================================================
1987
1988 enum RC {
1989 rc_bad,
1990 rc_int,
1991 rc_kreg,
1992 rc_float,
1993 rc_stack
1994 };
1995
1996 static enum RC rc_class(OptoReg::Name reg)
1997 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
1999
2000 if (OptoReg::is_stack(reg)) return rc_stack;
2001
2002 VMReg r = OptoReg::as_VMReg(reg);
2003
2004 if (r->is_Register()) return rc_int;
2005
2006 if (r->is_KRegister()) return rc_kreg;
2007
2008 assert(r->is_XMMRegister(), "must be");
2009 return rc_float;
2010 }
2011
2012 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2013 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2014 int src_hi, int dst_hi, uint ireg, outputStream* st);
2015
2016 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2017 int stack_offset, int reg, uint ireg, outputStream* st);
2018
2019 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2020 int dst_offset, uint ireg, outputStream* st) {
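  // Copy a vector between two stack slots. There is no vector push/pop, so
  // VecS borrows rax and VecY/VecZ borrow xmm0, parking the old register
  // contents just below rsp; VecD and VecX get by with 64-bit pushq/popq
  // pairs.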
2021 if (masm) {
2022 switch (ireg) {
2023 case Op_VecS:
2024 __ movq(Address(rsp, -8), rax);
2025 __ movl(rax, Address(rsp, src_offset));
2026 __ movl(Address(rsp, dst_offset), rax);
2027 __ movq(rax, Address(rsp, -8));
2028 break;
2029 case Op_VecD:
2030 __ pushq(Address(rsp, src_offset));
2031 __ popq (Address(rsp, dst_offset));
2032 break;
2033 case Op_VecX:
2034 __ pushq(Address(rsp, src_offset));
2035 __ popq (Address(rsp, dst_offset));
2036 __ pushq(Address(rsp, src_offset+8));
2037 __ popq (Address(rsp, dst_offset+8));
2038 break;
2039 case Op_VecY:
2040 __ vmovdqu(Address(rsp, -32), xmm0);
2041 __ vmovdqu(xmm0, Address(rsp, src_offset));
2042 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2043 __ vmovdqu(xmm0, Address(rsp, -32));
2044 break;
2045 case Op_VecZ:
2046 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2047 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2048 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2049 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2050 break;
2051 default:
2052 ShouldNotReachHere();
2053 }
2054 #ifndef PRODUCT
2055 } else {
2056 switch (ireg) {
2057 case Op_VecS:
2058 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2059 "movl rax, [rsp + #%d]\n\t"
2060 "movl [rsp + #%d], rax\n\t"
2061 "movq rax, [rsp - #8]",
2062 src_offset, dst_offset);
2063 break;
2064 case Op_VecD:
2065 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2066 "popq [rsp + #%d]",
2067 src_offset, dst_offset);
2068 break;
2069 case Op_VecX:
2070 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2071 "popq [rsp + #%d]\n\t"
2072 "pushq [rsp + #%d]\n\t"
2073 "popq [rsp + #%d]",
2074 src_offset, dst_offset, src_offset+8, dst_offset+8);
2075 break;
2076 case Op_VecY:
2077 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2078 "vmovdqu xmm0, [rsp + #%d]\n\t"
2079 "vmovdqu [rsp + #%d], xmm0\n\t"
2080 "vmovdqu xmm0, [rsp - #32]",
2081 src_offset, dst_offset);
2082 break;
2083 case Op_VecZ:
      st->print("evmovdquq [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "evmovdquq xmm0, [rsp + #%d]\n\t"
                "evmovdquq [rsp + #%d], xmm0\n\t"
                "evmovdquq xmm0, [rsp - #64]",
                src_offset, dst_offset);
2089 break;
2090 default:
2091 ShouldNotReachHere();
2092 }
2093 #endif
2094 }
2095 }
2096
2097 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2098 PhaseRegAlloc* ra_,
2099 bool do_size,
2100 outputStream* st) const {
2101 assert(masm != nullptr || st != nullptr, "sanity");
2102 // Get registers to move
2103 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2104 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2105 OptoReg::Name dst_second = ra_->get_reg_second(this);
2106 OptoReg::Name dst_first = ra_->get_reg_first(this);
2107
2108 enum RC src_second_rc = rc_class(src_second);
2109 enum RC src_first_rc = rc_class(src_first);
2110 enum RC dst_second_rc = rc_class(dst_second);
2111 enum RC dst_first_rc = rc_class(dst_first);
2112
2113 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2114 "must move at least 1 register" );
2115
2116 if (src_first == dst_first && src_second == dst_second) {
2117 // Self copy, no move
2118 return 0;
2119 }
2120 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2121 uint ireg = ideal_reg();
2122 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2123 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2137 } else {
2138 ShouldNotReachHere();
2139 }
2140 return 0;
2141 }
2142 if (src_first_rc == rc_stack) {
2143 // mem ->
2144 if (dst_first_rc == rc_stack) {
2145 // mem -> mem
2146 assert(src_second != dst_first, "overlap");
2147 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2148 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2149 // 64-bit
2150 int src_offset = ra_->reg2offset(src_first);
2151 int dst_offset = ra_->reg2offset(dst_first);
2152 if (masm) {
2153 __ pushq(Address(rsp, src_offset));
2154 __ popq (Address(rsp, dst_offset));
2155 #ifndef PRODUCT
2156 } else {
2157 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2158 "popq [rsp + #%d]",
2159 src_offset, dst_offset);
2160 #endif
2161 }
2162 } else {
2163 // 32-bit
2164 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2165 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2166 // No pushl/popl, so:
2167 int src_offset = ra_->reg2offset(src_first);
2168 int dst_offset = ra_->reg2offset(dst_first);
2169 if (masm) {
2170 __ movq(Address(rsp, -8), rax);
2171 __ movl(rax, Address(rsp, src_offset));
2172 __ movl(Address(rsp, dst_offset), rax);
2173 __ movq(rax, Address(rsp, -8));
2174 #ifndef PRODUCT
2175 } else {
2176 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2177 "movl rax, [rsp + #%d]\n\t"
2178 "movl [rsp + #%d], rax\n\t"
2179 "movq rax, [rsp - #8]",
2180 src_offset, dst_offset);
2181 #endif
2182 }
2183 }
2184 return 0;
2185 } else if (dst_first_rc == rc_int) {
2186 // mem -> gpr
2187 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2188 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2189 // 64-bit
2190 int offset = ra_->reg2offset(src_first);
2191 if (masm) {
2192 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2193 #ifndef PRODUCT
2194 } else {
2195 st->print("movq %s, [rsp + #%d]\t# spill",
2196 Matcher::regName[dst_first],
2197 offset);
2198 #endif
2199 }
2200 } else {
2201 // 32-bit
2202 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2203 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2204 int offset = ra_->reg2offset(src_first);
2205 if (masm) {
2206 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2207 #ifndef PRODUCT
2208 } else {
2209 st->print("movl %s, [rsp + #%d]\t# spill",
2210 Matcher::regName[dst_first],
2211 offset);
2212 #endif
2213 }
2214 }
2215 return 0;
2216 } else if (dst_first_rc == rc_float) {
    // mem -> xmm
2218 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2219 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2220 // 64-bit
2221 int offset = ra_->reg2offset(src_first);
2222 if (masm) {
2223 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2224 #ifndef PRODUCT
2225 } else {
2226 st->print("%s %s, [rsp + #%d]\t# spill",
2227 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2228 Matcher::regName[dst_first],
2229 offset);
2230 #endif
2231 }
2232 } else {
2233 // 32-bit
2234 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2235 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2236 int offset = ra_->reg2offset(src_first);
2237 if (masm) {
2238 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2239 #ifndef PRODUCT
2240 } else {
2241 st->print("movss %s, [rsp + #%d]\t# spill",
2242 Matcher::regName[dst_first],
2243 offset);
2244 #endif
2245 }
2246 }
2247 return 0;
2248 } else if (dst_first_rc == rc_kreg) {
2249 // mem -> kreg
2250 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2251 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2252 // 64-bit
2253 int offset = ra_->reg2offset(src_first);
2254 if (masm) {
2255 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2256 #ifndef PRODUCT
2257 } else {
2258 st->print("kmovq %s, [rsp + #%d]\t# spill",
2259 Matcher::regName[dst_first],
2260 offset);
2261 #endif
2262 }
2263 }
2264 return 0;
2265 }
2266 } else if (src_first_rc == rc_int) {
2267 // gpr ->
2268 if (dst_first_rc == rc_stack) {
2269 // gpr -> mem
2270 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2271 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2272 // 64-bit
2273 int offset = ra_->reg2offset(dst_first);
2274 if (masm) {
2275 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2276 #ifndef PRODUCT
2277 } else {
2278 st->print("movq [rsp + #%d], %s\t# spill",
2279 offset,
2280 Matcher::regName[src_first]);
2281 #endif
2282 }
2283 } else {
2284 // 32-bit
2285 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2286 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2287 int offset = ra_->reg2offset(dst_first);
2288 if (masm) {
2289 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2290 #ifndef PRODUCT
2291 } else {
2292 st->print("movl [rsp + #%d], %s\t# spill",
2293 offset,
2294 Matcher::regName[src_first]);
2295 #endif
2296 }
2297 }
2298 return 0;
2299 } else if (dst_first_rc == rc_int) {
2300 // gpr -> gpr
2301 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2302 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2303 // 64-bit
2304 if (masm) {
2305 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2306 as_Register(Matcher::_regEncode[src_first]));
2307 #ifndef PRODUCT
2308 } else {
2309 st->print("movq %s, %s\t# spill",
2310 Matcher::regName[dst_first],
2311 Matcher::regName[src_first]);
2312 #endif
2313 }
2314 return 0;
2315 } else {
2316 // 32-bit
2317 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2318 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2319 if (masm) {
2320 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2321 as_Register(Matcher::_regEncode[src_first]));
2322 #ifndef PRODUCT
2323 } else {
2324 st->print("movl %s, %s\t# spill",
2325 Matcher::regName[dst_first],
2326 Matcher::regName[src_first]);
2327 #endif
2328 }
2329 return 0;
2330 }
2331 } else if (dst_first_rc == rc_float) {
2332 // gpr -> xmm
2333 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2334 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2335 // 64-bit
2336 if (masm) {
2337 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2338 #ifndef PRODUCT
2339 } else {
2340 st->print("movdq %s, %s\t# spill",
2341 Matcher::regName[dst_first],
2342 Matcher::regName[src_first]);
2343 #endif
2344 }
2345 } else {
2346 // 32-bit
2347 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2348 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2349 if (masm) {
2350 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2351 #ifndef PRODUCT
2352 } else {
2353 st->print("movdl %s, %s\t# spill",
2354 Matcher::regName[dst_first],
2355 Matcher::regName[src_first]);
2356 #endif
2357 }
2358 }
2359 return 0;
2360 } else if (dst_first_rc == rc_kreg) {
2361 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2362 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2363 // 64-bit
2364 if (masm) {
2365 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2366 #ifndef PRODUCT
2367 } else {
2368 st->print("kmovq %s, %s\t# spill",
2369 Matcher::regName[dst_first],
2370 Matcher::regName[src_first]);
2371 #endif
2372 }
2373 }
2374 Unimplemented();
2375 return 0;
2376 }
2377 } else if (src_first_rc == rc_float) {
2378 // xmm ->
2379 if (dst_first_rc == rc_stack) {
2380 // xmm -> mem
2381 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2382 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2383 // 64-bit
2384 int offset = ra_->reg2offset(dst_first);
2385 if (masm) {
2386 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2387 #ifndef PRODUCT
2388 } else {
2389 st->print("movsd [rsp + #%d], %s\t# spill",
2390 offset,
2391 Matcher::regName[src_first]);
2392 #endif
2393 }
2394 } else {
2395 // 32-bit
2396 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2397 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2398 int offset = ra_->reg2offset(dst_first);
2399 if (masm) {
2400 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2401 #ifndef PRODUCT
2402 } else {
2403 st->print("movss [rsp + #%d], %s\t# spill",
2404 offset,
2405 Matcher::regName[src_first]);
2406 #endif
2407 }
2408 }
2409 return 0;
2410 } else if (dst_first_rc == rc_int) {
2411 // xmm -> gpr
2412 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2413 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2414 // 64-bit
2415 if (masm) {
2416 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2417 #ifndef PRODUCT
2418 } else {
2419 st->print("movdq %s, %s\t# spill",
2420 Matcher::regName[dst_first],
2421 Matcher::regName[src_first]);
2422 #endif
2423 }
2424 } else {
2425 // 32-bit
2426 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2427 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2428 if (masm) {
2429 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2430 #ifndef PRODUCT
2431 } else {
2432 st->print("movdl %s, %s\t# spill",
2433 Matcher::regName[dst_first],
2434 Matcher::regName[src_first]);
2435 #endif
2436 }
2437 }
2438 return 0;
2439 } else if (dst_first_rc == rc_float) {
2440 // xmm -> xmm
2441 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2442 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2443 // 64-bit
2444 if (masm) {
2445 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2446 #ifndef PRODUCT
2447 } else {
2448 st->print("%s %s, %s\t# spill",
2449 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2450 Matcher::regName[dst_first],
2451 Matcher::regName[src_first]);
2452 #endif
2453 }
2454 } else {
2455 // 32-bit
2456 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2457 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2458 if (masm) {
2459 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2460 #ifndef PRODUCT
2461 } else {
2462 st->print("%s %s, %s\t# spill",
2463 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2464 Matcher::regName[dst_first],
2465 Matcher::regName[src_first]);
2466 #endif
2467 }
2468 }
2469 return 0;
2470 } else if (dst_first_rc == rc_kreg) {
2471 assert(false, "Illegal spilling");
2472 return 0;
2473 }
2474 } else if (src_first_rc == rc_kreg) {
2475 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2477 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2478 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2479 // 64-bit
2480 int offset = ra_->reg2offset(dst_first);
2481 if (masm) {
2482 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2483 #ifndef PRODUCT
2484 } else {
2485 st->print("kmovq [rsp + #%d] , %s\t# spill",
2486 offset,
2487 Matcher::regName[src_first]);
2488 #endif
2489 }
2490 }
2491 return 0;
2492 } else if (dst_first_rc == rc_int) {
2493 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2494 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2495 // 64-bit
2496 if (masm) {
2497 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2498 #ifndef PRODUCT
2499 } else {
2500 st->print("kmovq %s, %s\t# spill",
2501 Matcher::regName[dst_first],
2502 Matcher::regName[src_first]);
2503 #endif
2504 }
2505 }
2506 Unimplemented();
2507 return 0;
2508 } else if (dst_first_rc == rc_kreg) {
2509 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2510 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2511 // 64-bit
2512 if (masm) {
2513 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2514 #ifndef PRODUCT
2515 } else {
2516 st->print("kmovq %s, %s\t# spill",
2517 Matcher::regName[dst_first],
2518 Matcher::regName[src_first]);
2519 #endif
2520 }
2521 }
2522 return 0;
2523 } else if (dst_first_rc == rc_float) {
2524 assert(false, "Illegal spill");
2525 return 0;
2526 }
2527 }
2528
  assert(false, "unhandled spill combination");
2530 Unimplemented();
2531 return 0;
2532 }
2533
2534 #ifndef PRODUCT
2535 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2536 implementation(nullptr, ra_, false, st);
2537 }
2538 #endif
2539
2540 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2541 implementation(masm, ra_, false, nullptr);
2542 }
2543
2544 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2545 return MachNode::size(ra_);
2546 }
2547
2548 //=============================================================================
2549 #ifndef PRODUCT
2550 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2551 {
2552 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2553 int reg = ra_->get_reg_first(this);
2554 st->print("leaq %s, [rsp + #%d]\t# box lock",
2555 Matcher::regName[reg], offset);
2556 }
2557 #endif
2558
2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2560 {
2561 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2562 int reg = ra_->get_encode(this);
2563
2564 __ lea(as_Register(reg), Address(rsp, offset));
2565 }
2566
2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2568 {
2569 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
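  // lea reg, [rsp + offset] encodes as prefix + opcode + ModRM + SIB plus a
  // disp8 (offset < 0x80) or disp32; registers above 15 need the two-byte
  // REX2 prefix instead of REX.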
2570 if (ra_->get_encode(this) > 15) {
2571 return (offset < 0x80) ? 6 : 9; // REX2
2572 } else {
2573 return (offset < 0x80) ? 5 : 8; // REX
2574 }
2575 }
2576
2577 //=============================================================================
2578 #ifndef PRODUCT
2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2580 {
2581 st->print_cr("MachVEPNode");
2582 }
2583 #endif
2584
2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 CodeBuffer* cbuf = masm->code();
2588 uint insts_size = cbuf->insts_size();
2589 if (!_verified) {
2590 __ ic_check(1);
2591 } else {
2592 // TODO 8284443 Avoid creation of temporary frame
2593 if (ra_->C->stub_function() == nullptr) {
2594 __ verified_entry(ra_->C, 0);
2595 __ entry_barrier();
2596 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2597 __ remove_frame(initial_framesize, false);
2598 }
2599 // Unpack inline type args passed as oop and then jump to
2600 // the verified entry point (skipping the unverified entry).
2601 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2602 // Emit code for verified entry and save increment for stack repair on return
2603 __ verified_entry(ra_->C, sp_inc);
2604 if (Compile::current()->output()->in_scratch_emit_size()) {
2605 Label dummy_verified_entry;
2606 __ jmp(dummy_verified_entry);
2607 } else {
2608 __ jmp(*_verified_entry);
2609 }
2610 }
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
2613 int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
2614 nops_cnt &= 0x3; // Do not add nops if code is aligned.
2615 if (nops_cnt > 0) {
2616 __ nop(nops_cnt);
2617 }
2618 }
2619
2620 //=============================================================================
2621 #ifndef PRODUCT
2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2623 {
2624 if (UseCompressedClassPointers) {
2625 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2626 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2627 } else {
    st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2629 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2630 }
2631 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2632 }
2633 #endif
2634
2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2636 {
2637 __ ic_check(InteriorEntryAlignment);
2638 }
2639
2640
2641 //=============================================================================
2642
2643 bool Matcher::supports_vector_calling_convention(void) {
2644 return EnableVectorSupport;
2645 }
2646
2647 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2648 assert(EnableVectorSupport, "sanity");
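  // Vectors are returned in XMM0; 'hi' names the last 32-bit slot spanned
  // by the vector: b = 64 bits (VecD), d = 128 (VecX), h = 256 (VecY),
  // p = 512 (VecZ).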
2649 int lo = XMM0_num;
2650 int hi = XMM0b_num;
2651 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2652 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2653 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2654 return OptoRegPair(hi, lo);
2655 }
2656
2657 // Is this branch offset short enough that a short branch can be used?
2658 //
2659 // NOTE: If the platform does not provide any short branch variants, then
2660 // this method should return false for offset 0.
2661 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
// The passed offset is relative to the address of the branch.
// On x86 a branch displacement is calculated relative to the address
// of the next instruction.
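// For example, a 2-byte short jump whose target lies 100 bytes past the
// branch address sees offset == 100 and br_size == 2, giving a displacement
// of 98, which fits in a signed byte.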
2665 offset -= br_size;
2666
// The short version of jmpConUCF2 contains multiple branches,
// making the reach slightly shorter.
2669 if (rule == jmpConUCF2_rule)
2670 return (-126 <= offset && offset <= 125);
2671 return (-128 <= offset && offset <= 127);
2672 }
2673
2674 // Return whether or not this register is ever used as an argument.
2675 // This function is used on startup to build the trampoline stubs in
2676 // generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
2678 // available to the callee.
2679 bool Matcher::can_be_java_arg(int reg)
2680 {
2681 return
2682 reg == RDI_num || reg == RDI_H_num ||
2683 reg == RSI_num || reg == RSI_H_num ||
2684 reg == RDX_num || reg == RDX_H_num ||
2685 reg == RCX_num || reg == RCX_H_num ||
2686 reg == R8_num || reg == R8_H_num ||
2687 reg == R9_num || reg == R9_H_num ||
2688 reg == R12_num || reg == R12_H_num ||
2689 reg == XMM0_num || reg == XMM0b_num ||
2690 reg == XMM1_num || reg == XMM1b_num ||
2691 reg == XMM2_num || reg == XMM2b_num ||
2692 reg == XMM3_num || reg == XMM3b_num ||
2693 reg == XMM4_num || reg == XMM4b_num ||
2694 reg == XMM5_num || reg == XMM5b_num ||
2695 reg == XMM6_num || reg == XMM6b_num ||
2696 reg == XMM7_num || reg == XMM7b_num;
2697 }
2698
2699 bool Matcher::is_spillable_arg(int reg)
2700 {
2701 return can_be_java_arg(reg);
2702 }
2703
2704 uint Matcher::int_pressure_limit()
2705 {
2706 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2707 }
2708
2709 uint Matcher::float_pressure_limit()
2710 {
// After experimenting with different values, the following default threshold
// works best for LCM's register pressure scheduling on x64.
2713 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2714 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2715 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2716 }
2717
2718 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
// In 64-bit mode, code that uses a multiply when the divisor is
// constant is faster than the hardware DIV instruction (it uses
// MulHiL).
2722 return false;
2723 }
2724
2725 // Register for DIVI projection of divmodI
2726 const RegMask& Matcher::divI_proj_mask() {
2727 return INT_RAX_REG_mask();
2728 }
2729
2730 // Register for MODI projection of divmodI
2731 const RegMask& Matcher::modI_proj_mask() {
2732 return INT_RDX_REG_mask();
2733 }
2734
2735 // Register for DIVL projection of divmodL
2736 const RegMask& Matcher::divL_proj_mask() {
2737 return LONG_RAX_REG_mask();
2738 }
2739
2740 // Register for MODL projection of divmodL
2741 const RegMask& Matcher::modL_proj_mask() {
2742 return LONG_RDX_REG_mask();
2743 }
2744
2745 %}
2746
2747 source_hpp %{
2748 // Header information of the source block.
2749 // Method declarations/definitions which are used outside
2750 // the ad-scope can conveniently be defined here.
2751 //
2752 // To keep related declarations/definitions/uses close together,
2753 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2754
2755 #include "runtime/vm_version.hpp"
2756
2757 class NativeJump;
2758
2759 class CallStubImpl {
2760
2761 //--------------------------------------------------------------
2762 //---< Used for optimization in Compile::shorten_branches >---
2763 //--------------------------------------------------------------
2764
2765 public:
2766 // Size of call trampoline stub.
2767 static uint size_call_trampoline() {
2768 return 0; // no call trampolines on this platform
2769 }
2770
2771 // number of relocations needed by a call trampoline stub
2772 static uint reloc_call_trampoline() {
2773 return 0; // no call trampolines on this platform
2774 }
2775 };
2776
2777 class HandlerImpl {
2778
2779 public:
2780
2781 static int emit_exception_handler(C2_MacroAssembler *masm);
2782 static int emit_deopt_handler(C2_MacroAssembler* masm);
2783
2784 static uint size_exception_handler() {
2785 // NativeCall instruction size is the same as NativeJump.
// The exception handler starts out as a jump and can be patched to
// a call by deoptimization. (4932387)
2788 // Note that this value is also credited (in output.cpp) to
2789 // the size of the code section.
2790 return NativeJump::instruction_size;
2791 }
2792
2793 static uint size_deopt_handler() {
// three 5-byte instructions plus one move for the unreachable address.
2795 return 15+3;
2796 }
2797 };
2798
2799 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2800 switch(bytes) {
2801 case 4: // fall-through
2802 case 8: // fall-through
2803 case 16: return Assembler::AVX_128bit;
2804 case 32: return Assembler::AVX_256bit;
2805 case 64: return Assembler::AVX_512bit;
2806
2807 default: {
2808 ShouldNotReachHere();
2809 return Assembler::AVX_NoVec;
2810 }
2811 }
2812 }
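// For example, vector_length_encoding(32) yields Assembler::AVX_256bit for a
// YMM-sized operation; 4- and 8-byte vectors still use 128-bit (XMM) encodings.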
2813
2814 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2815 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2816 }
2817
2818 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2819 uint def_idx = use->operand_index(opnd);
2820 Node* def = use->in(def_idx);
2821 return vector_length_encoding(def);
2822 }
2823
2824 static inline bool is_vector_popcount_predicate(BasicType bt) {
2825 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2826 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2827 }
2828
2829 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2830 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2831 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2832 }
2833
2834 class Node::PD {
2835 public:
2836 enum NodeFlags {
2837 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2838 Flag_sets_carry_flag = Node::_last_flag << 2,
2839 Flag_sets_parity_flag = Node::_last_flag << 3,
2840 Flag_sets_zero_flag = Node::_last_flag << 4,
2841 Flag_sets_overflow_flag = Node::_last_flag << 5,
2842 Flag_sets_sign_flag = Node::_last_flag << 6,
2843 Flag_clears_carry_flag = Node::_last_flag << 7,
2844 Flag_clears_parity_flag = Node::_last_flag << 8,
2845 Flag_clears_zero_flag = Node::_last_flag << 9,
2846 Flag_clears_overflow_flag = Node::_last_flag << 10,
2847 Flag_clears_sign_flag = Node::_last_flag << 11,
2848 _last_flag = Flag_clears_sign_flag
2849 };
2850 };
2851
2852 %} // end source_hpp
2853
2854 source %{
2855
2856 #include "opto/addnode.hpp"
2857 #include "c2_intelJccErratum_x86.hpp"
2858
2859 void PhaseOutput::pd_perform_mach_node_analysis() {
2860 if (VM_Version::has_intel_jcc_erratum()) {
2861 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2862 _buf_sizes._code += extra_padding;
2863 }
2864 }
2865
2866 int MachNode::pd_alignment_required() const {
2867 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2868 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2869 return IntelJccErratum::largest_jcc_size() + 1;
2870 } else {
2871 return 1;
2872 }
2873 }
2874
2875 int MachNode::compute_padding(int current_offset) const {
2876 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2877 Compile* C = Compile::current();
2878 PhaseOutput* output = C->output();
2879 Block* block = output->block();
2880 int index = output->index();
2881 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2882 } else {
2883 return 0;
2884 }
2885 }
2886
2887 // Emit exception handler code.
2888 // Stuff framesize into a register and call a VM stub routine.
2889 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
2890
2891 // Note that the code buffer's insts_mark is always relative to insts.
2892 // That's why we must use the macroassembler to generate a handler.
2893 address base = __ start_a_stub(size_exception_handler());
2894 if (base == nullptr) {
2895 ciEnv::current()->record_failure("CodeCache is full");
2896 return 0; // CodeBuffer::expand failed
2897 }
2898 int offset = __ offset();
2899 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2900 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2901 __ end_a_stub();
2902 return offset;
2903 }
2904
2905 // Emit deopt handler code.
2906 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2907
2908 // Note that the code buffer's insts_mark is always relative to insts.
2909 // That's why we must use the macroassembler to generate a handler.
2910 address base = __ start_a_stub(size_deopt_handler());
2911 if (base == nullptr) {
2912 ciEnv::current()->record_failure("CodeCache is full");
2913 return 0; // CodeBuffer::expand failed
2914 }
2915 int offset = __ offset();
2916
2917 address the_pc = (address) __ pc();
2918 Label next;
// Push "the_pc" on the stack without destroying any registers,
// as they may all be live.
2921
2922 // push address of "next"
2923 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
2924 __ bind(next);
2925 // adjust it so it matches "the_pc"
2926 __ subptr(Address(rsp, 0), __ offset() - offset);
2927
2928 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2929 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2930 __ end_a_stub();
2931 return offset;
2932 }
2933
2934 static Assembler::Width widthForType(BasicType bt) {
2935 if (bt == T_BYTE) {
2936 return Assembler::B;
2937 } else if (bt == T_SHORT) {
2938 return Assembler::W;
2939 } else if (bt == T_INT) {
2940 return Assembler::D;
2941 } else {
2942 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2943 return Assembler::Q;
2944 }
2945 }
2946
2947 //=============================================================================
2948
2949 // Float masks come from different places depending on platform.
2950 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2951 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2952 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2953 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2954 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2955 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2956 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2957 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2958 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2959 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2960 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2961 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2962 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2963 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2964 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2965 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2966 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2967 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2968 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2969
2970 //=============================================================================
2971 bool Matcher::match_rule_supported(int opcode) {
2972 if (!has_match_rule(opcode)) {
2973 return false; // no match rule present
2974 }
2975 switch (opcode) {
2976 case Op_AbsVL:
2977 case Op_StoreVectorScatter:
2978 if (UseAVX < 3) {
2979 return false;
2980 }
2981 break;
2982 case Op_PopCountI:
2983 case Op_PopCountL:
2984 if (!UsePopCountInstruction) {
2985 return false;
2986 }
2987 break;
2988 case Op_PopCountVI:
2989 if (UseAVX < 2) {
2990 return false;
2991 }
2992 break;
2993 case Op_CompressV:
2994 case Op_ExpandV:
2995 case Op_PopCountVL:
2996 if (UseAVX < 2) {
2997 return false;
2998 }
2999 break;
3000 case Op_MulVI:
3001 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3002 return false;
3003 }
3004 break;
3005 case Op_MulVL:
3006 if (UseSSE < 4) { // only with SSE4_1 or AVX
3007 return false;
3008 }
3009 break;
3010 case Op_MulReductionVL:
3011 if (VM_Version::supports_avx512dq() == false) {
3012 return false;
3013 }
3014 break;
3015 case Op_AbsVB:
3016 case Op_AbsVS:
3017 case Op_AbsVI:
3018 case Op_AddReductionVI:
3019 case Op_AndReductionV:
3020 case Op_OrReductionV:
3021 case Op_XorReductionV:
3022 if (UseSSE < 3) { // requires at least SSSE3
3023 return false;
3024 }
3025 break;
3026 case Op_MaxHF:
3027 case Op_MinHF:
3028 if (!VM_Version::supports_avx512vlbw()) {
3029 return false;
3030 } // fallthrough
3031 case Op_AddHF:
3032 case Op_DivHF:
3033 case Op_FmaHF:
3034 case Op_MulHF:
3035 case Op_ReinterpretS2HF:
3036 case Op_ReinterpretHF2S:
3037 case Op_SubHF:
3038 case Op_SqrtHF:
3039 if (!VM_Version::supports_avx512_fp16()) {
3040 return false;
3041 }
3042 break;
3043 case Op_VectorLoadShuffle:
3044 case Op_VectorRearrange:
3045 case Op_MulReductionVI:
3046 if (UseSSE < 4) { // requires at least SSE4
3047 return false;
3048 }
3049 break;
3050 case Op_IsInfiniteF:
3051 case Op_IsInfiniteD:
3052 if (!VM_Version::supports_avx512dq()) {
3053 return false;
3054 }
3055 break;
3056 case Op_SqrtVD:
3057 case Op_SqrtVF:
3058 case Op_VectorMaskCmp:
3059 case Op_VectorCastB2X:
3060 case Op_VectorCastS2X:
3061 case Op_VectorCastI2X:
3062 case Op_VectorCastL2X:
3063 case Op_VectorCastF2X:
3064 case Op_VectorCastD2X:
3065 case Op_VectorUCastB2X:
3066 case Op_VectorUCastS2X:
3067 case Op_VectorUCastI2X:
3068 case Op_VectorMaskCast:
3069 if (UseAVX < 1) { // enabled for AVX only
3070 return false;
3071 }
3072 break;
3073 case Op_PopulateIndex:
3074 if (UseAVX < 2) {
3075 return false;
3076 }
3077 break;
3078 case Op_RoundVF:
3079 if (UseAVX < 2) { // enabled for AVX2 only
3080 return false;
3081 }
3082 break;
3083 case Op_RoundVD:
3084 if (UseAVX < 3) {
3085 return false; // enabled for AVX3 only
3086 }
3087 break;
3088 case Op_CompareAndSwapL:
3089 case Op_CompareAndSwapP:
3090 break;
3091 case Op_StrIndexOf:
3092 if (!UseSSE42Intrinsics) {
3093 return false;
3094 }
3095 break;
3096 case Op_StrIndexOfChar:
3097 if (!UseSSE42Intrinsics) {
3098 return false;
3099 }
3100 break;
3101 case Op_OnSpinWait:
3102 if (VM_Version::supports_on_spin_wait() == false) {
3103 return false;
3104 }
3105 break;
3106 case Op_MulVB:
3107 case Op_LShiftVB:
3108 case Op_RShiftVB:
3109 case Op_URShiftVB:
3110 case Op_VectorInsert:
3111 case Op_VectorLoadMask:
3112 case Op_VectorStoreMask:
3113 case Op_VectorBlend:
3114 if (UseSSE < 4) {
3115 return false;
3116 }
3117 break;
3118 case Op_MaxD:
3119 case Op_MaxF:
3120 case Op_MinD:
3121 case Op_MinF:
3122 if (UseAVX < 1) { // enabled for AVX only
3123 return false;
3124 }
3125 break;
3126 case Op_CacheWB:
3127 case Op_CacheWBPreSync:
3128 case Op_CacheWBPostSync:
3129 if (!VM_Version::supports_data_cache_line_flush()) {
3130 return false;
3131 }
3132 break;
3133 case Op_ExtractB:
3134 case Op_ExtractL:
3135 case Op_ExtractI:
3136 case Op_RoundDoubleMode:
3137 if (UseSSE < 4) {
3138 return false;
3139 }
3140 break;
3141 case Op_RoundDoubleModeV:
3142 if (VM_Version::supports_avx() == false) {
3143 return false; // 128bit vroundpd is not available
3144 }
3145 break;
3146 case Op_LoadVectorGather:
3147 case Op_LoadVectorGatherMasked:
3148 if (UseAVX < 2) {
3149 return false;
3150 }
3151 break;
3152 case Op_FmaF:
3153 case Op_FmaD:
3154 case Op_FmaVD:
3155 case Op_FmaVF:
3156 if (!UseFMA) {
3157 return false;
3158 }
3159 break;
3160 case Op_MacroLogicV:
3161 if (UseAVX < 3 || !UseVectorMacroLogic) {
3162 return false;
3163 }
3164 break;
3165
3166 case Op_VectorCmpMasked:
3167 case Op_VectorMaskGen:
3168 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3169 return false;
3170 }
3171 break;
3172 case Op_VectorMaskFirstTrue:
3173 case Op_VectorMaskLastTrue:
3174 case Op_VectorMaskTrueCount:
3175 case Op_VectorMaskToLong:
3176 if (UseAVX < 1) {
3177 return false;
3178 }
3179 break;
3180 case Op_RoundF:
3181 case Op_RoundD:
3182 break;
3183 case Op_CopySignD:
3184 case Op_CopySignF:
3185 if (UseAVX < 3) {
3186 return false;
3187 }
3188 if (!VM_Version::supports_avx512vl()) {
3189 return false;
3190 }
3191 break;
3192 case Op_CompressBits:
3193 case Op_ExpandBits:
3194 if (!VM_Version::supports_bmi2()) {
3195 return false;
3196 }
3197 break;
3198 case Op_CompressM:
3199 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3200 return false;
3201 }
3202 break;
3203 case Op_ConvF2HF:
3204 case Op_ConvHF2F:
3205 if (!VM_Version::supports_float16()) {
3206 return false;
3207 }
3208 break;
3209 case Op_VectorCastF2HF:
3210 case Op_VectorCastHF2F:
3211 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3212 return false;
3213 }
3214 break;
3215 }
3216 return true; // Match rules are supported by default.
3217 }
3218
3219 //------------------------------------------------------------------------
3220
3221 static inline bool is_pop_count_instr_target(BasicType bt) {
3222 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3223 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3224 }
3225
3226 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3227 return match_rule_supported_vector(opcode, vlen, bt);
3228 }
3229
// Identify extra cases for which we might want to provide match rules for vector nodes
// and other intrinsics, guarded by vector length (vlen) and element type (bt).
3232 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3233 if (!match_rule_supported(opcode)) {
3234 return false;
3235 }
3236 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3237 // * SSE2 supports 128bit vectors for all types;
3238 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3239 // * AVX2 supports 256bit vectors for all types;
3240 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3241 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3242 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3243 // And MaxVectorSize is taken into account as well.
3244 if (!vector_size_supported(bt, vlen)) {
3245 return false;
3246 }
3247 // Special cases which require vector length follow:
3248 // * implementation limitations
3249 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3250 // * 128bit vroundpd instruction is present only in AVX1
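// For example, a vector of 8 ints has size_in_bits = 8 * 4 * 8 = 256.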
3251 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3252 switch (opcode) {
3253 case Op_MaxVHF:
3254 case Op_MinVHF:
3255 if (!VM_Version::supports_avx512bw()) {
3256 return false;
} // fallthrough
3258 case Op_AddVHF:
3259 case Op_DivVHF:
3260 case Op_FmaVHF:
3261 case Op_MulVHF:
3262 case Op_SubVHF:
3263 case Op_SqrtVHF:
3264 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3265 return false;
3266 }
3267 if (!VM_Version::supports_avx512_fp16()) {
3268 return false;
3269 }
3270 break;
3271 case Op_AbsVF:
3272 case Op_NegVF:
3273 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3274 return false; // 512bit vandps and vxorps are not available
3275 }
3276 break;
3277 case Op_AbsVD:
3278 case Op_NegVD:
3279 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3280 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3281 }
3282 break;
3283 case Op_RotateRightV:
3284 case Op_RotateLeftV:
3285 if (bt != T_INT && bt != T_LONG) {
3286 return false;
3287 } // fallthrough
3288 case Op_MacroLogicV:
3289 if (!VM_Version::supports_evex() ||
3290 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3291 return false;
3292 }
3293 break;
3294 case Op_ClearArray:
3295 case Op_VectorMaskGen:
3296 case Op_VectorCmpMasked:
3297 if (!VM_Version::supports_avx512bw()) {
3298 return false;
3299 }
3300 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3301 return false;
3302 }
3303 break;
3304 case Op_LoadVectorMasked:
3305 case Op_StoreVectorMasked:
3306 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3307 return false;
3308 }
3309 break;
3310 case Op_UMinV:
3311 case Op_UMaxV:
3312 if (UseAVX == 0) {
3313 return false;
3314 }
3315 break;
3316 case Op_MaxV:
3317 case Op_MinV:
3318 if (UseSSE < 4 && is_integral_type(bt)) {
3319 return false;
3320 }
3321 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3322 // Float/Double intrinsics are enabled for AVX family currently.
3323 if (UseAVX == 0) {
3324 return false;
3325 }
3326 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3327 return false;
3328 }
3329 }
3330 break;
3331 case Op_CallLeafVector:
3332 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3333 return false;
3334 }
3335 break;
3336 case Op_AddReductionVI:
3337 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3338 return false;
3339 }
3340 // fallthrough
3341 case Op_AndReductionV:
3342 case Op_OrReductionV:
3343 case Op_XorReductionV:
3344 if (is_subword_type(bt) && (UseSSE < 4)) {
3345 return false;
3346 }
3347 break;
3348 case Op_MinReductionV:
3349 case Op_MaxReductionV:
3350 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3351 return false;
3352 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3353 return false;
3354 }
3355 // Float/Double intrinsics enabled for AVX family.
3356 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3357 return false;
3358 }
3359 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3360 return false;
3361 }
3362 break;
3363 case Op_VectorTest:
3364 if (UseSSE < 4) {
3365 return false; // Implementation limitation
3366 } else if (size_in_bits < 32) {
3367 return false; // Implementation limitation
3368 }
3369 break;
3370 case Op_VectorLoadShuffle:
3371 case Op_VectorRearrange:
if (vlen == 2) {
3373 return false; // Implementation limitation due to how shuffle is loaded
3374 } else if (size_in_bits == 256 && UseAVX < 2) {
3375 return false; // Implementation limitation
3376 }
3377 break;
3378 case Op_VectorLoadMask:
3379 case Op_VectorMaskCast:
3380 if (size_in_bits == 256 && UseAVX < 2) {
3381 return false; // Implementation limitation
3382 }
3383 // fallthrough
3384 case Op_VectorStoreMask:
3385 if (vlen == 2) {
3386 return false; // Implementation limitation
3387 }
3388 break;
3389 case Op_PopulateIndex:
3390 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3391 return false;
3392 }
3393 break;
3394 case Op_VectorCastB2X:
3395 case Op_VectorCastS2X:
3396 case Op_VectorCastI2X:
3397 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3398 return false;
3399 }
3400 break;
3401 case Op_VectorCastL2X:
3402 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3403 return false;
3404 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3405 return false;
3406 }
3407 break;
3408 case Op_VectorCastF2X: {
// As per JLS section 5.1.3, narrowing conversions to sub-word types
// happen after an intermediate conversion to integer, and the special
// handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3412 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3413 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3414 return false;
3415 }
3416 }
3417 // fallthrough
3418 case Op_VectorCastD2X:
3419 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3420 return false;
3421 }
3422 break;
3423 case Op_VectorCastF2HF:
3424 case Op_VectorCastHF2F:
3425 if (!VM_Version::supports_f16c() &&
3426 ((!VM_Version::supports_evex() ||
3427 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3428 return false;
3429 }
3430 break;
3431 case Op_RoundVD:
3432 if (!VM_Version::supports_avx512dq()) {
3433 return false;
3434 }
3435 break;
3436 case Op_MulReductionVI:
3437 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3438 return false;
3439 }
3440 break;
3441 case Op_LoadVectorGatherMasked:
3442 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3443 return false;
3444 }
3445 if (is_subword_type(bt) &&
3446 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3447 (size_in_bits < 64) ||
3448 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3449 return false;
3450 }
3451 break;
3452 case Op_StoreVectorScatterMasked:
3453 case Op_StoreVectorScatter:
3454 if (is_subword_type(bt)) {
3455 return false;
3456 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3457 return false;
3458 }
3459 // fallthrough
3460 case Op_LoadVectorGather:
3461 if (!is_subword_type(bt) && size_in_bits == 64) {
3462 return false;
3463 }
3464 if (is_subword_type(bt) && size_in_bits < 64) {
3465 return false;
3466 }
3467 break;
3468 case Op_SaturatingAddV:
3469 case Op_SaturatingSubV:
3470 if (UseAVX < 1) {
3471 return false; // Implementation limitation
3472 }
3473 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3474 return false;
3475 }
3476 break;
3477 case Op_SelectFromTwoVector:
3478 if (size_in_bits < 128) {
3479 return false;
3480 }
3481 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3482 return false;
3483 }
3484 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3485 return false;
3486 }
3487 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3488 return false;
3489 }
3490 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3491 return false;
3492 }
3493 break;
3494 case Op_MaskAll:
3495 if (!VM_Version::supports_evex()) {
3496 return false;
3497 }
3498 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3499 return false;
3500 }
3501 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3502 return false;
3503 }
3504 break;
3505 case Op_VectorMaskCmp:
3506 if (vlen < 2 || size_in_bits < 32) {
3507 return false;
3508 }
3509 break;
3510 case Op_CompressM:
3511 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3512 return false;
3513 }
3514 break;
3515 case Op_CompressV:
3516 case Op_ExpandV:
3517 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3518 return false;
3519 }
if (size_in_bits < 128) {
return false;
} // fallthrough
3523 case Op_VectorLongToMask:
3524 if (UseAVX < 1) {
3525 return false;
3526 }
3527 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3528 return false;
3529 }
3530 break;
3531 case Op_SignumVD:
3532 case Op_SignumVF:
3533 if (UseAVX < 1) {
3534 return false;
3535 }
3536 break;
3537 case Op_PopCountVI:
3538 case Op_PopCountVL: {
3539 if (!is_pop_count_instr_target(bt) &&
3540 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3541 return false;
3542 }
3543 }
3544 break;
3545 case Op_ReverseV:
3546 case Op_ReverseBytesV:
3547 if (UseAVX < 2) {
3548 return false;
3549 }
3550 break;
3551 case Op_CountTrailingZerosV:
3552 case Op_CountLeadingZerosV:
3553 if (UseAVX < 2) {
3554 return false;
3555 }
3556 break;
3557 }
return true; // Match rules are supported by default.
3559 }
3560
3561 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
// The ADLC-based match_rule_supported routine checks for the existence of a pattern
// based on the IR opcode. Most of the unary/binary/ternary masked operations share
// the IR nodes of their non-masked counterparts, with the mask edge being the
// differentiator. This routine does a strict check on the existence of masked
// operation patterns by returning a default false value for all the opcodes apart
// from the ones whose masked instruction patterns are defined in this file.
3568 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3569 return false;
3570 }
3571
3572 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3573 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3574 return false;
3575 }
3576 switch(opcode) {
3577 // Unary masked operations
3578 case Op_AbsVB:
3579 case Op_AbsVS:
if (!VM_Version::supports_avx512bw()) {
return false; // Implementation limitation
} // fallthrough
3583 case Op_AbsVI:
3584 case Op_AbsVL:
3585 return true;
3586
3587 // Ternary masked operations
3588 case Op_FmaVF:
3589 case Op_FmaVD:
3590 return true;
3591
3592 case Op_MacroLogicV:
if (bt != T_INT && bt != T_LONG) {
3594 return false;
3595 }
3596 return true;
3597
3598 // Binary masked operations
3599 case Op_AddVB:
3600 case Op_AddVS:
3601 case Op_SubVB:
3602 case Op_SubVS:
3603 case Op_MulVS:
3604 case Op_LShiftVS:
3605 case Op_RShiftVS:
3606 case Op_URShiftVS:
3607 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3608 if (!VM_Version::supports_avx512bw()) {
3609 return false; // Implementation limitation
3610 }
3611 return true;
3612
3613 case Op_MulVL:
3614 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3615 if (!VM_Version::supports_avx512dq()) {
3616 return false; // Implementation limitation
3617 }
3618 return true;
3619
3620 case Op_AndV:
3621 case Op_OrV:
3622 case Op_XorV:
3623 case Op_RotateRightV:
3624 case Op_RotateLeftV:
3625 if (bt != T_INT && bt != T_LONG) {
3626 return false; // Implementation limitation
3627 }
3628 return true;
3629
3630 case Op_VectorLoadMask:
3631 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3632 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3633 return false;
3634 }
3635 return true;
3636
3637 case Op_AddVI:
3638 case Op_AddVL:
3639 case Op_AddVF:
3640 case Op_AddVD:
3641 case Op_SubVI:
3642 case Op_SubVL:
3643 case Op_SubVF:
3644 case Op_SubVD:
3645 case Op_MulVI:
3646 case Op_MulVF:
3647 case Op_MulVD:
3648 case Op_DivVF:
3649 case Op_DivVD:
3650 case Op_SqrtVF:
3651 case Op_SqrtVD:
3652 case Op_LShiftVI:
3653 case Op_LShiftVL:
3654 case Op_RShiftVI:
3655 case Op_RShiftVL:
3656 case Op_URShiftVI:
3657 case Op_URShiftVL:
3658 case Op_LoadVectorMasked:
3659 case Op_StoreVectorMasked:
3660 case Op_LoadVectorGatherMasked:
3661 case Op_StoreVectorScatterMasked:
3662 return true;
3663
3664 case Op_UMinV:
3665 case Op_UMaxV:
3666 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3667 return false;
3668 } // fallthrough
3669 case Op_MaxV:
3670 case Op_MinV:
3671 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3672 return false; // Implementation limitation
3673 }
3674 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3675 return false; // Implementation limitation
3676 }
3677 return true;
3678 case Op_SaturatingAddV:
3679 case Op_SaturatingSubV:
3680 if (!is_subword_type(bt)) {
3681 return false;
3682 }
3683 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3684 return false; // Implementation limitation
3685 }
3686 return true;
3687
3688 case Op_VectorMaskCmp:
3689 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3690 return false; // Implementation limitation
3691 }
3692 return true;
3693
3694 case Op_VectorRearrange:
3695 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3696 return false; // Implementation limitation
3697 }
3698 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3699 return false; // Implementation limitation
3700 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3701 return false; // Implementation limitation
3702 }
3703 return true;
3704
3705 // Binary Logical operations
3706 case Op_AndVMask:
3707 case Op_OrVMask:
3708 case Op_XorVMask:
3709 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3710 return false; // Implementation limitation
3711 }
3712 return true;
3713
3714 case Op_PopCountVI:
3715 case Op_PopCountVL:
3716 if (!is_pop_count_instr_target(bt)) {
3717 return false;
3718 }
3719 return true;
3720
3721 case Op_MaskAll:
3722 return true;
3723
3724 case Op_CountLeadingZerosV:
3725 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3726 return true;
} // fallthrough
3728 default:
3729 return false;
3730 }
3731 }
3732
3733 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3734 return false;
3735 }
3736
3737 // Return true if Vector::rearrange needs preparation of the shuffle argument
3738 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3739 switch (elem_bt) {
3740 case T_BYTE: return false;
3741 case T_SHORT: return !VM_Version::supports_avx512bw();
3742 case T_INT: return !VM_Version::supports_avx();
3743 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3744 default:
3745 ShouldNotReachHere();
3746 return false;
3747 }
3748 }
3749
3750 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3751 // Prefer predicate if the mask type is "TypeVectMask".
3752 return vt->isa_vectmask() != nullptr;
3753 }
3754
3755 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3756 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3757 bool legacy = (generic_opnd->opcode() == LEGVEC);
3758 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3759 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3760 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3761 return new legVecZOper();
3762 }
3763 if (legacy) {
3764 switch (ideal_reg) {
3765 case Op_VecS: return new legVecSOper();
3766 case Op_VecD: return new legVecDOper();
3767 case Op_VecX: return new legVecXOper();
3768 case Op_VecY: return new legVecYOper();
3769 case Op_VecZ: return new legVecZOper();
3770 }
3771 } else {
3772 switch (ideal_reg) {
3773 case Op_VecS: return new vecSOper();
3774 case Op_VecD: return new vecDOper();
3775 case Op_VecX: return new vecXOper();
3776 case Op_VecY: return new vecYOper();
3777 case Op_VecZ: return new vecZOper();
3778 }
3779 }
3780 ShouldNotReachHere();
3781 return nullptr;
3782 }
3783
3784 bool Matcher::is_reg2reg_move(MachNode* m) {
3785 switch (m->rule()) {
3786 case MoveVec2Leg_rule:
3787 case MoveLeg2Vec_rule:
3788 case MoveF2VL_rule:
3789 case MoveF2LEG_rule:
3790 case MoveVL2F_rule:
3791 case MoveLEG2F_rule:
3792 case MoveD2VL_rule:
3793 case MoveD2LEG_rule:
3794 case MoveVL2D_rule:
3795 case MoveLEG2D_rule:
3796 return true;
3797 default:
3798 return false;
3799 }
3800 }
3801
3802 bool Matcher::is_generic_vector(MachOper* opnd) {
3803 switch (opnd->opcode()) {
3804 case VEC:
3805 case LEGVEC:
3806 return true;
3807 default:
3808 return false;
3809 }
3810 }
3811
3812 //------------------------------------------------------------------------
3813
3814 const RegMask* Matcher::predicate_reg_mask(void) {
3815 return &_VECTMASK_REG_mask;
3816 }
3817
3818 // Max vector size in bytes. 0 if not supported.
3819 int Matcher::vector_width_in_bytes(BasicType bt) {
3820 assert(is_java_primitive(bt), "only primitive type vectors");
3821 // SSE2 supports 128bit vectors for all types.
3822 // AVX2 supports 256bit vectors for all types.
// EVEX (AVX-512) supports 512bit vectors for all types.
3824 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
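// For example, UseAVX == 2 gives (1 << 2) * 8 = 32 bytes (256-bit YMM),
// and UseAVX == 3 gives 64 bytes (512-bit ZMM).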
3825 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3826 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3827 size = (UseAVX > 2) ? 64 : 32;
3828 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3829 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3830 // Use flag to limit vector size.
3831 size = MIN2(size,(int)MaxVectorSize);
3832 // Minimum 2 values in vector (or 4 for bytes).
3833 switch (bt) {
3834 case T_DOUBLE:
3835 case T_LONG:
3836 if (size < 16) return 0;
3837 break;
3838 case T_FLOAT:
3839 case T_INT:
3840 if (size < 8) return 0;
3841 break;
3842 case T_BOOLEAN:
3843 if (size < 4) return 0;
3844 break;
3845 case T_CHAR:
3846 if (size < 4) return 0;
3847 break;
3848 case T_BYTE:
3849 if (size < 4) return 0;
3850 break;
3851 case T_SHORT:
3852 if (size < 4) return 0;
3853 break;
3854 default:
3855 ShouldNotReachHere();
3856 }
3857 return size;
3858 }
3859
3860 // Limits on vector size (number of elements) loaded into vector.
3861 int Matcher::max_vector_size(const BasicType bt) {
3862 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3863 }
3864 int Matcher::min_vector_size(const BasicType bt) {
3865 int max_size = max_vector_size(bt);
// Min size which can be loaded into a vector is 4 bytes.
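// For example, T_BYTE needs at least 4 elements and T_INT at least 2.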
3867 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3868 // Support for calling svml double64 vectors
3869 if (bt == T_DOUBLE) {
3870 size = 1;
3871 }
3872 return MIN2(size,max_size);
3873 }
3874
3875 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3876 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3877 // by default on Cascade Lake
3878 if (VM_Version::is_default_intel_cascade_lake()) {
3879 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3880 }
3881 return Matcher::max_vector_size(bt);
3882 }
3883
3884 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3885 return -1;
3886 }
3887
3888 // Vector ideal reg corresponding to specified size in bytes
3889 uint Matcher::vector_ideal_reg(int size) {
3890 assert(MaxVectorSize >= size, "");
3891 switch(size) {
3892 case 4: return Op_VecS;
3893 case 8: return Op_VecD;
3894 case 16: return Op_VecX;
3895 case 32: return Op_VecY;
3896 case 64: return Op_VecZ;
3897 }
3898 ShouldNotReachHere();
3899 return 0;
3900 }
3901
3902 // Check for shift by small constant as well
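// A shift by 0..3 used as an AddP offset corresponds to the index*scale part
// of a [base + index*scale + disp] addressing mode, with scale in {1,2,4,8}.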
3903 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3904 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3905 shift->in(2)->get_int() <= 3 &&
3906 // Are there other uses besides address expressions?
3907 !matcher->is_visited(shift)) {
3908 address_visited.set(shift->_idx); // Flag as address_visited
3909 mstack.push(shift->in(2), Matcher::Visit);
3910 Node *conv = shift->in(1);
// Allow the Matcher to match the rule which bypasses the
// ConvI2L operation for an array index on LP64
// if the index value is positive.
3914 if (conv->Opcode() == Op_ConvI2L &&
3915 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3916 // Are there other uses besides address expressions?
3917 !matcher->is_visited(conv)) {
3918 address_visited.set(conv->_idx); // Flag as address_visited
3919 mstack.push(conv->in(1), Matcher::Pre_Visit);
3920 } else {
3921 mstack.push(conv, Matcher::Pre_Visit);
3922 }
3923 return true;
3924 }
3925 return false;
3926 }
3927
// This function identifies sub-graphs in which a 'load' node is
// an input to two different nodes, such that they can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
3934 //
3935 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3936 // This is a temporary solution until we make DAGs expressible in ADL.
3937 template<typename ConType>
3938 class FusedPatternMatcher {
3939 Node* _op1_node;
3940 Node* _mop_node;
3941 int _con_op;
3942
3943 static int match_next(Node* n, int next_op, int next_op_idx) {
3944 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3945 return -1;
3946 }
3947
3948 if (next_op_idx == -1) { // n is commutative, try rotations
3949 if (n->in(1)->Opcode() == next_op) {
3950 return 1;
3951 } else if (n->in(2)->Opcode() == next_op) {
3952 return 2;
3953 }
3954 } else {
3955 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3956 if (n->in(next_op_idx)->Opcode() == next_op) {
3957 return next_op_idx;
3958 }
3959 }
3960 return -1;
3961 }
3962
3963 public:
3964 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3965 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3966
3967 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
3968 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
3969 typename ConType::NativeType con_value) {
3970 if (_op1_node->Opcode() != op1) {
3971 return false;
3972 }
3973 if (_mop_node->outcnt() > 2) {
3974 return false;
3975 }
3976 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
3977 if (op1_op2_idx == -1) {
3978 return false;
3979 }
3980 // Memory operation must be the other edge
3981 int op1_mop_idx = (op1_op2_idx & 1) + 1;
3982
3983 // Check that the mop node is really what we want
3984 if (_op1_node->in(op1_mop_idx) == _mop_node) {
3985 Node* op2_node = _op1_node->in(op1_op2_idx);
3986 if (op2_node->outcnt() > 1) {
3987 return false;
3988 }
3989 assert(op2_node->Opcode() == op2, "Should be");
3990 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
3991 if (op2_con_idx == -1) {
3992 return false;
3993 }
3994 // Memory operation must be the other edge
3995 int op2_mop_idx = (op2_con_idx & 1) + 1;
3996 // Check that the memory operation is the same node
3997 if (op2_node->in(op2_mop_idx) == _mop_node) {
3998 // Now check the constant
3999 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4000 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4001 return true;
4002 }
4003 }
4004 }
4005 return false;
4006 }
4007 };
4008
4009 static bool is_bmi_pattern(Node* n, Node* m) {
4010 assert(UseBMI1Instructions, "sanity");
4011 if (n != nullptr && m != nullptr) {
4012 if (m->Opcode() == Op_LoadI) {
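// blsi:   x & -x      -> (AndI (SubI 0 load) load)
// blsr:   x & (x - 1) -> (AndI load (AddI load -1))
// blsmsk: x ^ (x - 1) -> (XorI load (AddI load -1))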
4013 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4014 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4015 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4016 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4017 } else if (m->Opcode() == Op_LoadL) {
4018 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4019 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4020 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4021 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4022 }
4023 }
4024 return false;
4025 }
4026
4027 // Should the matcher clone input 'm' of node 'n'?
4028 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4029 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4030 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4031 mstack.push(m, Visit);
4032 return true;
4033 }
4034 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4035 mstack.push(m, Visit); // m = ShiftCntV
4036 return true;
4037 }
4038 if (is_encode_and_store_pattern(n, m)) {
4039 mstack.push(m, Visit);
4040 return true;
4041 }
4042 return false;
4043 }
4044
4045 // Should the Matcher clone shifts on addressing modes, expecting them
4046 // to be subsumed into complex addressing expressions or compute them
4047 // into registers?
4048 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4049 Node *off = m->in(AddPNode::Offset);
4050 if (off->is_Con()) {
4051 address_visited.test_set(m->_idx); // Flag as address_visited
4052 Node *adr = m->in(AddPNode::Address);
4053
4054 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4055 // AtomicAdd is not an addressing expression.
4056 // Cheap to find it by looking for screwy base.
4057 if (adr->is_AddP() &&
4058 !adr->in(AddPNode::Base)->is_top() &&
4059 !adr->in(AddPNode::Offset)->is_Con() &&
4060 off->get_long() == (int) (off->get_long()) && // immL32
4061 // Are there other uses besides address expressions?
4062 !is_visited(adr)) {
4063 address_visited.set(adr->_idx); // Flag as address_visited
4064 Node *shift = adr->in(AddPNode::Offset);
4065 if (!clone_shift(shift, this, mstack, address_visited)) {
4066 mstack.push(shift, Pre_Visit);
4067 }
4068 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4069 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4070 } else {
4071 mstack.push(adr, Pre_Visit);
4072 }
4073
4074 // Clone X+offset as it also folds into most addressing expressions
4075 mstack.push(off, Visit);
4076 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4077 return true;
4078 } else if (clone_shift(off, this, mstack, address_visited)) {
4079 address_visited.test_set(m->_idx); // Flag as address_visited
4080 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4081 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4082 return true;
4083 }
4084 return false;
4085 }
4086
4087 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4088 switch (bt) {
4089 case BoolTest::eq:
4090 return Assembler::eq;
4091 case BoolTest::ne:
4092 return Assembler::neq;
4093 case BoolTest::le:
4094 case BoolTest::ule:
4095 return Assembler::le;
4096 case BoolTest::ge:
4097 case BoolTest::uge:
4098 return Assembler::nlt;
4099 case BoolTest::lt:
4100 case BoolTest::ult:
4101 return Assembler::lt;
4102 case BoolTest::gt:
4103 case BoolTest::ugt:
4104 return Assembler::nle;
4105 default : ShouldNotReachHere(); return Assembler::_false;
4106 }
4107 }
4108
4109 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4110 switch (bt) {
4111 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4112 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4113 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4114 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4115 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4116 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4117 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4118 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4119 }
4120 }
4121
4122 // Helper methods for MachSpillCopyNode::implementation().
4123 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4124 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4125 assert(ireg == Op_VecS || // 32bit vector
4126 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4127 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4128 "no non-adjacent vector moves" );
4129 if (masm) {
4130 switch (ireg) {
4131 case Op_VecS: // copy whole register
4132 case Op_VecD:
4133 case Op_VecX:
4134 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4135 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4136 } else {
4137 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4138 }
4139 break;
4140 case Op_VecY:
4141 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4142 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4143 } else {
4144 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4145 }
4146 break;
4147 case Op_VecZ:
4148 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4149 break;
4150 default:
4151 ShouldNotReachHere();
4152 }
4153 #ifndef PRODUCT
4154 } else {
4155 switch (ireg) {
4156 case Op_VecS:
4157 case Op_VecD:
4158 case Op_VecX:
4159 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4160 break;
4161 case Op_VecY:
4162 case Op_VecZ:
4163 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4164 break;
4165 default:
4166 ShouldNotReachHere();
4167 }
4168 #endif
4169 }
4170 }
4171
4172 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4173 int stack_offset, int reg, uint ireg, outputStream* st) {
4174 if (masm) {
4175 if (is_load) {
4176 switch (ireg) {
4177 case Op_VecS:
4178 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4179 break;
4180 case Op_VecD:
4181 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4182 break;
4183 case Op_VecX:
4184 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4185 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4186 } else {
4187 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4188 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4189 }
4190 break;
4191 case Op_VecY:
4192 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4193 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4194 } else {
4195 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4196 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4197 }
4198 break;
4199 case Op_VecZ:
4200 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4201 break;
4202 default:
4203 ShouldNotReachHere();
4204 }
4205 } else { // store
4206 switch (ireg) {
4207 case Op_VecS:
4208 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4209 break;
4210 case Op_VecD:
4211 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4212 break;
4213 case Op_VecX:
4214 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4215 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4216 }
4217 else {
4218 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4219 }
4220 break;
4221 case Op_VecY:
4222 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4223 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4224 }
4225 else {
4226 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4227 }
4228 break;
4229 case Op_VecZ:
4230 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4231 break;
4232 default:
4233 ShouldNotReachHere();
4234 }
4235 }
4236 #ifndef PRODUCT
4237 } else {
4238 if (is_load) {
4239 switch (ireg) {
4240 case Op_VecS:
4241 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4242 break;
4243 case Op_VecD:
4244 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4245 break;
4246 case Op_VecX:
4247 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4248 break;
4249 case Op_VecY:
4250 case Op_VecZ:
4251 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4252 break;
4253 default:
4254 ShouldNotReachHere();
4255 }
4256 } else { // store
4257 switch (ireg) {
4258 case Op_VecS:
4259 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4260 break;
4261 case Op_VecD:
4262 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4263 break;
4264 case Op_VecX:
4265 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4266 break;
4267 case Op_VecY:
4268 case Op_VecZ:
4269 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4270 break;
4271 default:
4272 ShouldNotReachHere();
4273 }
4274 }
4275 #endif
4276 }
4277 }
4278
4279 template <class T>
4280 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4281 int size = type2aelembytes(bt) * len;
4282 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4283 for (int i = 0; i < len; i++) {
4284 int offset = i * type2aelembytes(bt);
4285 switch (bt) {
4286 case T_BYTE: val->at(i) = con; break;
4287 case T_SHORT: {
4288 jshort c = con;
4289 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4290 break;
4291 }
4292 case T_INT: {
4293 jint c = con;
4294 memcpy(val->adr_at(offset), &c, sizeof(jint));
4295 break;
4296 }
4297 case T_LONG: {
4298 jlong c = con;
4299 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4300 break;
4301 }
4302 case T_FLOAT: {
4303 jfloat c = con;
4304 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4305 break;
4306 }
4307 case T_DOUBLE: {
4308 jdouble c = con;
4309 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4310 break;
4311 }
4312 default: assert(false, "%s", type2name(bt));
4313 }
4314 }
4315 return val;
4316 }
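// For example (illustrative), vreplicate_imm(T_INT, 1, 4) produces the
// 16-byte little-endian sequence 01 00 00 00 repeated four times.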
4317
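// Returns a 64-bit pattern with the sign (high) bit of every element of the
// given type set, e.g. 0x8000 in each of the four 16-bit lanes for T_SHORT.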
4318 static inline jlong high_bit_set(BasicType bt) {
4319 switch (bt) {
4320 case T_BYTE: return 0x8080808080808080;
4321 case T_SHORT: return 0x8000800080008000;
4322 case T_INT: return 0x8000000080000000;
4323 case T_LONG: return 0x8000000000000000;
4324 default:
4325 ShouldNotReachHere();
4326 return 0;
4327 }
4328 }
4329
4330 #ifndef PRODUCT
4331 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4332 st->print("nop \t# %d bytes pad for loops and calls", _count);
4333 }
4334 #endif
4335
4336 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4337 __ nop(_count);
4338 }
4339
4340 uint MachNopNode::size(PhaseRegAlloc*) const {
4341 return _count;
4342 }
4343
4344 #ifndef PRODUCT
4345 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4346 st->print("# breakpoint");
4347 }
4348 #endif
4349
4350 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4351 __ int3();
4352 }
4353
4354 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4355 return MachNode::size(ra_);
4356 }
4357
4358 %}
4359
4360 //----------ENCODING BLOCK-----------------------------------------------------
4361 // This block specifies the encoding classes used by the compiler to
4362 // output byte streams. Encoding classes are parameterized macros
4363 // used by Machine Instruction Nodes in order to generate the bit
4364 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
4367 // COND_INTER. REG_INTER causes an operand to generate a function
4368 // which returns its register number when queried. CONST_INTER causes
4369 // an operand to generate a function which returns the value of the
4370 // constant when queried. MEMORY_INTER causes an operand to generate
4371 // four functions which return the Base Register, the Index Register,
4372 // the Scale Value, and the Offset Value of the operand when queried.
4373 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
4375 // associated with each basic boolean condition for a conditional
4376 // instruction.
4377 //
4378 // Instructions specify two basic values for encoding. Again, a
4379 // function is available to check if the constant displacement is an
4380 // oop. They use the ins_encode keyword to specify their encoding
4381 // classes (which must be a sequence of enc_class names, and their
4382 // parameters, specified in the encoding block), and they use the
4383 // opcode keyword to specify, in order, their primary, secondary, and
4384 // tertiary opcode. Only the opcode sections which a particular
4385 // instruction needs for encoding need to be specified.
4386 encode %{
4387 enc_class cdql_enc(no_rax_rdx_RegI div)
4388 %{
4389 // Full implementation of Java idiv and irem; checks for
4390 // special case as described in JVM spec., p.243 & p.271.
4391 //
4392 // normal case special case
4393 //
4394 // input : rax: dividend min_int
4395 // reg: divisor -1
4396 //
4397 // output: rax: quotient (= rax idiv reg) min_int
4398 // rdx: remainder (= rax irem reg) 0
4399 //
// Code sequence:
4401 //
4402 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4403 // 5: 75 07/08 jne e <normal>
4404 // 7: 33 d2 xor %edx,%edx
4405 // [div >= 8 -> offset + 1]
4406 // [REX_B]
4407 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4408 // c: 74 03/04 je 11 <done>
4409 // 000000000000000e <normal>:
4410 // e: 99 cltd
4411 // [div >= 8 -> offset + 1]
4412 // [REX_B]
4413 // f: f7 f9 idiv $div
4414 // 0000000000000011 <done>:
4415 Label normal;
4416 Label done;
4417
4418 // cmp $0x80000000,%eax
4419 __ cmpl(as_Register(RAX_enc), 0x80000000);
4420
4421 // jne e <normal>
4422 __ jccb(Assembler::notEqual, normal);
4423
4424 // xor %edx,%edx
4425 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4426
// cmp $0xffffffffffffffff,$div
4428 __ cmpl($div$$Register, -1);
4429
4430 // je 11 <done>
4431 __ jccb(Assembler::equal, done);
4432
4433 // <normal>
4434 // cltd
4435 __ bind(normal);
4436 __ cdql();
4437
4438 // idivl
4439 // <done>
4440 __ idivl($div$$Register);
4441 __ bind(done);
4442 %}
4443
4444 enc_class cdqq_enc(no_rax_rdx_RegL div)
4445 %{
4446 // Full implementation of Java ldiv and lrem; checks for
4447 // special case as described in JVM spec., p.243 & p.271.
4448 //
4449 // normal case special case
4450 //
4451 // input : rax: dividend min_long
4452 // reg: divisor -1
4453 //
4454 // output: rax: quotient (= rax idiv reg) min_long
4455 // rdx: remainder (= rax irem reg) 0
4456 //
// Code sequence:
4458 //
4459 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4460 // 7: 00 00 80
4461 // a: 48 39 d0 cmp %rdx,%rax
4462 // d: 75 08 jne 17 <normal>
4463 // f: 33 d2 xor %edx,%edx
4464 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4465 // 15: 74 05 je 1c <done>
4466 // 0000000000000017 <normal>:
4467 // 17: 48 99 cqto
4468 // 19: 48 f7 f9 idiv $div
4469 // 000000000000001c <done>:
4470 Label normal;
4471 Label done;
4472
4473 // mov $0x8000000000000000,%rdx
4474 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4475
4476 // cmp %rdx,%rax
4477 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4478
4479 // jne 17 <normal>
4480 __ jccb(Assembler::notEqual, normal);
4481
4482 // xor %edx,%edx
4483 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4484
4485 // cmp $0xffffffffffffffff,$div
4486 __ cmpq($div$$Register, -1);
4487
// je 1c <done>
4489 __ jccb(Assembler::equal, done);
4490
4491 // <normal>
4492 // cqto
4493 __ bind(normal);
4494 __ cdqq();
4495
// idivq
4497 // <done>
4498 __ idivq($div$$Register);
4499 __ bind(done);
4500 %}
4501
4502 enc_class clear_avx %{
4503 DEBUG_ONLY(int off0 = __ offset());
4504 if (generate_vzeroupper(Compile::current())) {
// Clear upper bits of YMM registers when current compiled code uses
// wide vectors to avoid AVX <-> SSE transition penalty during call.
4508 __ vzeroupper();
4509 }
4510 DEBUG_ONLY(int off1 = __ offset());
4511 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4512 %}
4513
4514 enc_class Java_To_Runtime(method meth) %{
4515 __ lea(r10, RuntimeAddress((address)$meth$$method));
4516 __ call(r10);
4517 __ post_call_nop();
4518 %}
4519
4520 enc_class Java_Static_Call(method meth)
4521 %{
4522 // JAVA STATIC CALL
4523 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4524 // determine who we intended to call.
4525 if (!_method) {
4526 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4527 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4528 // The NOP here is purely to ensure that eliding a call to
4529 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4530 __ addr_nop_5();
4531 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4532 } else {
4533 int method_index = resolved_method_index(masm);
4534 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4535 : static_call_Relocation::spec(method_index);
4536 address mark = __ pc();
4537 int call_offset = __ offset();
4538 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4539 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4540 // Calls of the same statically bound method can share
4541 // a stub to the interpreter.
4542 __ code()->shared_stub_to_interp_for(_method, call_offset);
4543 } else {
4544 // Emit stubs for static call.
4545 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4546 __ clear_inst_mark();
4547 if (stub == nullptr) {
4548 ciEnv::current()->record_failure("CodeCache is full");
4549 return;
4550 }
4551 }
4552 }
4553 __ post_call_nop();
4554 %}
4555
4556 enc_class Java_Dynamic_Call(method meth) %{
4557 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4558 __ post_call_nop();
4559 %}
4560
4561 enc_class call_epilog %{
4562 if (VerifyStackAtCalls) {
4563 // Check that stack depth is unchanged: find majik cookie on stack
4564 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4565 Label L;
4566 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4567 __ jccb(Assembler::equal, L);
4568 // Die if stack mismatch
4569 __ int3();
4570 __ bind(L);
4571 }
4572 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4573 // The last return value is not set by the callee but used to pass the null marker to compiled code.
// Search for the corresponding projection, get the register and emit code that initializes it.
4575 uint con = (tf()->range_cc()->cnt() - 1);
4576 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4577 ProjNode* proj = fast_out(i)->as_Proj();
4578 if (proj->_con == con) {
4579 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4580 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4581 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
4582 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4583 __ testq(rax, rax);
4584 __ setb(Assembler::notZero, toReg);
4585 __ movzbl(toReg, toReg);
4586 if (reg->is_stack()) {
4587 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4588 __ movq(Address(rsp, st_off), toReg);
4589 }
4590 break;
4591 }
4592 }
4593 if (return_value_is_used()) {
4594 // An inline type is returned as fields in multiple registers.
// rax either contains an oop (if the inline type is buffered) or a pointer
// to the corresponding InlineKlass with its lowest bit set to 1. Zero rax
// if the lowest bit is set so that C2 can use the oop after null checking.
4598 // rax &= (rax & 1) - 1
4599 __ movptr(rscratch1, rax);
4600 __ andptr(rscratch1, 0x1);
4601 __ subptr(rscratch1, 0x1);
4602 __ andptr(rax, rscratch1);
4603 }
4604 }
4605 %}
4606
4607 %}
4608
4609 //----------FRAME--------------------------------------------------------------
4610 // Definition of frame structure and management information.
4611 //
4612 // S T A C K L A Y O U T Allocators stack-slot number
4613 // | (to get allocators register number
4614 // G Owned by | | v add OptoReg::stack0())
4615 // r CALLER | |
4616 // o | +--------+ pad to even-align allocators stack-slot
4617 // w V | pad0 | numbers; owned by CALLER
4618 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4619 // h ^ | in | 5
4620 // | | args | 4 Holes in incoming args owned by SELF
4621 // | | | | 3
4622 // | | +--------+
4623 // V | | old out| Empty on Intel, window on Sparc
4624 // | old |preserve| Must be even aligned.
4625 // | SP-+--------+----> Matcher::_old_SP, even aligned
4626 // | | in | 3 area for Intel ret address
4627 // Owned by |preserve| Empty on Sparc.
4628 // SELF +--------+
4629 // | | pad2 | 2 pad to align old SP
4630 // | +--------+ 1
4631 // | | locks | 0
4632 // | +--------+----> OptoReg::stack0(), even aligned
4633 // | | pad1 | 11 pad to align new SP
4634 // | +--------+
4635 // | | | 10
4636 // | | spills | 9 spills
4637 // V | | 8 (pad0 slot for callee)
4638 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4639 // ^ | out | 7
4640 // | | args | 6 Holes in outgoing args owned by CALLEE
4641 // Owned by +--------+
4642 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4643 // | new |preserve| Must be even-aligned.
4644 // | SP-+--------+----> Matcher::_new_SP, even aligned
4645 // | | |
4646 //
4647 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4648 // known from SELF's arguments and the Java calling convention.
4649 // Region 6-7 is determined per call site.
4650 // Note 2: If the calling convention leaves holes in the incoming argument
4651 // area, those holes are owned by SELF. Holes in the outgoing area
4652 // are owned by the CALLEE. Holes should not be necessary in the
4653 // incoming area, as the Java calling convention is completely under
4654 // the control of the AD file. Doubles can be sorted and packed to
4655 // avoid holes. Holes in the outgoing arguments may be necessary for
4656 // varargs C calling conventions.
4657 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4658 // even aligned with pad0 as needed.
4659 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4660 // region 6-11 is even aligned; it may be padded out more so that
4661 // the region from SP to FP meets the minimum stack alignment.
4662 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4663 // alignment. Region 11, pad1, may be dynamically extended so that
4664 // SP meets the minimum alignment.
4665
4666 frame
4667 %{
4668 // These three registers define part of the calling convention
4669 // between compiled code and the interpreter.
4670 inline_cache_reg(RAX); // Inline Cache Register
4671
4672 // Optional: name the operand used by cisc-spilling to access
4673 // [stack_pointer + offset]
4674 cisc_spilling_operand_name(indOffset32);
4675
4676 // Number of stack slots consumed by locking an object
4677 sync_stack_slots(2);
4678
4679 // Compiled code's Frame Pointer
4680 frame_pointer(RSP);
4681
4682 // Interpreter stores its frame pointer in a register which is
4683 // stored to the stack by I2CAdaptors.
// I2CAdaptors convert from interpreted Java to compiled Java.
4685 interpreter_frame_pointer(RBP);
4686
4687 // Stack alignment requirement
4688 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4689
4690 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4691 // for calls to C. Supports the var-args backing area for register parms.
4692 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4693
4694 // The after-PROLOG location of the return address. Location of
4695 // return address specifies a type (REG or STACK) and a number
// representing the register number (i.e., use a register name) or
4697 // stack slot.
4698 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
// Otherwise, it is above the locks and verification slot and the alignment word.
4700 return_addr(STACK - 2 +
4701 align_up((Compile::current()->in_preserve_stack_slots() +
4702 Compile::current()->fixed_slots()),
4703 stack_alignment_in_slots()));
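
// For instance (illustrative numbers only): with 16-byte stack alignment
// (4 slots of 4 bytes each), in_preserve_stack_slots() == 4, and no fixed
// slots, this evaluates to STACK - 2 + align_up(4, 4) = STACK + 2.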
4704
4705 // Location of compiled Java return values. Same as C for now.
4706 return_value
4707 %{
4708 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4709 "only return normal values");
4710
4711 static const int lo[Op_RegL + 1] = {
4712 0,
4713 0,
4714 RAX_num, // Op_RegN
4715 RAX_num, // Op_RegI
4716 RAX_num, // Op_RegP
4717 XMM0_num, // Op_RegF
4718 XMM0_num, // Op_RegD
4719 RAX_num // Op_RegL
4720 };
4721 static const int hi[Op_RegL + 1] = {
4722 0,
4723 0,
4724 OptoReg::Bad, // Op_RegN
4725 OptoReg::Bad, // Op_RegI
4726 RAX_H_num, // Op_RegP
4727 OptoReg::Bad, // Op_RegF
4728 XMM0b_num, // Op_RegD
4729 RAX_H_num // Op_RegL
4730 };
4731 // Excluded flags and vector registers.
4732 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4733 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4734 %}
4735 %}
4736
4737 //----------ATTRIBUTES---------------------------------------------------------
4738 //----------Operand Attributes-------------------------------------------------
4739 op_attrib op_cost(0); // Required cost attribute
4740
4741 //----------Instruction Attributes---------------------------------------------
4742 ins_attrib ins_cost(100); // Required cost attribute
4743 ins_attrib ins_size(8); // Required size attribute (in bits)
4744 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4745 // a non-matching short branch variant
4746 // of some long branch?
4747 ins_attrib ins_alignment(1); // Required alignment attribute (must
4748 // be a power of 2) specifies the
4749 // alignment that some part of the
4750 // instruction (not necessarily the
4751 // start) requires. If > 1, a
4752 // compute_padding() function must be
4753 // provided for the instruction
4754
4755 // Whether this node is expanded during code emission into a sequence of
4756 // instructions and the first instruction can perform an implicit null check.
4757 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4758
4759 //----------OPERANDS-----------------------------------------------------------
4760 // Operand definitions must precede instruction definitions for correct parsing
4761 // in the ADLC because operands constitute user defined types which are used in
4762 // instruction definitions.
4763
4764 //----------Simple Operands----------------------------------------------------
4765 // Immediate Operands
4766 // Integer Immediate
4767 operand immI()
4768 %{
4769 match(ConI);
4770
4771 op_cost(10);
4772 format %{ %}
4773 interface(CONST_INTER);
4774 %}
4775
4776 // Constant for test vs zero
4777 operand immI_0()
4778 %{
4779 predicate(n->get_int() == 0);
4780 match(ConI);
4781
4782 op_cost(0);
4783 format %{ %}
4784 interface(CONST_INTER);
4785 %}
4786
4787 // Constant for increment
4788 operand immI_1()
4789 %{
4790 predicate(n->get_int() == 1);
4791 match(ConI);
4792
4793 op_cost(0);
4794 format %{ %}
4795 interface(CONST_INTER);
4796 %}
4797
4798 // Constant for decrement
4799 operand immI_M1()
4800 %{
4801 predicate(n->get_int() == -1);
4802 match(ConI);
4803
4804 op_cost(0);
4805 format %{ %}
4806 interface(CONST_INTER);
4807 %}
4808
4809 operand immI_2()
4810 %{
4811 predicate(n->get_int() == 2);
4812 match(ConI);
4813
4814 op_cost(0);
4815 format %{ %}
4816 interface(CONST_INTER);
4817 %}
4818
4819 operand immI_4()
4820 %{
4821 predicate(n->get_int() == 4);
4822 match(ConI);
4823
4824 op_cost(0);
4825 format %{ %}
4826 interface(CONST_INTER);
4827 %}
4828
4829 operand immI_8()
4830 %{
4831 predicate(n->get_int() == 8);
4832 match(ConI);
4833
4834 op_cost(0);
4835 format %{ %}
4836 interface(CONST_INTER);
4837 %}
4838
4839 // Valid scale values for addressing modes
4840 operand immI2()
4841 %{
4842 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4843 match(ConI);
4844
4845 format %{ %}
4846 interface(CONST_INTER);
4847 %}
4848
4849 operand immU7()
4850 %{
4851 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4852 match(ConI);
4853
4854 op_cost(5);
4855 format %{ %}
4856 interface(CONST_INTER);
4857 %}
4858
4859 operand immI8()
4860 %{
4861 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4862 match(ConI);
4863
4864 op_cost(5);
4865 format %{ %}
4866 interface(CONST_INTER);
4867 %}
4868
4869 operand immU8()
4870 %{
4871 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4872 match(ConI);
4873
4874 op_cost(5);
4875 format %{ %}
4876 interface(CONST_INTER);
4877 %}
4878
4879 operand immI16()
4880 %{
4881 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4882 match(ConI);
4883
4884 op_cost(10);
4885 format %{ %}
4886 interface(CONST_INTER);
4887 %}
4888
4889 // Int Immediate non-negative
4890 operand immU31()
4891 %{
4892 predicate(n->get_int() >= 0);
4893 match(ConI);
4894
4895 op_cost(0);
4896 format %{ %}
4897 interface(CONST_INTER);
4898 %}
4899
4900 // Pointer Immediate
4901 operand immP()
4902 %{
4903 match(ConP);
4904
4905 op_cost(10);
4906 format %{ %}
4907 interface(CONST_INTER);
4908 %}
4909
4910 // Null Pointer Immediate
4911 operand immP0()
4912 %{
4913 predicate(n->get_ptr() == 0);
4914 match(ConP);
4915
4916 op_cost(5);
4917 format %{ %}
4918 interface(CONST_INTER);
4919 %}
4920
// Narrow Pointer Immediate
4922 operand immN() %{
4923 match(ConN);
4924
4925 op_cost(10);
4926 format %{ %}
4927 interface(CONST_INTER);
4928 %}
4929
4930 operand immNKlass() %{
4931 match(ConNKlass);
4932
4933 op_cost(10);
4934 format %{ %}
4935 interface(CONST_INTER);
4936 %}
4937
4938 // Null Pointer Immediate
4939 operand immN0() %{
4940 predicate(n->get_narrowcon() == 0);
4941 match(ConN);
4942
4943 op_cost(5);
4944 format %{ %}
4945 interface(CONST_INTER);
4946 %}
4947
4948 operand immP31()
4949 %{
4950 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4951 && (n->get_ptr() >> 31) == 0);
4952 match(ConP);
4953
4954 op_cost(5);
4955 format %{ %}
4956 interface(CONST_INTER);
4957 %}
4958
4959
4960 // Long Immediate
4961 operand immL()
4962 %{
4963 match(ConL);
4964
4965 op_cost(20);
4966 format %{ %}
4967 interface(CONST_INTER);
4968 %}
4969
4970 // Long Immediate 8-bit
4971 operand immL8()
4972 %{
4973 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4974 match(ConL);
4975
4976 op_cost(5);
4977 format %{ %}
4978 interface(CONST_INTER);
4979 %}
4980
4981 // Long Immediate 32-bit unsigned
4982 operand immUL32()
4983 %{
4984 predicate(n->get_long() == (unsigned int) (n->get_long()));
4985 match(ConL);
4986
4987 op_cost(10);
4988 format %{ %}
4989 interface(CONST_INTER);
4990 %}
4991
4992 // Long Immediate 32-bit signed
4993 operand immL32()
4994 %{
4995 predicate(n->get_long() == (int) (n->get_long()));
4996 match(ConL);
4997
4998 op_cost(15);
4999 format %{ %}
5000 interface(CONST_INTER);
5001 %}
5002
5003 operand immL_Pow2()
5004 %{
5005 predicate(is_power_of_2((julong)n->get_long()));
5006 match(ConL);
5007
5008 op_cost(15);
5009 format %{ %}
5010 interface(CONST_INTER);
5011 %}
5012
5013 operand immL_NotPow2()
5014 %{
5015 predicate(is_power_of_2((julong)~n->get_long()));
5016 match(ConL);
5017
5018 op_cost(15);
5019 format %{ %}
5020 interface(CONST_INTER);
5021 %}
5022
5023 // Long Immediate zero
5024 operand immL0()
5025 %{
5026 predicate(n->get_long() == 0L);
5027 match(ConL);
5028
5029 op_cost(10);
5030 format %{ %}
5031 interface(CONST_INTER);
5032 %}
5033
5034 // Constant for increment
5035 operand immL1()
5036 %{
5037 predicate(n->get_long() == 1);
5038 match(ConL);
5039
5040 format %{ %}
5041 interface(CONST_INTER);
5042 %}
5043
5044 // Constant for decrement
5045 operand immL_M1()
5046 %{
5047 predicate(n->get_long() == -1);
5048 match(ConL);
5049
5050 format %{ %}
5051 interface(CONST_INTER);
5052 %}
5053
5054 // Long Immediate: low 32-bit mask
5055 operand immL_32bits()
5056 %{
5057 predicate(n->get_long() == 0xFFFFFFFFL);
5058 match(ConL);
5059 op_cost(20);
5060
5061 format %{ %}
5062 interface(CONST_INTER);
5063 %}
5064
5065 // Int Immediate: 2^n-1, positive
5066 operand immI_Pow2M1()
5067 %{
5068 predicate((n->get_int() > 0)
5069 && is_power_of_2((juint)n->get_int() + 1));
5070 match(ConI);
5071
5072 op_cost(20);
5073 format %{ %}
5074 interface(CONST_INTER);
5075 %}
5076
5077 // Float Immediate zero
5078 operand immF0()
5079 %{
5080 predicate(jint_cast(n->getf()) == 0);
5081 match(ConF);
5082
5083 op_cost(5);
5084 format %{ %}
5085 interface(CONST_INTER);
5086 %}
5087
5088 // Float Immediate
5089 operand immF()
5090 %{
5091 match(ConF);
5092
5093 op_cost(15);
5094 format %{ %}
5095 interface(CONST_INTER);
5096 %}
5097
5098 // Half Float Immediate
5099 operand immH()
5100 %{
5101 match(ConH);
5102
5103 op_cost(15);
5104 format %{ %}
5105 interface(CONST_INTER);
5106 %}
5107
5108 // Double Immediate zero
5109 operand immD0()
5110 %{
5111 predicate(jlong_cast(n->getd()) == 0);
5112 match(ConD);
5113
5114 op_cost(5);
5115 format %{ %}
5116 interface(CONST_INTER);
5117 %}
5118
5119 // Double Immediate
5120 operand immD()
5121 %{
5122 match(ConD);
5123
5124 op_cost(15);
5125 format %{ %}
5126 interface(CONST_INTER);
5127 %}
5128
5129 // Immediates for special shifts (sign extend)
5130
// Constants for sign-extending shifts
5132 operand immI_16()
5133 %{
5134 predicate(n->get_int() == 16);
5135 match(ConI);
5136
5137 format %{ %}
5138 interface(CONST_INTER);
5139 %}
5140
5141 operand immI_24()
5142 %{
5143 predicate(n->get_int() == 24);
5144 match(ConI);
5145
5146 format %{ %}
5147 interface(CONST_INTER);
5148 %}
5149
5150 // Constant for byte-wide masking
5151 operand immI_255()
5152 %{
5153 predicate(n->get_int() == 255);
5154 match(ConI);
5155
5156 format %{ %}
5157 interface(CONST_INTER);
5158 %}
5159
5160 // Constant for short-wide masking
5161 operand immI_65535()
5162 %{
5163 predicate(n->get_int() == 65535);
5164 match(ConI);
5165
5166 format %{ %}
5167 interface(CONST_INTER);
5168 %}
5169
5170 // Constant for byte-wide masking
5171 operand immL_255()
5172 %{
5173 predicate(n->get_long() == 255);
5174 match(ConL);
5175
5176 format %{ %}
5177 interface(CONST_INTER);
5178 %}
5179
5180 // Constant for short-wide masking
5181 operand immL_65535()
5182 %{
5183 predicate(n->get_long() == 65535);
5184 match(ConL);
5185
5186 format %{ %}
5187 interface(CONST_INTER);
5188 %}
5189
5190 operand kReg()
5191 %{
5192 constraint(ALLOC_IN_RC(vectmask_reg));
5193 match(RegVectMask);
format %{ %}
5195 interface(REG_INTER);
5196 %}
5197
5198 // Register Operands
5199 // Integer Register
5200 operand rRegI()
5201 %{
5202 constraint(ALLOC_IN_RC(int_reg));
5203 match(RegI);
5204
5205 match(rax_RegI);
5206 match(rbx_RegI);
5207 match(rcx_RegI);
5208 match(rdx_RegI);
5209 match(rdi_RegI);
5210
5211 format %{ %}
5212 interface(REG_INTER);
5213 %}
5214
5215 // Special Registers
5216 operand rax_RegI()
5217 %{
5218 constraint(ALLOC_IN_RC(int_rax_reg));
5219 match(RegI);
5220 match(rRegI);
5221
5222 format %{ "RAX" %}
5223 interface(REG_INTER);
5224 %}
5225
5226 // Special Registers
5227 operand rbx_RegI()
5228 %{
5229 constraint(ALLOC_IN_RC(int_rbx_reg));
5230 match(RegI);
5231 match(rRegI);
5232
5233 format %{ "RBX" %}
5234 interface(REG_INTER);
5235 %}
5236
5237 operand rcx_RegI()
5238 %{
5239 constraint(ALLOC_IN_RC(int_rcx_reg));
5240 match(RegI);
5241 match(rRegI);
5242
5243 format %{ "RCX" %}
5244 interface(REG_INTER);
5245 %}
5246
5247 operand rdx_RegI()
5248 %{
5249 constraint(ALLOC_IN_RC(int_rdx_reg));
5250 match(RegI);
5251 match(rRegI);
5252
5253 format %{ "RDX" %}
5254 interface(REG_INTER);
5255 %}
5256
5257 operand rdi_RegI()
5258 %{
5259 constraint(ALLOC_IN_RC(int_rdi_reg));
5260 match(RegI);
5261 match(rRegI);
5262
5263 format %{ "RDI" %}
5264 interface(REG_INTER);
5265 %}
5266
5267 operand no_rax_rdx_RegI()
5268 %{
5269 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5270 match(RegI);
5271 match(rbx_RegI);
5272 match(rcx_RegI);
5273 match(rdi_RegI);
5274
5275 format %{ %}
5276 interface(REG_INTER);
5277 %}
5278
5279 operand no_rbp_r13_RegI()
5280 %{
5281 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5282 match(RegI);
5283 match(rRegI);
5284 match(rax_RegI);
5285 match(rbx_RegI);
5286 match(rcx_RegI);
5287 match(rdx_RegI);
5288 match(rdi_RegI);
5289
5290 format %{ %}
5291 interface(REG_INTER);
5292 %}
5293
5294 // Pointer Register
5295 operand any_RegP()
5296 %{
5297 constraint(ALLOC_IN_RC(any_reg));
5298 match(RegP);
5299 match(rax_RegP);
5300 match(rbx_RegP);
5301 match(rdi_RegP);
5302 match(rsi_RegP);
5303 match(rbp_RegP);
5304 match(r15_RegP);
5305 match(rRegP);
5306
5307 format %{ %}
5308 interface(REG_INTER);
5309 %}
5310
5311 operand rRegP()
5312 %{
5313 constraint(ALLOC_IN_RC(ptr_reg));
5314 match(RegP);
5315 match(rax_RegP);
5316 match(rbx_RegP);
5317 match(rdi_RegP);
5318 match(rsi_RegP);
5319 match(rbp_RegP); // See Q&A below about
5320 match(r15_RegP); // r15_RegP and rbp_RegP.
5321
5322 format %{ %}
5323 interface(REG_INTER);
5324 %}
5325
5326 operand rRegN() %{
5327 constraint(ALLOC_IN_RC(int_reg));
5328 match(RegN);
5329
5330 format %{ %}
5331 interface(REG_INTER);
5332 %}
5333
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator when it assigns the output register.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.
5344
5345 // This operand is not allowed to use RBP even if
5346 // RBP is not used to hold the frame pointer.
5347 operand no_rbp_RegP()
5348 %{
5349 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5350 match(RegP);
5351 match(rbx_RegP);
5352 match(rsi_RegP);
5353 match(rdi_RegP);
5354
5355 format %{ %}
5356 interface(REG_INTER);
5357 %}
5358
5359 // Special Registers
5360 // Return a pointer value
5361 operand rax_RegP()
5362 %{
5363 constraint(ALLOC_IN_RC(ptr_rax_reg));
5364 match(RegP);
5365 match(rRegP);
5366
5367 format %{ %}
5368 interface(REG_INTER);
5369 %}
5370
5371 // Special Registers
5372 // Return a compressed pointer value
5373 operand rax_RegN()
5374 %{
5375 constraint(ALLOC_IN_RC(int_rax_reg));
5376 match(RegN);
5377 match(rRegN);
5378
5379 format %{ %}
5380 interface(REG_INTER);
5381 %}
5382
5383 // Used in AtomicAdd
5384 operand rbx_RegP()
5385 %{
5386 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5387 match(RegP);
5388 match(rRegP);
5389
5390 format %{ %}
5391 interface(REG_INTER);
5392 %}
5393
5394 operand rsi_RegP()
5395 %{
5396 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5397 match(RegP);
5398 match(rRegP);
5399
5400 format %{ %}
5401 interface(REG_INTER);
5402 %}
5403
5404 operand rbp_RegP()
5405 %{
5406 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5407 match(RegP);
5408 match(rRegP);
5409
5410 format %{ %}
5411 interface(REG_INTER);
5412 %}
5413
5414 // Used in rep stosq
5415 operand rdi_RegP()
5416 %{
5417 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5418 match(RegP);
5419 match(rRegP);
5420
5421 format %{ %}
5422 interface(REG_INTER);
5423 %}
5424
5425 operand r15_RegP()
5426 %{
5427 constraint(ALLOC_IN_RC(ptr_r15_reg));
5428 match(RegP);
5429 match(rRegP);
5430
5431 format %{ %}
5432 interface(REG_INTER);
5433 %}
5434
5435 operand rRegL()
5436 %{
5437 constraint(ALLOC_IN_RC(long_reg));
5438 match(RegL);
5439 match(rax_RegL);
5440 match(rdx_RegL);
5441
5442 format %{ %}
5443 interface(REG_INTER);
5444 %}
5445
5446 // Special Registers
5447 operand no_rax_rdx_RegL()
5448 %{
5449 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5450 match(RegL);
5451 match(rRegL);
5452
5453 format %{ %}
5454 interface(REG_INTER);
5455 %}
5456
5457 operand rax_RegL()
5458 %{
5459 constraint(ALLOC_IN_RC(long_rax_reg));
5460 match(RegL);
5461 match(rRegL);
5462
5463 format %{ "RAX" %}
5464 interface(REG_INTER);
5465 %}
5466
5467 operand rcx_RegL()
5468 %{
5469 constraint(ALLOC_IN_RC(long_rcx_reg));
5470 match(RegL);
5471 match(rRegL);
5472
5473 format %{ %}
5474 interface(REG_INTER);
5475 %}
5476
5477 operand rdx_RegL()
5478 %{
5479 constraint(ALLOC_IN_RC(long_rdx_reg));
5480 match(RegL);
5481 match(rRegL);
5482
5483 format %{ %}
5484 interface(REG_INTER);
5485 %}
5486
5487 operand r11_RegL()
5488 %{
5489 constraint(ALLOC_IN_RC(long_r11_reg));
5490 match(RegL);
5491 match(rRegL);
5492
5493 format %{ %}
5494 interface(REG_INTER);
5495 %}
5496
5497 operand no_rbp_r13_RegL()
5498 %{
5499 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5500 match(RegL);
5501 match(rRegL);
5502 match(rax_RegL);
5503 match(rcx_RegL);
5504 match(rdx_RegL);
5505
5506 format %{ %}
5507 interface(REG_INTER);
5508 %}
5509
5510 // Flags register, used as output of compare instructions
5511 operand rFlagsReg()
5512 %{
5513 constraint(ALLOC_IN_RC(int_flags));
5514 match(RegFlags);
5515
5516 format %{ "RFLAGS" %}
5517 interface(REG_INTER);
5518 %}
5519
5520 // Flags register, used as output of FLOATING POINT compare instructions
5521 operand rFlagsRegU()
5522 %{
5523 constraint(ALLOC_IN_RC(int_flags));
5524 match(RegFlags);
5525
5526 format %{ "RFLAGS_U" %}
5527 interface(REG_INTER);
5528 %}
5529
5530 operand rFlagsRegUCF() %{
5531 constraint(ALLOC_IN_RC(int_flags));
5532 match(RegFlags);
5533 predicate(false);
5534
5535 format %{ "RFLAGS_U_CF" %}
5536 interface(REG_INTER);
5537 %}
5538
5539 // Float register operands
5540 operand regF() %{
5541 constraint(ALLOC_IN_RC(float_reg));
5542 match(RegF);
5543
5544 format %{ %}
5545 interface(REG_INTER);
5546 %}
5547
5548 // Float register operands
5549 operand legRegF() %{
5550 constraint(ALLOC_IN_RC(float_reg_legacy));
5551 match(RegF);
5552
5553 format %{ %}
5554 interface(REG_INTER);
5555 %}
5556
5557 // Float register operands
5558 operand vlRegF() %{
5559 constraint(ALLOC_IN_RC(float_reg_vl));
5560 match(RegF);
5561
5562 format %{ %}
5563 interface(REG_INTER);
5564 %}
5565
5566 // Double register operands
5567 operand regD() %{
5568 constraint(ALLOC_IN_RC(double_reg));
5569 match(RegD);
5570
5571 format %{ %}
5572 interface(REG_INTER);
5573 %}
5574
5575 // Double register operands
5576 operand legRegD() %{
5577 constraint(ALLOC_IN_RC(double_reg_legacy));
5578 match(RegD);
5579
5580 format %{ %}
5581 interface(REG_INTER);
5582 %}
5583
5584 // Double register operands
5585 operand vlRegD() %{
5586 constraint(ALLOC_IN_RC(double_reg_vl));
5587 match(RegD);
5588
5589 format %{ %}
5590 interface(REG_INTER);
5591 %}
5592
5593 //----------Memory Operands----------------------------------------------------
5594 // Direct Memory Operand
5595 // operand direct(immP addr)
5596 // %{
5597 // match(addr);
5598
5599 // format %{ "[$addr]" %}
5600 // interface(MEMORY_INTER) %{
5601 // base(0xFFFFFFFF);
5602 // index(0x4);
5603 // scale(0x0);
5604 // disp($addr);
5605 // %}
5606 // %}
5607
5608 // Indirect Memory Operand
5609 operand indirect(any_RegP reg)
5610 %{
5611 constraint(ALLOC_IN_RC(ptr_reg));
5612 match(reg);
5613
5614 format %{ "[$reg]" %}
5615 interface(MEMORY_INTER) %{
5616 base($reg);
5617 index(0x4);
5618 scale(0x0);
5619 disp(0x0);
5620 %}
5621 %}
5622
5623 // Indirect Memory Plus Short Offset Operand
5624 operand indOffset8(any_RegP reg, immL8 off)
5625 %{
5626 constraint(ALLOC_IN_RC(ptr_reg));
5627 match(AddP reg off);
5628
5629 format %{ "[$reg + $off (8-bit)]" %}
5630 interface(MEMORY_INTER) %{
5631 base($reg);
5632 index(0x4);
5633 scale(0x0);
5634 disp($off);
5635 %}
5636 %}
5637
5638 // Indirect Memory Plus Long Offset Operand
5639 operand indOffset32(any_RegP reg, immL32 off)
5640 %{
5641 constraint(ALLOC_IN_RC(ptr_reg));
5642 match(AddP reg off);
5643
5644 format %{ "[$reg + $off (32-bit)]" %}
5645 interface(MEMORY_INTER) %{
5646 base($reg);
5647 index(0x4);
5648 scale(0x0);
5649 disp($off);
5650 %}
5651 %}
5652
5653 // Indirect Memory Plus Index Register Plus Offset Operand
5654 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5655 %{
5656 constraint(ALLOC_IN_RC(ptr_reg));
5657 match(AddP (AddP reg lreg) off);
5658
5659 op_cost(10);
5660 format %{"[$reg + $off + $lreg]" %}
5661 interface(MEMORY_INTER) %{
5662 base($reg);
5663 index($lreg);
5664 scale(0x0);
5665 disp($off);
5666 %}
5667 %}
5668
// Indirect Memory Plus Index Register Operand
5670 operand indIndex(any_RegP reg, rRegL lreg)
5671 %{
5672 constraint(ALLOC_IN_RC(ptr_reg));
5673 match(AddP reg lreg);
5674
5675 op_cost(10);
5676 format %{"[$reg + $lreg]" %}
5677 interface(MEMORY_INTER) %{
5678 base($reg);
5679 index($lreg);
5680 scale(0x0);
5681 disp(0x0);
5682 %}
5683 %}
5684
5685 // Indirect Memory Times Scale Plus Index Register
5686 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5687 %{
5688 constraint(ALLOC_IN_RC(ptr_reg));
5689 match(AddP reg (LShiftL lreg scale));
5690
5691 op_cost(10);
5692 format %{"[$reg + $lreg << $scale]" %}
5693 interface(MEMORY_INTER) %{
5694 base($reg);
5695 index($lreg);
5696 scale($scale);
5697 disp(0x0);
5698 %}
5699 %}
5700
5701 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5702 %{
5703 constraint(ALLOC_IN_RC(ptr_reg));
5704 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5705 match(AddP reg (LShiftL (ConvI2L idx) scale));
5706
5707 op_cost(10);
5708 format %{"[$reg + pos $idx << $scale]" %}
5709 interface(MEMORY_INTER) %{
5710 base($reg);
5711 index($idx);
5712 scale($scale);
5713 disp(0x0);
5714 %}
5715 %}
5716
5717 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5718 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5719 %{
5720 constraint(ALLOC_IN_RC(ptr_reg));
5721 match(AddP (AddP reg (LShiftL lreg scale)) off);
5722
5723 op_cost(10);
5724 format %{"[$reg + $off + $lreg << $scale]" %}
5725 interface(MEMORY_INTER) %{
5726 base($reg);
5727 index($lreg);
5728 scale($scale);
5729 disp($off);
5730 %}
5731 %}
5732
5733 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5734 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5735 %{
5736 constraint(ALLOC_IN_RC(ptr_reg));
5737 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5738 match(AddP (AddP reg (ConvI2L idx)) off);
5739
5740 op_cost(10);
5741 format %{"[$reg + $off + $idx]" %}
5742 interface(MEMORY_INTER) %{
5743 base($reg);
5744 index($idx);
5745 scale(0x0);
5746 disp($off);
5747 %}
5748 %}
5749
5750 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5751 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5752 %{
5753 constraint(ALLOC_IN_RC(ptr_reg));
5754 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5755 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5756
5757 op_cost(10);
5758 format %{"[$reg + $off + $idx << $scale]" %}
5759 interface(MEMORY_INTER) %{
5760 base($reg);
5761 index($idx);
5762 scale($scale);
5763 disp($off);
5764 %}
5765 %}
5766
5767 // Indirect Narrow Oop Operand
5768 operand indCompressedOop(rRegN reg) %{
5769 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5770 constraint(ALLOC_IN_RC(ptr_reg));
5771 match(DecodeN reg);
5772
5773 op_cost(10);
5774 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5775 interface(MEMORY_INTER) %{
5776 base(0xc); // R12
5777 index($reg);
5778 scale(0x3);
5779 disp(0x0);
5780 %}
5781 %}
5782
5783 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a
// base register, so we can't free r12 even with CompressedOops::base() == nullptr.
5786 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5787 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5788 constraint(ALLOC_IN_RC(ptr_reg));
5789 match(AddP (DecodeN reg) off);
5790
5791 op_cost(10);
5792 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5793 interface(MEMORY_INTER) %{
5794 base(0xc); // R12
5795 index($reg);
5796 scale(0x3);
5797 disp($off);
5798 %}
5799 %}
5800
5801 // Indirect Memory Operand
5802 operand indirectNarrow(rRegN reg)
5803 %{
5804 predicate(CompressedOops::shift() == 0);
5805 constraint(ALLOC_IN_RC(ptr_reg));
5806 match(DecodeN reg);
5807
5808 format %{ "[$reg]" %}
5809 interface(MEMORY_INTER) %{
5810 base($reg);
5811 index(0x4);
5812 scale(0x0);
5813 disp(0x0);
5814 %}
5815 %}
5816
5817 // Indirect Memory Plus Short Offset Operand
5818 operand indOffset8Narrow(rRegN reg, immL8 off)
5819 %{
5820 predicate(CompressedOops::shift() == 0);
5821 constraint(ALLOC_IN_RC(ptr_reg));
5822 match(AddP (DecodeN reg) off);
5823
5824 format %{ "[$reg + $off (8-bit)]" %}
5825 interface(MEMORY_INTER) %{
5826 base($reg);
5827 index(0x4);
5828 scale(0x0);
5829 disp($off);
5830 %}
5831 %}
5832
5833 // Indirect Memory Plus Long Offset Operand
5834 operand indOffset32Narrow(rRegN reg, immL32 off)
5835 %{
5836 predicate(CompressedOops::shift() == 0);
5837 constraint(ALLOC_IN_RC(ptr_reg));
5838 match(AddP (DecodeN reg) off);
5839
5840 format %{ "[$reg + $off (32-bit)]" %}
5841 interface(MEMORY_INTER) %{
5842 base($reg);
5843 index(0x4);
5844 scale(0x0);
5845 disp($off);
5846 %}
5847 %}
5848
5849 // Indirect Memory Plus Index Register Plus Offset Operand
5850 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5851 %{
5852 predicate(CompressedOops::shift() == 0);
5853 constraint(ALLOC_IN_RC(ptr_reg));
5854 match(AddP (AddP (DecodeN reg) lreg) off);
5855
5856 op_cost(10);
5857 format %{"[$reg + $off + $lreg]" %}
5858 interface(MEMORY_INTER) %{
5859 base($reg);
5860 index($lreg);
5861 scale(0x0);
5862 disp($off);
5863 %}
5864 %}
5865
// Indirect Memory Plus Index Register Operand
5867 operand indIndexNarrow(rRegN reg, rRegL lreg)
5868 %{
5869 predicate(CompressedOops::shift() == 0);
5870 constraint(ALLOC_IN_RC(ptr_reg));
5871 match(AddP (DecodeN reg) lreg);
5872
5873 op_cost(10);
5874 format %{"[$reg + $lreg]" %}
5875 interface(MEMORY_INTER) %{
5876 base($reg);
5877 index($lreg);
5878 scale(0x0);
5879 disp(0x0);
5880 %}
5881 %}
5882
5883 // Indirect Memory Times Scale Plus Index Register
5884 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5885 %{
5886 predicate(CompressedOops::shift() == 0);
5887 constraint(ALLOC_IN_RC(ptr_reg));
5888 match(AddP (DecodeN reg) (LShiftL lreg scale));
5889
5890 op_cost(10);
5891 format %{"[$reg + $lreg << $scale]" %}
5892 interface(MEMORY_INTER) %{
5893 base($reg);
5894 index($lreg);
5895 scale($scale);
5896 disp(0x0);
5897 %}
5898 %}
5899
5900 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5901 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5902 %{
5903 predicate(CompressedOops::shift() == 0);
5904 constraint(ALLOC_IN_RC(ptr_reg));
5905 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5906
5907 op_cost(10);
5908 format %{"[$reg + $off + $lreg << $scale]" %}
5909 interface(MEMORY_INTER) %{
5910 base($reg);
5911 index($lreg);
5912 scale($scale);
5913 disp($off);
5914 %}
5915 %}
5916
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5918 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5919 %{
5920 constraint(ALLOC_IN_RC(ptr_reg));
5921 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5922 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5923
5924 op_cost(10);
5925 format %{"[$reg + $off + $idx]" %}
5926 interface(MEMORY_INTER) %{
5927 base($reg);
5928 index($idx);
5929 scale(0x0);
5930 disp($off);
5931 %}
5932 %}
5933
5934 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5935 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5936 %{
5937 constraint(ALLOC_IN_RC(ptr_reg));
5938 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5939 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5940
5941 op_cost(10);
5942 format %{"[$reg + $off + $idx << $scale]" %}
5943 interface(MEMORY_INTER) %{
5944 base($reg);
5945 index($idx);
5946 scale($scale);
5947 disp($off);
5948 %}
5949 %}
5950
5951 //----------Special Memory Operands--------------------------------------------
5952 // Stack Slot Operand - This operand is used for loading and storing temporary
5953 // values on the stack where a match requires a value to
5954 // flow through memory.
5955 operand stackSlotP(sRegP reg)
5956 %{
5957 constraint(ALLOC_IN_RC(stack_slots));
5958 // No match rule because this operand is only generated in matching
5959
5960 format %{ "[$reg]" %}
5961 interface(MEMORY_INTER) %{
5962 base(0x4); // RSP
5963 index(0x4); // No Index
5964 scale(0x0); // No Scale
5965 disp($reg); // Stack Offset
5966 %}
5967 %}
5968
5969 operand stackSlotI(sRegI reg)
5970 %{
5971 constraint(ALLOC_IN_RC(stack_slots));
5972 // No match rule because this operand is only generated in matching
5973
5974 format %{ "[$reg]" %}
5975 interface(MEMORY_INTER) %{
5976 base(0x4); // RSP
5977 index(0x4); // No Index
5978 scale(0x0); // No Scale
5979 disp($reg); // Stack Offset
5980 %}
5981 %}
5982
5983 operand stackSlotF(sRegF reg)
5984 %{
5985 constraint(ALLOC_IN_RC(stack_slots));
5986 // No match rule because this operand is only generated in matching
5987
5988 format %{ "[$reg]" %}
5989 interface(MEMORY_INTER) %{
5990 base(0x4); // RSP
5991 index(0x4); // No Index
5992 scale(0x0); // No Scale
5993 disp($reg); // Stack Offset
5994 %}
5995 %}
5996
5997 operand stackSlotD(sRegD reg)
5998 %{
5999 constraint(ALLOC_IN_RC(stack_slots));
6000 // No match rule because this operand is only generated in matching
6001
6002 format %{ "[$reg]" %}
6003 interface(MEMORY_INTER) %{
6004 base(0x4); // RSP
6005 index(0x4); // No Index
6006 scale(0x0); // No Scale
6007 disp($reg); // Stack Offset
6008 %}
6009 %}
6010 operand stackSlotL(sRegL reg)
6011 %{
6012 constraint(ALLOC_IN_RC(stack_slots));
6013 // No match rule because this operand is only generated in matching
6014
6015 format %{ "[$reg]" %}
6016 interface(MEMORY_INTER) %{
6017 base(0x4); // RSP
6018 index(0x4); // No Index
6019 scale(0x0); // No Scale
6020 disp($reg); // Stack Offset
6021 %}
6022 %}
6023
6024 //----------Conditional Branch Operands----------------------------------------
6025 // Comparison Op - This is the operation of the comparison, and is limited to
6026 // the following set of codes:
6027 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6028 //
6029 // Other attributes of the comparison, such as unsignedness, are specified
6030 // by the comparison instruction that sets a condition code flags register.
6031 // That result is represented by a flags operand whose subtype is appropriate
6032 // to the unsignedness (etc.) of the comparison.
6033 //
6034 // Later, the instruction which matches both the Comparison Op (a Bool) and
6035 // the flags (produced by the Cmp) specifies the coding of the comparison op
6036 // by matching a specific subtype of Bool operand below, such as cmpOpU.
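//
// For illustration only (a hypothetical sketch, not a rule defined in
// this section): an instruction matching both the Bool and the flags
// might look like
//
//   instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
//     match(If cop cr);
//     effect(USE labl);
//     ins_encode %{ __ jcc((Assembler::Condition)$cop$$cmpcode, *($labl$$label)); %}
//     ins_pipe(pipe_jcc);
//   %}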
6037
6038 // Comparison Code
6039 operand cmpOp()
6040 %{
6041 match(Bool);
6042
6043 format %{ "" %}
6044 interface(COND_INTER) %{
6045 equal(0x4, "e");
6046 not_equal(0x5, "ne");
6047 less(0xC, "l");
6048 greater_equal(0xD, "ge");
6049 less_equal(0xE, "le");
6050 greater(0xF, "g");
6051 overflow(0x0, "o");
6052 no_overflow(0x1, "no");
6053 %}
6054 %}
6055
6056 // Comparison Code, unsigned compare. Used by FP also, with
6057 // C2 (unordered) turned into GT or LT already. The other bits
6058 // C0 and C3 are turned into Carry & Zero flags.
6059 operand cmpOpU()
6060 %{
6061 match(Bool);
6062
6063 format %{ "" %}
6064 interface(COND_INTER) %{
6065 equal(0x4, "e");
6066 not_equal(0x5, "ne");
6067 less(0x2, "b");
6068 greater_equal(0x3, "ae");
6069 less_equal(0x6, "be");
6070 greater(0x7, "a");
6071 overflow(0x0, "o");
6072 no_overflow(0x1, "no");
6073 %}
6074 %}
6075
6076
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6080 operand cmpOpUCF() %{
6081 match(Bool);
6082 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
6083 n->as_Bool()->_test._test == BoolTest::ge ||
6084 n->as_Bool()->_test._test == BoolTest::le ||
6085 n->as_Bool()->_test._test == BoolTest::gt ||
6086 n->in(1)->in(1) == n->in(1)->in(2));
6087 format %{ "" %}
6088 interface(COND_INTER) %{
6089 equal(0xb, "np");
6090 not_equal(0xa, "p");
6091 less(0x2, "b");
6092 greater_equal(0x3, "ae");
6093 less_equal(0x6, "be");
6094 greater(0x7, "a");
6095 overflow(0x0, "o");
6096 no_overflow(0x1, "no");
6097 %}
6098 %}
6099
6100
6101 // Floating comparisons that can be fixed up with extra conditional jumps
6102 operand cmpOpUCF2() %{
6103 match(Bool);
6104 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6105 n->as_Bool()->_test._test == BoolTest::eq) &&
6106 n->in(1)->in(1) != n->in(1)->in(2));
6107 format %{ "" %}
6108 interface(COND_INTER) %{
6109 equal(0x4, "e");
6110 not_equal(0x5, "ne");
6111 less(0x2, "b");
6112 greater_equal(0x3, "ae");
6113 less_equal(0x6, "be");
6114 greater(0x7, "a");
6115 overflow(0x0, "o");
6116 no_overflow(0x1, "no");
6117 %}
6118 %}
6119
// Operands for bound floating point register arguments
6121 operand rxmm0() %{
6122 constraint(ALLOC_IN_RC(xmm0_reg));
6123 match(VecX);
format %{ %}
6125 interface(REG_INTER);
6126 %}
6127
6128 // Vectors
6129
6130 // Dummy generic vector class. Should be used for all vector operands.
6131 // Replaced with vec[SDXYZ] during post-selection pass.
6132 operand vec() %{
6133 constraint(ALLOC_IN_RC(dynamic));
6134 match(VecX);
6135 match(VecY);
6136 match(VecZ);
6137 match(VecS);
6138 match(VecD);
6139
6140 format %{ %}
6141 interface(REG_INTER);
6142 %}
6143
6144 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6145 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6146 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6147 // runtime code generation via reg_class_dynamic.
6148 operand legVec() %{
6149 constraint(ALLOC_IN_RC(dynamic));
6150 match(VecX);
6151 match(VecY);
6152 match(VecZ);
6153 match(VecS);
6154 match(VecD);
6155
6156 format %{ %}
6157 interface(REG_INTER);
6158 %}
6159
6160 // Replaces vec during post-selection cleanup. See above.
6161 operand vecS() %{
6162 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6163 match(VecS);
6164
6165 format %{ %}
6166 interface(REG_INTER);
6167 %}
6168
6169 // Replaces legVec during post-selection cleanup. See above.
6170 operand legVecS() %{
6171 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6172 match(VecS);
6173
6174 format %{ %}
6175 interface(REG_INTER);
6176 %}
6177
6178 // Replaces vec during post-selection cleanup. See above.
6179 operand vecD() %{
6180 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6181 match(VecD);
6182
6183 format %{ %}
6184 interface(REG_INTER);
6185 %}
6186
6187 // Replaces legVec during post-selection cleanup. See above.
6188 operand legVecD() %{
6189 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6190 match(VecD);
6191
6192 format %{ %}
6193 interface(REG_INTER);
6194 %}
6195
6196 // Replaces vec during post-selection cleanup. See above.
6197 operand vecX() %{
6198 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6199 match(VecX);
6200
6201 format %{ %}
6202 interface(REG_INTER);
6203 %}
6204
6205 // Replaces legVec during post-selection cleanup. See above.
6206 operand legVecX() %{
6207 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6208 match(VecX);
6209
6210 format %{ %}
6211 interface(REG_INTER);
6212 %}
6213
6214 // Replaces vec during post-selection cleanup. See above.
6215 operand vecY() %{
6216 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6217 match(VecY);
6218
6219 format %{ %}
6220 interface(REG_INTER);
6221 %}
6222
6223 // Replaces legVec during post-selection cleanup. See above.
6224 operand legVecY() %{
6225 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6226 match(VecY);
6227
6228 format %{ %}
6229 interface(REG_INTER);
6230 %}
6231
6232 // Replaces vec during post-selection cleanup. See above.
6233 operand vecZ() %{
6234 constraint(ALLOC_IN_RC(vectorz_reg));
6235 match(VecZ);
6236
6237 format %{ %}
6238 interface(REG_INTER);
6239 %}
6240
6241 // Replaces legVec during post-selection cleanup. See above.
6242 operand legVecZ() %{
6243 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6244 match(VecZ);
6245
6246 format %{ %}
6247 interface(REG_INTER);
6248 %}
6249
6250 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6252 // instruction definitions by not requiring the AD writer to specify separate
6253 // instructions for every form of operand when the instruction accepts
6254 // multiple operand types with the same basic encoding and format. The classic
6255 // case of this is memory operands.
6256
6257 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6258 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6259 indCompressedOop, indCompressedOopOffset,
6260 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6261 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6262 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
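
// A hypothetical use of the class (a sketch mirroring the shape of this
// file's load rules, not a definition made here): one rule written
// against 'memory' covers every addressing form listed above.
//
//   instruct loadI_mem(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     format %{ "movl    $dst, $mem" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}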
6263
6264 //----------PIPELINE-----------------------------------------------------------
6265 // Rules which define the behavior of the target architectures pipeline.
6266 pipeline %{
6267
6268 //----------ATTRIBUTES---------------------------------------------------------
6269 attributes %{
variable_size_instructions; // Variable-sized instructions
6271 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
instruction_unit_size = 1; // An instruction is 1 byte long
6273 instruction_fetch_unit_size = 16; // The processor fetches one line
6274 instruction_fetch_units = 1; // of 16 bytes
6275 %}
6276
6277 //----------RESOURCES----------------------------------------------------------
6278 // Resources are the functional units available to the machine
6279
6280 // Generic P2/P3 pipeline
6281 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6282 // 3 instructions decoded per cycle.
6283 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
6285 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6286 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6287 BR, FPU,
6288 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6289
6290 //----------PIPELINE DESCRIPTION-----------------------------------------------
6291 // Pipeline Description specifies the stages in the machine's pipeline
6292
6293 // Generic P2/P3 pipeline
6294 pipe_desc(S0, S1, S2, S3, S4, S5);
6295
6296 //----------PIPELINE CLASSES---------------------------------------------------
6297 // Pipeline Classes describe the stages in which input and output are
6298 // referenced by the hardware pipeline.
6299
6300 // Naming convention: ialu or fpu
6301 // Then: _reg
6302 // Then: _reg if there is a 2nd register
6303 // Then: _long if it's a pair of instructions implementing a long
6304 // Then: _fat if it requires the big decoder
6305 // Or: _mem if it requires the big decoder and a memory unit.
6306
6307 // Integer ALU reg operation
6308 pipe_class ialu_reg(rRegI dst)
6309 %{
6310 single_instruction;
6311 dst : S4(write);
6312 dst : S3(read);
6313 DECODE : S0; // any decoder
6314 ALU : S3; // any alu
6315 %}
6316
6317 // Long ALU reg operation
6318 pipe_class ialu_reg_long(rRegL dst)
6319 %{
6320 instruction_count(2);
6321 dst : S4(write);
6322 dst : S3(read);
6323 DECODE : S0(2); // any 2 decoders
6324 ALU : S3(2); // both alus
6325 %}
6326
6327 // Integer ALU reg operation using big decoder
6328 pipe_class ialu_reg_fat(rRegI dst)
6329 %{
6330 single_instruction;
6331 dst : S4(write);
6332 dst : S3(read);
6333 D0 : S0; // big decoder only
6334 ALU : S3; // any alu
6335 %}
6336
6337 // Integer ALU reg-reg operation
6338 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6339 %{
6340 single_instruction;
6341 dst : S4(write);
6342 src : S3(read);
6343 DECODE : S0; // any decoder
6344 ALU : S3; // any alu
6345 %}
6346
6347 // Integer ALU reg-reg operation
6348 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6349 %{
6350 single_instruction;
6351 dst : S4(write);
6352 src : S3(read);
6353 D0 : S0; // big decoder only
6354 ALU : S3; // any alu
6355 %}
6356
6357 // Integer ALU reg-mem operation
6358 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6359 %{
6360 single_instruction;
6361 dst : S5(write);
6362 mem : S3(read);
6363 D0 : S0; // big decoder only
6364 ALU : S4; // any alu
6365 MEM : S3; // any mem
6366 %}
6367
6368 // Integer mem operation (prefetch)
6369 pipe_class ialu_mem(memory mem)
6370 %{
6371 single_instruction;
6372 mem : S3(read);
6373 D0 : S0; // big decoder only
6374 MEM : S3; // any mem
6375 %}
6376
6377 // Integer Store to Memory
6378 pipe_class ialu_mem_reg(memory mem, rRegI src)
6379 %{
6380 single_instruction;
6381 mem : S3(read);
6382 src : S5(read);
6383 D0 : S0; // big decoder only
6384 ALU : S4; // any alu
6385 MEM : S3;
6386 %}
6387
6388 // // Long Store to Memory
6389 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6390 // %{
6391 // instruction_count(2);
6392 // mem : S3(read);
6393 // src : S5(read);
6394 // D0 : S0(2); // big decoder only; twice
6395 // ALU : S4(2); // any 2 alus
6396 // MEM : S3(2); // Both mems
6397 // %}
6398
6399 // Integer Store to Memory
6400 pipe_class ialu_mem_imm(memory mem)
6401 %{
6402 single_instruction;
6403 mem : S3(read);
6404 D0 : S0; // big decoder only
6405 ALU : S4; // any alu
6406 MEM : S3;
6407 %}
6408
6409 // Integer ALU0 reg-reg operation
6410 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6411 %{
6412 single_instruction;
6413 dst : S4(write);
6414 src : S3(read);
6415 D0 : S0; // Big decoder only
6416 ALU0 : S3; // only alu0
6417 %}
6418
6419 // Integer ALU0 reg-mem operation
6420 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6421 %{
6422 single_instruction;
6423 dst : S5(write);
6424 mem : S3(read);
6425 D0 : S0; // big decoder only
6426 ALU0 : S4; // ALU0 only
6427 MEM : S3; // any mem
6428 %}
6429
6430 // Integer ALU reg-reg operation
6431 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6432 %{
6433 single_instruction;
6434 cr : S4(write);
6435 src1 : S3(read);
6436 src2 : S3(read);
6437 DECODE : S0; // any decoder
6438 ALU : S3; // any alu
6439 %}
6440
6441 // Integer ALU reg-imm operation
6442 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6443 %{
6444 single_instruction;
6445 cr : S4(write);
6446 src1 : S3(read);
6447 DECODE : S0; // any decoder
6448 ALU : S3; // any alu
6449 %}
6450
6451 // Integer ALU reg-mem operation
6452 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6453 %{
6454 single_instruction;
6455 cr : S4(write);
6456 src1 : S3(read);
6457 src2 : S3(read);
6458 D0 : S0; // big decoder only
6459 ALU : S4; // any alu
6460 MEM : S3;
6461 %}
6462
6463 // Conditional move reg-reg
6464 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6465 %{
6466 instruction_count(4);
6467 y : S4(read);
6468 q : S3(read);
6469 p : S3(read);
6470 DECODE : S0(4); // any decoder
6471 %}
6472
6473 // Conditional move reg-reg
6474 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6475 %{
6476 single_instruction;
6477 dst : S4(write);
6478 src : S3(read);
6479 cr : S3(read);
6480 DECODE : S0; // any decoder
6481 %}
6482
6483 // Conditional move reg-mem
6484 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6485 %{
6486 single_instruction;
6487 dst : S4(write);
6488 src : S3(read);
6489 cr : S3(read);
6490 DECODE : S0; // any decoder
6491 MEM : S3;
6492 %}
6493
6494 // Conditional move reg-reg long
6495 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6496 %{
6497 single_instruction;
6498 dst : S4(write);
6499 src : S3(read);
6500 cr : S3(read);
6501 DECODE : S0(2); // any 2 decoders
6502 %}
6503
6504 // Float reg-reg operation
6505 pipe_class fpu_reg(regD dst)
6506 %{
6507 instruction_count(2);
6508 dst : S3(read);
6509 DECODE : S0(2); // any 2 decoders
6510 FPU : S3;
6511 %}
6512
6513 // Float reg-reg operation
6514 pipe_class fpu_reg_reg(regD dst, regD src)
6515 %{
6516 instruction_count(2);
6517 dst : S4(write);
6518 src : S3(read);
6519 DECODE : S0(2); // any 2 decoders
6520 FPU : S3;
6521 %}
6522
6523 // Float reg-reg operation
6524 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6525 %{
6526 instruction_count(3);
6527 dst : S4(write);
6528 src1 : S3(read);
6529 src2 : S3(read);
6530 DECODE : S0(3); // any 3 decoders
6531 FPU : S3(2);
6532 %}
6533
6534 // Float reg-reg operation
6535 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6536 %{
6537 instruction_count(4);
6538 dst : S4(write);
6539 src1 : S3(read);
6540 src2 : S3(read);
6541 src3 : S3(read);
DECODE : S0(4); // any 4 decoders
6543 FPU : S3(2);
6544 %}
6545
6546 // Float reg-reg operation
6547 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6548 %{
6549 instruction_count(4);
6550 dst : S4(write);
6551 src1 : S3(read);
6552 src2 : S3(read);
6553 src3 : S3(read);
6554 DECODE : S1(3); // any 3 decoders
6555 D0 : S0; // Big decoder only
6556 FPU : S3(2);
6557 MEM : S3;
6558 %}
6559
6560 // Float reg-mem operation
6561 pipe_class fpu_reg_mem(regD dst, memory mem)
6562 %{
6563 instruction_count(2);
6564 dst : S5(write);
6565 mem : S3(read);
6566 D0 : S0; // big decoder only
6567 DECODE : S1; // any decoder for FPU POP
6568 FPU : S4;
6569 MEM : S3; // any mem
6570 %}
6571
6572 // Float reg-mem operation
6573 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6574 %{
6575 instruction_count(3);
6576 dst : S5(write);
6577 src1 : S3(read);
6578 mem : S3(read);
6579 D0 : S0; // big decoder only
6580 DECODE : S1(2); // any decoder for FPU POP
6581 FPU : S4;
6582 MEM : S3; // any mem
6583 %}
6584
6585 // Float mem-reg operation
6586 pipe_class fpu_mem_reg(memory mem, regD src)
6587 %{
6588 instruction_count(2);
6589 src : S5(read);
6590 mem : S3(read);
6591 DECODE : S0; // any decoder for FPU PUSH
6592 D0 : S1; // big decoder only
6593 FPU : S4;
6594 MEM : S3; // any mem
6595 %}
6596
6597 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6598 %{
6599 instruction_count(3);
6600 src1 : S3(read);
6601 src2 : S3(read);
6602 mem : S3(read);
6603 DECODE : S0(2); // any decoder for FPU PUSH
6604 D0 : S1; // big decoder only
6605 FPU : S4;
6606 MEM : S3; // any mem
6607 %}
6608
6609 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6610 %{
6611 instruction_count(3);
6612 src1 : S3(read);
6613 src2 : S3(read);
6614 mem : S4(read);
6615 DECODE : S0; // any decoder for FPU PUSH
6616 D0 : S0(2); // big decoder only
6617 FPU : S4;
6618 MEM : S3(2); // any mem
6619 %}
6620
6621 pipe_class fpu_mem_mem(memory dst, memory src1)
6622 %{
6623 instruction_count(2);
6624 src1 : S3(read);
6625 dst : S4(read);
6626 D0 : S0(2); // big decoder only
6627 MEM : S3(2); // any mem
6628 %}
6629
6630 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6631 %{
6632 instruction_count(3);
6633 src1 : S3(read);
6634 src2 : S3(read);
6635 dst : S4(read);
6636 D0 : S0(3); // big decoder only
6637 FPU : S4;
6638 MEM : S3(3); // any mem
6639 %}
6640
6641 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6642 %{
6643 instruction_count(3);
6644 src1 : S4(read);
6645 mem : S4(read);
6646 DECODE : S0; // any decoder for FPU PUSH
6647 D0 : S0(2); // big decoder only
6648 FPU : S4;
6649 MEM : S3(2); // any mem
6650 %}
6651
6652 // Float load constant
6653 pipe_class fpu_reg_con(regD dst)
6654 %{
6655 instruction_count(2);
6656 dst : S5(write);
6657 D0 : S0; // big decoder only for the load
6658 DECODE : S1; // any decoder for FPU POP
6659 FPU : S4;
6660 MEM : S3; // any mem
6661 %}
6662
6663 // Float load constant
6664 pipe_class fpu_reg_reg_con(regD dst, regD src)
6665 %{
6666 instruction_count(3);
6667 dst : S5(write);
6668 src : S3(read);
6669 D0 : S0; // big decoder only for the load
6670 DECODE : S1(2); // any decoder for FPU POP
6671 FPU : S4;
6672 MEM : S3; // any mem
6673 %}
6674
// Unconditional branch
6676 pipe_class pipe_jmp(label labl)
6677 %{
6678 single_instruction;
6679 BR : S3;
6680 %}
6681
6682 // Conditional branch
6683 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6684 %{
6685 single_instruction;
6686 cr : S1(read);
6687 BR : S3;
6688 %}
6689
6690 // Allocation idiom
6691 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6692 %{
6693 instruction_count(1); force_serialization;
6694 fixed_latency(6);
6695 heap_ptr : S3(read);
6696 DECODE : S0(3);
6697 D0 : S2;
6698 MEM : S3;
6699 ALU : S3(2);
6700 dst : S5(write);
6701 BR : S5;
6702 %}
6703
6704 // Generic big/slow expanded idiom
6705 pipe_class pipe_slow()
6706 %{
6707 instruction_count(10); multiple_bundles; force_serialization;
6708 fixed_latency(100);
6709 D0 : S0(2);
6710 MEM : S3(2);
6711 %}
6712
6713 // The real do-nothing guy
6714 pipe_class empty()
6715 %{
6716 instruction_count(0);
6717 %}
6718
6719 // Define the class for the Nop node
6720 define
6721 %{
6722 MachNop = empty;
6723 %}
6724
6725 %}
6726
6727 //----------INSTRUCTIONS-------------------------------------------------------
6728 //
6729 // match -- States which machine-independent subtree may be replaced
6730 // by this instruction.
6731 // ins_cost -- The estimated cost of this instruction is used by instruction
6732 // selection to identify a minimum cost tree of machine
6733 // instructions that matches a tree of machine-independent
6734 // instructions.
6735 // format -- A string providing the disassembly for this instruction.
6736 // The value of an instruction's operand may be inserted
6737 // by referring to it with a '$' prefix.
6738 // opcode -- Three instruction opcodes may be provided. These are referred
6739 // to within an encode class as $primary, $secondary, and $tertiary
// respectively. The primary opcode is commonly used to
6741 // indicate the type of machine instruction, while secondary
6742 // and tertiary are often used for prefix options or addressing
6743 // modes.
6744 // ins_encode -- A list of encode classes with parameters. The encode class
6745 // name must have been defined in an 'enc_class' specification
6746 // in the encode section of the architecture description.
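//
// As an illustrative sketch only (commented out here; the live definitions
// below follow the same shape), a reg-reg integer add looks like:
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));  // replaces the ideal AddI subtree
//     effect(KILL cr);                // addl clobbers the flags
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}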
6747
6748 // ============================================================================
6749
6750 instruct ShouldNotReachHere() %{
6751 match(Halt);
6752 format %{ "stop\t# ShouldNotReachHere" %}
6753 ins_encode %{
6754 if (is_reachable()) {
6755 const char* str = __ code_string(_halt_reason);
6756 __ stop(str);
6757 }
6758 %}
6759 ins_pipe(pipe_slow);
6760 %}
6761
6762 // ============================================================================
6763
6764 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
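// They should never actually be emitted, hence the ShouldNotReachHere() in
// each encoding below.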
6765 // Load Float
6766 instruct MoveF2VL(vlRegF dst, regF src) %{
6767 match(Set dst src);
6768 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6769 ins_encode %{
6770 ShouldNotReachHere();
6771 %}
6772 ins_pipe( fpu_reg_reg );
6773 %}
6774
6775 // Load Float
6776 instruct MoveF2LEG(legRegF dst, regF src) %{
6777 match(Set dst src);
6778 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6779 ins_encode %{
6780 ShouldNotReachHere();
6781 %}
6782 ins_pipe( fpu_reg_reg );
6783 %}
6784
6785 // Load Float
6786 instruct MoveVL2F(regF dst, vlRegF src) %{
6787 match(Set dst src);
6788 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6789 ins_encode %{
6790 ShouldNotReachHere();
6791 %}
6792 ins_pipe( fpu_reg_reg );
6793 %}
6794
6795 // Load Float
6796 instruct MoveLEG2F(regF dst, legRegF src) %{
6797 match(Set dst src);
6798 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6799 ins_encode %{
6800 ShouldNotReachHere();
6801 %}
6802 ins_pipe( fpu_reg_reg );
6803 %}
6804
6805 // Load Double
6806 instruct MoveD2VL(vlRegD dst, regD src) %{
6807 match(Set dst src);
6808 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6809 ins_encode %{
6810 ShouldNotReachHere();
6811 %}
6812 ins_pipe( fpu_reg_reg );
6813 %}
6814
6815 // Load Double
6816 instruct MoveD2LEG(legRegD dst, regD src) %{
6817 match(Set dst src);
6818 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6819 ins_encode %{
6820 ShouldNotReachHere();
6821 %}
6822 ins_pipe( fpu_reg_reg );
6823 %}
6824
6825 // Load Double
6826 instruct MoveVL2D(regD dst, vlRegD src) %{
6827 match(Set dst src);
6828 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6829 ins_encode %{
6830 ShouldNotReachHere();
6831 %}
6832 ins_pipe( fpu_reg_reg );
6833 %}
6834
6835 // Load Double
6836 instruct MoveLEG2D(regD dst, legRegD src) %{
6837 match(Set dst src);
6838 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6839 ins_encode %{
6840 ShouldNotReachHere();
6841 %}
6842 ins_pipe( fpu_reg_reg );
6843 %}
6844
6845 //----------Load/Store/Move Instructions---------------------------------------
6846 //----------Load Instructions--------------------------------------------------
6847
6848 // Load Byte (8 bit signed)
6849 instruct loadB(rRegI dst, memory mem)
6850 %{
6851 match(Set dst (LoadB mem));
6852
6853 ins_cost(125);
6854 format %{ "movsbl $dst, $mem\t# byte" %}
6855
6856 ins_encode %{
6857 __ movsbl($dst$$Register, $mem$$Address);
6858 %}
6859
6860 ins_pipe(ialu_reg_mem);
6861 %}
6862
6863 // Load Byte (8 bit signed) into Long Register
6864 instruct loadB2L(rRegL dst, memory mem)
6865 %{
6866 match(Set dst (ConvI2L (LoadB mem)));
6867
6868 ins_cost(125);
6869 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6870
6871 ins_encode %{
6872 __ movsbq($dst$$Register, $mem$$Address);
6873 %}
6874
6875 ins_pipe(ialu_reg_mem);
6876 %}
6877
6878 // Load Unsigned Byte (8 bit UNsigned)
6879 instruct loadUB(rRegI dst, memory mem)
6880 %{
6881 match(Set dst (LoadUB mem));
6882
6883 ins_cost(125);
6884 format %{ "movzbl $dst, $mem\t# ubyte" %}
6885
6886 ins_encode %{
6887 __ movzbl($dst$$Register, $mem$$Address);
6888 %}
6889
6890 ins_pipe(ialu_reg_mem);
6891 %}
6892
6893 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6894 instruct loadUB2L(rRegL dst, memory mem)
6895 %{
6896 match(Set dst (ConvI2L (LoadUB mem)));
6897
6898 ins_cost(125);
6899 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6900
6901 ins_encode %{
6902 __ movzbq($dst$$Register, $mem$$Address);
6903 %}
6904
6905 ins_pipe(ialu_reg_mem);
6906 %}
6907
6908 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6909 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6910 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6911 effect(KILL cr);
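  // andl writes the condition flags, hence the KILL cr effect.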
6912
6913 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6914 "andl $dst, right_n_bits($mask, 8)" %}
6915 ins_encode %{
6916 Register Rdst = $dst$$Register;
6917 __ movzbq(Rdst, $mem$$Address);
6918 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6919 %}
6920 ins_pipe(ialu_reg_mem);
6921 %}
6922
6923 // Load Short (16 bit signed)
6924 instruct loadS(rRegI dst, memory mem)
6925 %{
6926 match(Set dst (LoadS mem));
6927
6928 ins_cost(125);
6929 format %{ "movswl $dst, $mem\t# short" %}
6930
6931 ins_encode %{
6932 __ movswl($dst$$Register, $mem$$Address);
6933 %}
6934
6935 ins_pipe(ialu_reg_mem);
6936 %}
6937
6938 // Load Short (16 bit signed) to Byte (8 bit signed)
6939 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6940 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6941
6942 ins_cost(125);
6943 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6944 ins_encode %{
6945 __ movsbl($dst$$Register, $mem$$Address);
6946 %}
6947 ins_pipe(ialu_reg_mem);
6948 %}
6949
6950 // Load Short (16 bit signed) into Long Register
6951 instruct loadS2L(rRegL dst, memory mem)
6952 %{
6953 match(Set dst (ConvI2L (LoadS mem)));
6954
6955 ins_cost(125);
6956 format %{ "movswq $dst, $mem\t# short -> long" %}
6957
6958 ins_encode %{
6959 __ movswq($dst$$Register, $mem$$Address);
6960 %}
6961
6962 ins_pipe(ialu_reg_mem);
6963 %}
6964
6965 // Load Unsigned Short/Char (16 bit UNsigned)
6966 instruct loadUS(rRegI dst, memory mem)
6967 %{
6968 match(Set dst (LoadUS mem));
6969
6970 ins_cost(125);
6971 format %{ "movzwl $dst, $mem\t# ushort/char" %}
6972
6973 ins_encode %{
6974 __ movzwl($dst$$Register, $mem$$Address);
6975 %}
6976
6977 ins_pipe(ialu_reg_mem);
6978 %}
6979
6980 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6981 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6982 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6983
6984 ins_cost(125);
6985 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6986 ins_encode %{
6987 __ movsbl($dst$$Register, $mem$$Address);
6988 %}
6989 ins_pipe(ialu_reg_mem);
6990 %}
6991
6992 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6993 instruct loadUS2L(rRegL dst, memory mem)
6994 %{
6995 match(Set dst (ConvI2L (LoadUS mem)));
6996
6997 ins_cost(125);
6998 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
6999
7000 ins_encode %{
7001 __ movzwq($dst$$Register, $mem$$Address);
7002 %}
7003
7004 ins_pipe(ialu_reg_mem);
7005 %}
7006
7007 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7008 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7009 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
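  // The 0xFF mask keeps only the low byte, so a byte zero-extend suffices.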
7010
7011 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7012 ins_encode %{
7013 __ movzbq($dst$$Register, $mem$$Address);
7014 %}
7015 ins_pipe(ialu_reg_mem);
7016 %}
7017
7018 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7019 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7020 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7021 effect(KILL cr);
7022
7023 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7024 "andl $dst, right_n_bits($mask, 16)" %}
7025 ins_encode %{
7026 Register Rdst = $dst$$Register;
7027 __ movzwq(Rdst, $mem$$Address);
7028 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7029 %}
7030 ins_pipe(ialu_reg_mem);
7031 %}
7032
7033 // Load Integer
7034 instruct loadI(rRegI dst, memory mem)
7035 %{
7036 match(Set dst (LoadI mem));
7037
7038 ins_cost(125);
7039 format %{ "movl $dst, $mem\t# int" %}
7040
7041 ins_encode %{
7042 __ movl($dst$$Register, $mem$$Address);
7043 %}
7044
7045 ins_pipe(ialu_reg_mem);
7046 %}
7047
7048 // Load Integer (32 bit signed) to Byte (8 bit signed)
7049 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7050 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7051
7052 ins_cost(125);
7053 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7054 ins_encode %{
7055 __ movsbl($dst$$Register, $mem$$Address);
7056 %}
7057 ins_pipe(ialu_reg_mem);
7058 %}
7059
7060 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7061 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7062 match(Set dst (AndI (LoadI mem) mask));
7063
7064 ins_cost(125);
7065 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7066 ins_encode %{
7067 __ movzbl($dst$$Register, $mem$$Address);
7068 %}
7069 ins_pipe(ialu_reg_mem);
7070 %}
7071
7072 // Load Integer (32 bit signed) to Short (16 bit signed)
7073 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7074 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7075
7076 ins_cost(125);
7077 format %{ "movswl $dst, $mem\t# int -> short" %}
7078 ins_encode %{
7079 __ movswl($dst$$Register, $mem$$Address);
7080 %}
7081 ins_pipe(ialu_reg_mem);
7082 %}
7083
7084 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7085 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7086 match(Set dst (AndI (LoadI mem) mask));
7087
7088 ins_cost(125);
7089 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7090 ins_encode %{
7091 __ movzwl($dst$$Register, $mem$$Address);
7092 %}
7093 ins_pipe(ialu_reg_mem);
7094 %}
7095
7096 // Load Integer into Long Register
7097 instruct loadI2L(rRegL dst, memory mem)
7098 %{
7099 match(Set dst (ConvI2L (LoadI mem)));
7100
7101 ins_cost(125);
7102 format %{ "movslq $dst, $mem\t# int -> long" %}
7103
7104 ins_encode %{
7105 __ movslq($dst$$Register, $mem$$Address);
7106 %}
7107
7108 ins_pipe(ialu_reg_mem);
7109 %}
7110
7111 // Load Integer with mask 0xFF into Long Register
7112 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7113 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7114
7115 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7116 ins_encode %{
7117 __ movzbq($dst$$Register, $mem$$Address);
7118 %}
7119 ins_pipe(ialu_reg_mem);
7120 %}
7121
7122 // Load Integer with mask 0xFFFF into Long Register
7123 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7124 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7125
7126 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7127 ins_encode %{
7128 __ movzwq($dst$$Register, $mem$$Address);
7129 %}
7130 ins_pipe(ialu_reg_mem);
7131 %}
7132
7133 // Load Integer with a 31-bit mask into Long Register
7134 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7135 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7136 effect(KILL cr);
7137
7138 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7139 "andl $dst, $mask" %}
7140 ins_encode %{
7141 Register Rdst = $dst$$Register;
7142 __ movl(Rdst, $mem$$Address);
7143 __ andl(Rdst, $mask$$constant);
7144 %}
7145 ins_pipe(ialu_reg_mem);
7146 %}
7147
7148 // Load Unsigned Integer into Long Register
7149 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7150 %{
7151 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
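  // On x86-64, movl zero-extends its 32-bit result into the full 64-bit
  // register, which implements the AndL with the 32-bit mask for free.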
7152
7153 ins_cost(125);
7154 format %{ "movl $dst, $mem\t# uint -> long" %}
7155
7156 ins_encode %{
7157 __ movl($dst$$Register, $mem$$Address);
7158 %}
7159
7160 ins_pipe(ialu_reg_mem);
7161 %}
7162
7163 // Load Long
7164 instruct loadL(rRegL dst, memory mem)
7165 %{
7166 match(Set dst (LoadL mem));
7167
7168 ins_cost(125);
7169 format %{ "movq $dst, $mem\t# long" %}
7170
7171 ins_encode %{
7172 __ movq($dst$$Register, $mem$$Address);
7173 %}
7174
7175 ins_pipe(ialu_reg_mem); // XXX
7176 %}
7177
7178 // Load Range
7179 instruct loadRange(rRegI dst, memory mem)
7180 %{
7181 match(Set dst (LoadRange mem));
7182
7183 ins_cost(125); // XXX
7184 format %{ "movl $dst, $mem\t# range" %}
7185 ins_encode %{
7186 __ movl($dst$$Register, $mem$$Address);
7187 %}
7188 ins_pipe(ialu_reg_mem);
7189 %}
7190
7191 // Load Pointer
7192 instruct loadP(rRegP dst, memory mem)
7193 %{
7194 match(Set dst (LoadP mem));
7195 predicate(n->as_Load()->barrier_data() == 0);
7196
7197 ins_cost(125); // XXX
7198 format %{ "movq $dst, $mem\t# ptr" %}
7199 ins_encode %{
7200 __ movq($dst$$Register, $mem$$Address);
7201 %}
7202 ins_pipe(ialu_reg_mem); // XXX
7203 %}
7204
7205 // Load Compressed Pointer
7206 instruct loadN(rRegN dst, memory mem)
7207 %{
7208 predicate(n->as_Load()->barrier_data() == 0);
7209 match(Set dst (LoadN mem));
7210
7211 ins_cost(125); // XXX
7212 format %{ "movl $dst, $mem\t# compressed ptr" %}
7213 ins_encode %{
7214 __ movl($dst$$Register, $mem$$Address);
7215 %}
7216 ins_pipe(ialu_reg_mem); // XXX
7217 %}
7218
// Load Klass Pointer
7221 instruct loadKlass(rRegP dst, memory mem)
7222 %{
7223 match(Set dst (LoadKlass mem));
7224
7225 ins_cost(125); // XXX
7226 format %{ "movq $dst, $mem\t# class" %}
7227 ins_encode %{
7228 __ movq($dst$$Register, $mem$$Address);
7229 %}
7230 ins_pipe(ialu_reg_mem); // XXX
7231 %}
7232
7233 // Load narrow Klass Pointer
7234 instruct loadNKlass(rRegN dst, memory mem)
7235 %{
7236 predicate(!UseCompactObjectHeaders);
7237 match(Set dst (LoadNKlass mem));
7238
7239 ins_cost(125); // XXX
7240 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7241 ins_encode %{
7242 __ movl($dst$$Register, $mem$$Address);
7243 %}
7244 ins_pipe(ialu_reg_mem); // XXX
7245 %}
7246
7247 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7248 %{
7249 predicate(UseCompactObjectHeaders);
7250 match(Set dst (LoadNKlass mem));
7251 effect(KILL cr);
7252 ins_cost(125);
7253 format %{
7254 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7255 "shrl $dst, markWord::klass_shift_at_offset"
7256 %}
7257 ins_encode %{
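    // With APX, the NDD shift form folds the load and the shift into a
    // single instruction; otherwise fall back to movl + shrl.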
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
7265 %}
7266 ins_pipe(ialu_reg_mem);
7267 %}
7268
7269 // Load Float
7270 instruct loadF(regF dst, memory mem)
7271 %{
7272 match(Set dst (LoadF mem));
7273
7274 ins_cost(145); // XXX
7275 format %{ "movss $dst, $mem\t# float" %}
7276 ins_encode %{
7277 __ movflt($dst$$XMMRegister, $mem$$Address);
7278 %}
7279 ins_pipe(pipe_slow); // XXX
7280 %}
7281
7282 // Load Double
7283 instruct loadD_partial(regD dst, memory mem)
7284 %{
7285 predicate(!UseXmmLoadAndClearUpper);
7286 match(Set dst (LoadD mem));
7287
7288 ins_cost(145); // XXX
7289 format %{ "movlpd $dst, $mem\t# double" %}
7290 ins_encode %{
7291 __ movdbl($dst$$XMMRegister, $mem$$Address);
7292 %}
7293 ins_pipe(pipe_slow); // XXX
7294 %}
7295
7296 instruct loadD(regD dst, memory mem)
7297 %{
7298 predicate(UseXmmLoadAndClearUpper);
7299 match(Set dst (LoadD mem));
7300
7301 ins_cost(145); // XXX
7302 format %{ "movsd $dst, $mem\t# double" %}
7303 ins_encode %{
7304 __ movdbl($dst$$XMMRegister, $mem$$Address);
7305 %}
7306 ins_pipe(pipe_slow); // XXX
7307 %}
7308
7309 // max = java.lang.Math.max(float a, float b)
7310 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
7311 predicate(VM_Version::supports_avx10_2());
7312 match(Set dst (MaxF a b));
7313 format %{ "maxF $dst, $a, $b" %}
7314 ins_encode %{
7315 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7316 %}
7317 ins_pipe( pipe_slow );
7318 %}
7319
7320 // max = java.lang.Math.max(float a, float b)
7321 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7322 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7323 match(Set dst (MaxF a b));
7324 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7325 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7326 ins_encode %{
7327 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7328 %}
7329 ins_pipe( pipe_slow );
7330 %}
7331
7332 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7333 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7334 match(Set dst (MaxF a b));
7335 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7336
  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7338 ins_encode %{
7339 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7340 false /*min*/, true /*single*/);
7341 %}
7342 ins_pipe( pipe_slow );
7343 %}
7344
7345 // max = java.lang.Math.max(double a, double b)
7346 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
7347 predicate(VM_Version::supports_avx10_2());
7348 match(Set dst (MaxD a b));
7349 format %{ "maxD $dst, $a, $b" %}
7350 ins_encode %{
7351 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7352 %}
7353 ins_pipe( pipe_slow );
7354 %}
7355
7356 // max = java.lang.Math.max(double a, double b)
7357 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7358 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7359 match(Set dst (MaxD a b));
7360 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7361 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7362 ins_encode %{
7363 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7364 %}
7365 ins_pipe( pipe_slow );
7366 %}
7367
7368 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7369 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7370 match(Set dst (MaxD a b));
7371 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7372
7373 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7374 ins_encode %{
7375 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7376 false /*min*/, false /*single*/);
7377 %}
7378 ins_pipe( pipe_slow );
7379 %}
7380
// min = java.lang.Math.min(float a, float b)
7382 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
7383 predicate(VM_Version::supports_avx10_2());
7384 match(Set dst (MinF a b));
7385 format %{ "minF $dst, $a, $b" %}
7386 ins_encode %{
7387 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7388 %}
7389 ins_pipe( pipe_slow );
7390 %}
7391
7392 // min = java.lang.Math.min(float a, float b)
7393 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7394 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7395 match(Set dst (MinF a b));
7396 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7397 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7398 ins_encode %{
7399 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7400 %}
7401 ins_pipe( pipe_slow );
7402 %}
7403
7404 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7405 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7406 match(Set dst (MinF a b));
7407 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7408
7409 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7410 ins_encode %{
7411 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7412 true /*min*/, true /*single*/);
7413 %}
7414 ins_pipe( pipe_slow );
7415 %}
7416
// min = java.lang.Math.min(double a, double b)
7418 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
7419 predicate(VM_Version::supports_avx10_2());
7420 match(Set dst (MinD a b));
7421 format %{ "minD $dst, $a, $b" %}
7422 ins_encode %{
7423 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7424 %}
7425 ins_pipe( pipe_slow );
7426 %}
7427
7428 // min = java.lang.Math.min(double a, double b)
7429 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7430 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7431 match(Set dst (MinD a b));
7432 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7433 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7434 ins_encode %{
7435 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7436 %}
7437 ins_pipe( pipe_slow );
7438 %}
7439
7440 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7441 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7442 match(Set dst (MinD a b));
7443 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7444
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7446 ins_encode %{
7447 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7448 true /*min*/, false /*single*/);
7449 %}
7450 ins_pipe( pipe_slow );
7451 %}
7452
7453 // Load Effective Address
7454 instruct leaP8(rRegP dst, indOffset8 mem)
7455 %{
7456 match(Set dst mem);
7457
7458 ins_cost(110); // XXX
7459 format %{ "leaq $dst, $mem\t# ptr 8" %}
7460 ins_encode %{
7461 __ leaq($dst$$Register, $mem$$Address);
7462 %}
7463 ins_pipe(ialu_reg_reg_fat);
7464 %}
7465
7466 instruct leaP32(rRegP dst, indOffset32 mem)
7467 %{
7468 match(Set dst mem);
7469
7470 ins_cost(110);
7471 format %{ "leaq $dst, $mem\t# ptr 32" %}
7472 ins_encode %{
7473 __ leaq($dst$$Register, $mem$$Address);
7474 %}
7475 ins_pipe(ialu_reg_reg_fat);
7476 %}
7477
7478 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7479 %{
7480 match(Set dst mem);
7481
7482 ins_cost(110);
7483 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7484 ins_encode %{
7485 __ leaq($dst$$Register, $mem$$Address);
7486 %}
7487 ins_pipe(ialu_reg_reg_fat);
7488 %}
7489
7490 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7491 %{
7492 match(Set dst mem);
7493
7494 ins_cost(110);
7495 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7496 ins_encode %{
7497 __ leaq($dst$$Register, $mem$$Address);
7498 %}
7499 ins_pipe(ialu_reg_reg_fat);
7500 %}
7501
7502 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7503 %{
7504 match(Set dst mem);
7505
7506 ins_cost(110);
7507 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7508 ins_encode %{
7509 __ leaq($dst$$Register, $mem$$Address);
7510 %}
7511 ins_pipe(ialu_reg_reg_fat);
7512 %}
7513
7514 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7515 %{
7516 match(Set dst mem);
7517
7518 ins_cost(110);
7519 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7520 ins_encode %{
7521 __ leaq($dst$$Register, $mem$$Address);
7522 %}
7523 ins_pipe(ialu_reg_reg_fat);
7524 %}
7525
7526 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7527 %{
7528 match(Set dst mem);
7529
7530 ins_cost(110);
7531 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7532 ins_encode %{
7533 __ leaq($dst$$Register, $mem$$Address);
7534 %}
7535 ins_pipe(ialu_reg_reg_fat);
7536 %}
7537
7538 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7539 %{
7540 match(Set dst mem);
7541
7542 ins_cost(110);
7543 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7544 ins_encode %{
7545 __ leaq($dst$$Register, $mem$$Address);
7546 %}
7547 ins_pipe(ialu_reg_reg_fat);
7548 %}
7549
// Load Effective Address which uses a narrow (32-bit) oop
7551 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7552 %{
7553 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7554 match(Set dst mem);
7555
7556 ins_cost(110);
7557 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7558 ins_encode %{
7559 __ leaq($dst$$Register, $mem$$Address);
7560 %}
7561 ins_pipe(ialu_reg_reg_fat);
7562 %}
7563
7564 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7565 %{
7566 predicate(CompressedOops::shift() == 0);
7567 match(Set dst mem);
7568
7569 ins_cost(110); // XXX
7570 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7571 ins_encode %{
7572 __ leaq($dst$$Register, $mem$$Address);
7573 %}
7574 ins_pipe(ialu_reg_reg_fat);
7575 %}
7576
7577 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7578 %{
7579 predicate(CompressedOops::shift() == 0);
7580 match(Set dst mem);
7581
7582 ins_cost(110);
7583 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7584 ins_encode %{
7585 __ leaq($dst$$Register, $mem$$Address);
7586 %}
7587 ins_pipe(ialu_reg_reg_fat);
7588 %}
7589
7590 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7591 %{
7592 predicate(CompressedOops::shift() == 0);
7593 match(Set dst mem);
7594
7595 ins_cost(110);
7596 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7597 ins_encode %{
7598 __ leaq($dst$$Register, $mem$$Address);
7599 %}
7600 ins_pipe(ialu_reg_reg_fat);
7601 %}
7602
7603 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7604 %{
7605 predicate(CompressedOops::shift() == 0);
7606 match(Set dst mem);
7607
7608 ins_cost(110);
7609 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7610 ins_encode %{
7611 __ leaq($dst$$Register, $mem$$Address);
7612 %}
7613 ins_pipe(ialu_reg_reg_fat);
7614 %}
7615
7616 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7617 %{
7618 predicate(CompressedOops::shift() == 0);
7619 match(Set dst mem);
7620
7621 ins_cost(110);
7622 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7623 ins_encode %{
7624 __ leaq($dst$$Register, $mem$$Address);
7625 %}
7626 ins_pipe(ialu_reg_reg_fat);
7627 %}
7628
7629 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7630 %{
7631 predicate(CompressedOops::shift() == 0);
7632 match(Set dst mem);
7633
7634 ins_cost(110);
7635 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7636 ins_encode %{
7637 __ leaq($dst$$Register, $mem$$Address);
7638 %}
7639 ins_pipe(ialu_reg_reg_fat);
7640 %}
7641
7642 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7643 %{
7644 predicate(CompressedOops::shift() == 0);
7645 match(Set dst mem);
7646
7647 ins_cost(110);
7648 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7649 ins_encode %{
7650 __ leaq($dst$$Register, $mem$$Address);
7651 %}
7652 ins_pipe(ialu_reg_reg_fat);
7653 %}
7654
7655 instruct loadConI(rRegI dst, immI src)
7656 %{
7657 match(Set dst src);
7658
7659 format %{ "movl $dst, $src\t# int" %}
7660 ins_encode %{
7661 __ movl($dst$$Register, $src$$constant);
7662 %}
7663 ins_pipe(ialu_reg_fat); // XXX
7664 %}
7665
7666 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7667 %{
7668 match(Set dst src);
7669 effect(KILL cr);
7670
7671 ins_cost(50);
7672 format %{ "xorl $dst, $dst\t# int" %}
7673 ins_encode %{
7674 __ xorl($dst$$Register, $dst$$Register);
7675 %}
7676 ins_pipe(ialu_reg);
7677 %}
7678
7679 instruct loadConL(rRegL dst, immL src)
7680 %{
7681 match(Set dst src);
7682
7683 ins_cost(150);
7684 format %{ "movq $dst, $src\t# long" %}
7685 ins_encode %{
7686 __ mov64($dst$$Register, $src$$constant);
7687 %}
7688 ins_pipe(ialu_reg);
7689 %}
7690
7691 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7692 %{
7693 match(Set dst src);
7694 effect(KILL cr);
7695
7696 ins_cost(50);
7697 format %{ "xorl $dst, $dst\t# long" %}
7698 ins_encode %{
7699 __ xorl($dst$$Register, $dst$$Register);
7700 %}
7701 ins_pipe(ialu_reg); // XXX
7702 %}
7703
7704 instruct loadConUL32(rRegL dst, immUL32 src)
7705 %{
7706 match(Set dst src);
7707
7708 ins_cost(60);
7709 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7710 ins_encode %{
7711 __ movl($dst$$Register, $src$$constant);
7712 %}
7713 ins_pipe(ialu_reg);
7714 %}
7715
7716 instruct loadConL32(rRegL dst, immL32 src)
7717 %{
7718 match(Set dst src);
7719
7720 ins_cost(70);
7721 format %{ "movq $dst, $src\t# long (32-bit)" %}
7722 ins_encode %{
7723 __ movq($dst$$Register, $src$$constant);
7724 %}
7725 ins_pipe(ialu_reg);
7726 %}
7727
7728 instruct loadConP(rRegP dst, immP con) %{
7729 match(Set dst con);
7730
7731 format %{ "movq $dst, $con\t# ptr" %}
7732 ins_encode %{
7733 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7734 %}
7735 ins_pipe(ialu_reg_fat); // XXX
7736 %}
7737
7738 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7739 %{
7740 match(Set dst src);
7741 effect(KILL cr);
7742
7743 ins_cost(50);
7744 format %{ "xorl $dst, $dst\t# ptr" %}
7745 ins_encode %{
7746 __ xorl($dst$$Register, $dst$$Register);
7747 %}
7748 ins_pipe(ialu_reg);
7749 %}
7750
7751 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7752 %{
7753 match(Set dst src);
7754 effect(KILL cr);
7755
7756 ins_cost(60);
7757 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7758 ins_encode %{
7759 __ movl($dst$$Register, $src$$constant);
7760 %}
7761 ins_pipe(ialu_reg);
7762 %}
7763
7764 instruct loadConF(regF dst, immF con) %{
7765 match(Set dst con);
7766 ins_cost(125);
7767 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7768 ins_encode %{
7769 __ movflt($dst$$XMMRegister, $constantaddress($con));
7770 %}
7771 ins_pipe(pipe_slow);
7772 %}
7773
7774 instruct loadConH(regF dst, immH con) %{
7775 match(Set dst con);
7776 ins_cost(125);
7777 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7778 ins_encode %{
7779 __ movflt($dst$$XMMRegister, $constantaddress($con));
7780 %}
7781 ins_pipe(pipe_slow);
7782 %}
7783
7784 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7785 match(Set dst src);
7786 effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7788 ins_encode %{
7789 __ xorq($dst$$Register, $dst$$Register);
7790 %}
7791 ins_pipe(ialu_reg);
7792 %}
7793
7794 instruct loadConN(rRegN dst, immN src) %{
7795 match(Set dst src);
7796
7797 ins_cost(125);
7798 format %{ "movl $dst, $src\t# compressed ptr" %}
7799 ins_encode %{
7800 address con = (address)$src$$constant;
7801 if (con == nullptr) {
7802 ShouldNotReachHere();
7803 } else {
7804 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7805 }
7806 %}
7807 ins_pipe(ialu_reg_fat); // XXX
7808 %}
7809
7810 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7811 match(Set dst src);
7812
7813 ins_cost(125);
7814 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7815 ins_encode %{
7816 address con = (address)$src$$constant;
7817 if (con == nullptr) {
7818 ShouldNotReachHere();
7819 } else {
7820 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7821 }
7822 %}
7823 ins_pipe(ialu_reg_fat); // XXX
7824 %}
7825
7826 instruct loadConF0(regF dst, immF0 src)
7827 %{
7828 match(Set dst src);
7829 ins_cost(100);
7830
7831 format %{ "xorps $dst, $dst\t# float 0.0" %}
7832 ins_encode %{
7833 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7834 %}
7835 ins_pipe(pipe_slow);
7836 %}
7837
// Use the same format since predicate() cannot be used here.
7839 instruct loadConD(regD dst, immD con) %{
7840 match(Set dst con);
7841 ins_cost(125);
7842 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7843 ins_encode %{
7844 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7845 %}
7846 ins_pipe(pipe_slow);
7847 %}
7848
7849 instruct loadConD0(regD dst, immD0 src)
7850 %{
7851 match(Set dst src);
7852 ins_cost(100);
7853
7854 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7855 ins_encode %{
7856 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7857 %}
7858 ins_pipe(pipe_slow);
7859 %}
7860
7861 instruct loadSSI(rRegI dst, stackSlotI src)
7862 %{
7863 match(Set dst src);
7864
7865 ins_cost(125);
7866 format %{ "movl $dst, $src\t# int stk" %}
7867 ins_encode %{
7868 __ movl($dst$$Register, $src$$Address);
7869 %}
7870 ins_pipe(ialu_reg_mem);
7871 %}
7872
7873 instruct loadSSL(rRegL dst, stackSlotL src)
7874 %{
7875 match(Set dst src);
7876
7877 ins_cost(125);
7878 format %{ "movq $dst, $src\t# long stk" %}
7879 ins_encode %{
7880 __ movq($dst$$Register, $src$$Address);
7881 %}
7882 ins_pipe(ialu_reg_mem);
7883 %}
7884
7885 instruct loadSSP(rRegP dst, stackSlotP src)
7886 %{
7887 match(Set dst src);
7888
7889 ins_cost(125);
7890 format %{ "movq $dst, $src\t# ptr stk" %}
7891 ins_encode %{
7892 __ movq($dst$$Register, $src$$Address);
7893 %}
7894 ins_pipe(ialu_reg_mem);
7895 %}
7896
7897 instruct loadSSF(regF dst, stackSlotF src)
7898 %{
7899 match(Set dst src);
7900
7901 ins_cost(125);
7902 format %{ "movss $dst, $src\t# float stk" %}
7903 ins_encode %{
7904 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7905 %}
7906 ins_pipe(pipe_slow); // XXX
7907 %}
7908
// Use the same format since predicate() cannot be used here.
7910 instruct loadSSD(regD dst, stackSlotD src)
7911 %{
7912 match(Set dst src);
7913
7914 ins_cost(125);
7915 format %{ "movsd $dst, $src\t# double stk" %}
7916 ins_encode %{
7917 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7918 %}
7919 ins_pipe(pipe_slow); // XXX
7920 %}
7921
7922 // Prefetch instructions for allocation.
7923 // Must be safe to execute with invalid address (cannot fault).
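//
// The AllocatePrefetchInstr flag selects the variant emitted below:
// 0 => prefetchnta, 1 => prefetcht0, 2 => prefetcht2, 3 => prefetchw.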
7924
7925 instruct prefetchAlloc( memory mem ) %{
7926 predicate(AllocatePrefetchInstr==3);
7927 match(PrefetchAllocation mem);
7928 ins_cost(125);
7929
7930 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7931 ins_encode %{
7932 __ prefetchw($mem$$Address);
7933 %}
7934 ins_pipe(ialu_mem);
7935 %}
7936
7937 instruct prefetchAllocNTA( memory mem ) %{
7938 predicate(AllocatePrefetchInstr==0);
7939 match(PrefetchAllocation mem);
7940 ins_cost(125);
7941
7942 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7943 ins_encode %{
7944 __ prefetchnta($mem$$Address);
7945 %}
7946 ins_pipe(ialu_mem);
7947 %}
7948
7949 instruct prefetchAllocT0( memory mem ) %{
7950 predicate(AllocatePrefetchInstr==1);
7951 match(PrefetchAllocation mem);
7952 ins_cost(125);
7953
7954 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7955 ins_encode %{
7956 __ prefetcht0($mem$$Address);
7957 %}
7958 ins_pipe(ialu_mem);
7959 %}
7960
7961 instruct prefetchAllocT2( memory mem ) %{
7962 predicate(AllocatePrefetchInstr==2);
7963 match(PrefetchAllocation mem);
7964 ins_cost(125);
7965
7966 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
7967 ins_encode %{
7968 __ prefetcht2($mem$$Address);
7969 %}
7970 ins_pipe(ialu_mem);
7971 %}
7972
7973 //----------Store Instructions-------------------------------------------------
7974
7975 // Store Byte
7976 instruct storeB(memory mem, rRegI src)
7977 %{
7978 match(Set mem (StoreB mem src));
7979
7980 ins_cost(125); // XXX
7981 format %{ "movb $mem, $src\t# byte" %}
7982 ins_encode %{
7983 __ movb($mem$$Address, $src$$Register);
7984 %}
7985 ins_pipe(ialu_mem_reg);
7986 %}
7987
7988 // Store Char/Short
7989 instruct storeC(memory mem, rRegI src)
7990 %{
7991 match(Set mem (StoreC mem src));
7992
7993 ins_cost(125); // XXX
7994 format %{ "movw $mem, $src\t# char/short" %}
7995 ins_encode %{
7996 __ movw($mem$$Address, $src$$Register);
7997 %}
7998 ins_pipe(ialu_mem_reg);
7999 %}
8000
8001 // Store Integer
8002 instruct storeI(memory mem, rRegI src)
8003 %{
8004 match(Set mem (StoreI mem src));
8005
8006 ins_cost(125); // XXX
8007 format %{ "movl $mem, $src\t# int" %}
8008 ins_encode %{
8009 __ movl($mem$$Address, $src$$Register);
8010 %}
8011 ins_pipe(ialu_mem_reg);
8012 %}
8013
8014 // Store Long
8015 instruct storeL(memory mem, rRegL src)
8016 %{
8017 match(Set mem (StoreL mem src));
8018
8019 ins_cost(125); // XXX
8020 format %{ "movq $mem, $src\t# long" %}
8021 ins_encode %{
8022 __ movq($mem$$Address, $src$$Register);
8023 %}
8024 ins_pipe(ialu_mem_reg); // XXX
8025 %}
8026
8027 // Store Pointer
8028 instruct storeP(memory mem, any_RegP src)
8029 %{
8030 predicate(n->as_Store()->barrier_data() == 0);
8031 match(Set mem (StoreP mem src));
8032
8033 ins_cost(125); // XXX
8034 format %{ "movq $mem, $src\t# ptr" %}
8035 ins_encode %{
8036 __ movq($mem$$Address, $src$$Register);
8037 %}
8038 ins_pipe(ialu_mem_reg);
8039 %}
8040
8041 instruct storeImmP0(memory mem, immP0 zero)
8042 %{
8043 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8044 match(Set mem (StoreP mem zero));
8045
8046 ins_cost(125); // XXX
8047 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8048 ins_encode %{
8049 __ movq($mem$$Address, r12);
8050 %}
8051 ins_pipe(ialu_mem_reg);
8052 %}
8053
8054 // Store Null Pointer, mark word, or other simple pointer constant.
8055 instruct storeImmP(memory mem, immP31 src)
8056 %{
8057 predicate(n->as_Store()->barrier_data() == 0);
8058 match(Set mem (StoreP mem src));
8059
8060 ins_cost(150); // XXX
8061 format %{ "movq $mem, $src\t# ptr" %}
8062 ins_encode %{
8063 __ movq($mem$$Address, $src$$constant);
8064 %}
8065 ins_pipe(ialu_mem_imm);
8066 %}
8067
8068 // Store Compressed Pointer
8069 instruct storeN(memory mem, rRegN src)
8070 %{
8071 predicate(n->as_Store()->barrier_data() == 0);
8072 match(Set mem (StoreN mem src));
8073
8074 ins_cost(125); // XXX
8075 format %{ "movl $mem, $src\t# compressed ptr" %}
8076 ins_encode %{
8077 __ movl($mem$$Address, $src$$Register);
8078 %}
8079 ins_pipe(ialu_mem_reg);
8080 %}
8081
8082 instruct storeNKlass(memory mem, rRegN src)
8083 %{
8084 match(Set mem (StoreNKlass mem src));
8085
8086 ins_cost(125); // XXX
8087 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8088 ins_encode %{
8089 __ movl($mem$$Address, $src$$Register);
8090 %}
8091 ins_pipe(ialu_mem_reg);
8092 %}
8093
8094 instruct storeImmN0(memory mem, immN0 zero)
8095 %{
8096 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8097 match(Set mem (StoreN mem zero));
8098
8099 ins_cost(125); // XXX
8100 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8101 ins_encode %{
8102 __ movl($mem$$Address, r12);
8103 %}
8104 ins_pipe(ialu_mem_reg);
8105 %}
8106
8107 instruct storeImmN(memory mem, immN src)
8108 %{
8109 predicate(n->as_Store()->barrier_data() == 0);
8110 match(Set mem (StoreN mem src));
8111
8112 ins_cost(150); // XXX
8113 format %{ "movl $mem, $src\t# compressed ptr" %}
8114 ins_encode %{
8115 address con = (address)$src$$constant;
8116 if (con == nullptr) {
8117 __ movl($mem$$Address, 0);
8118 } else {
8119 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8120 }
8121 %}
8122 ins_pipe(ialu_mem_imm);
8123 %}
8124
8125 instruct storeImmNKlass(memory mem, immNKlass src)
8126 %{
8127 match(Set mem (StoreNKlass mem src));
8128
8129 ins_cost(150); // XXX
8130 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8131 ins_encode %{
8132 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8133 %}
8134 ins_pipe(ialu_mem_imm);
8135 %}
8136
8137 // Store Integer Immediate
8138 instruct storeImmI0(memory mem, immI_0 zero)
8139 %{
8140 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8141 match(Set mem (StoreI mem zero));
8142
8143 ins_cost(125); // XXX
8144 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8145 ins_encode %{
8146 __ movl($mem$$Address, r12);
8147 %}
8148 ins_pipe(ialu_mem_reg);
8149 %}
8150
8151 instruct storeImmI(memory mem, immI src)
8152 %{
8153 match(Set mem (StoreI mem src));
8154
8155 ins_cost(150);
8156 format %{ "movl $mem, $src\t# int" %}
8157 ins_encode %{
8158 __ movl($mem$$Address, $src$$constant);
8159 %}
8160 ins_pipe(ialu_mem_imm);
8161 %}
8162
8163 // Store Long Immediate
8164 instruct storeImmL0(memory mem, immL0 zero)
8165 %{
8166 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8167 match(Set mem (StoreL mem zero));
8168
8169 ins_cost(125); // XXX
8170 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8171 ins_encode %{
8172 __ movq($mem$$Address, r12);
8173 %}
8174 ins_pipe(ialu_mem_reg);
8175 %}
8176
8177 instruct storeImmL(memory mem, immL32 src)
8178 %{
8179 match(Set mem (StoreL mem src));
8180
8181 ins_cost(150);
8182 format %{ "movq $mem, $src\t# long" %}
8183 ins_encode %{
8184 __ movq($mem$$Address, $src$$constant);
8185 %}
8186 ins_pipe(ialu_mem_imm);
8187 %}
8188
8189 // Store Short/Char Immediate
8190 instruct storeImmC0(memory mem, immI_0 zero)
8191 %{
8192 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8193 match(Set mem (StoreC mem zero));
8194
8195 ins_cost(125); // XXX
8196 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8197 ins_encode %{
8198 __ movw($mem$$Address, r12);
8199 %}
8200 ins_pipe(ialu_mem_reg);
8201 %}
8202
8203 instruct storeImmI16(memory mem, immI16 src)
8204 %{
8205 predicate(UseStoreImmI16);
8206 match(Set mem (StoreC mem src));
8207
8208 ins_cost(150);
8209 format %{ "movw $mem, $src\t# short/char" %}
8210 ins_encode %{
8211 __ movw($mem$$Address, $src$$constant);
8212 %}
8213 ins_pipe(ialu_mem_imm);
8214 %}
8215
8216 // Store Byte Immediate
8217 instruct storeImmB0(memory mem, immI_0 zero)
8218 %{
8219 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8220 match(Set mem (StoreB mem zero));
8221
8222 ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8224 ins_encode %{
8225 __ movb($mem$$Address, r12);
8226 %}
8227 ins_pipe(ialu_mem_reg);
8228 %}
8229
8230 instruct storeImmB(memory mem, immI8 src)
8231 %{
8232 match(Set mem (StoreB mem src));
8233
8234 ins_cost(150); // XXX
8235 format %{ "movb $mem, $src\t# byte" %}
8236 ins_encode %{
8237 __ movb($mem$$Address, $src$$constant);
8238 %}
8239 ins_pipe(ialu_mem_imm);
8240 %}
8241
8242 // Store Float
8243 instruct storeF(memory mem, regF src)
8244 %{
8245 match(Set mem (StoreF mem src));
8246
8247 ins_cost(95); // XXX
8248 format %{ "movss $mem, $src\t# float" %}
8249 ins_encode %{
8250 __ movflt($mem$$Address, $src$$XMMRegister);
8251 %}
8252 ins_pipe(pipe_slow); // XXX
8253 %}
8254
8255 // Store immediate Float value (it is faster than store from XMM register)
8256 instruct storeF0(memory mem, immF0 zero)
8257 %{
8258 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8259 match(Set mem (StoreF mem zero));
8260
8261 ins_cost(25); // XXX
8262 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8263 ins_encode %{
8264 __ movl($mem$$Address, r12);
8265 %}
8266 ins_pipe(ialu_mem_reg);
8267 %}
8268
8269 instruct storeF_imm(memory mem, immF src)
8270 %{
8271 match(Set mem (StoreF mem src));
8272
8273 ins_cost(50);
8274 format %{ "movl $mem, $src\t# float" %}
8275 ins_encode %{
8276 __ movl($mem$$Address, jint_cast($src$$constant));
8277 %}
8278 ins_pipe(ialu_mem_imm);
8279 %}
8280
8281 // Store Double
8282 instruct storeD(memory mem, regD src)
8283 %{
8284 match(Set mem (StoreD mem src));
8285
8286 ins_cost(95); // XXX
8287 format %{ "movsd $mem, $src\t# double" %}
8288 ins_encode %{
8289 __ movdbl($mem$$Address, $src$$XMMRegister);
8290 %}
8291 ins_pipe(pipe_slow); // XXX
8292 %}
8293
8294 // Store immediate double 0.0 (it is faster than store from XMM register)
8295 instruct storeD0_imm(memory mem, immD0 src)
8296 %{
8297 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8298 match(Set mem (StoreD mem src));
8299
8300 ins_cost(50);
8301 format %{ "movq $mem, $src\t# double 0." %}
8302 ins_encode %{
8303 __ movq($mem$$Address, $src$$constant);
8304 %}
8305 ins_pipe(ialu_mem_imm);
8306 %}
8307
8308 instruct storeD0(memory mem, immD0 zero)
8309 %{
8310 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8311 match(Set mem (StoreD mem zero));
8312
8313 ins_cost(25); // XXX
8314 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8315 ins_encode %{
8316 __ movq($mem$$Address, r12);
8317 %}
8318 ins_pipe(ialu_mem_reg);
8319 %}
8320
8321 instruct storeSSI(stackSlotI dst, rRegI src)
8322 %{
8323 match(Set dst src);
8324
8325 ins_cost(100);
8326 format %{ "movl $dst, $src\t# int stk" %}
8327 ins_encode %{
8328 __ movl($dst$$Address, $src$$Register);
8329 %}
8330 ins_pipe( ialu_mem_reg );
8331 %}
8332
8333 instruct storeSSL(stackSlotL dst, rRegL src)
8334 %{
8335 match(Set dst src);
8336
8337 ins_cost(100);
8338 format %{ "movq $dst, $src\t# long stk" %}
8339 ins_encode %{
8340 __ movq($dst$$Address, $src$$Register);
8341 %}
8342 ins_pipe(ialu_mem_reg);
8343 %}
8344
8345 instruct storeSSP(stackSlotP dst, rRegP src)
8346 %{
8347 match(Set dst src);
8348
8349 ins_cost(100);
8350 format %{ "movq $dst, $src\t# ptr stk" %}
8351 ins_encode %{
8352 __ movq($dst$$Address, $src$$Register);
8353 %}
8354 ins_pipe(ialu_mem_reg);
8355 %}
8356
8357 instruct storeSSF(stackSlotF dst, regF src)
8358 %{
8359 match(Set dst src);
8360
8361 ins_cost(95); // XXX
8362 format %{ "movss $dst, $src\t# float stk" %}
8363 ins_encode %{
8364 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8365 %}
8366 ins_pipe(pipe_slow); // XXX
8367 %}
8368
8369 instruct storeSSD(stackSlotD dst, regD src)
8370 %{
8371 match(Set dst src);
8372
8373 ins_cost(95); // XXX
8374 format %{ "movsd $dst, $src\t# double stk" %}
8375 ins_encode %{
8376 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8377 %}
8378 ins_pipe(pipe_slow); // XXX
8379 %}
8380
8381 instruct cacheWB(indirect addr)
8382 %{
8383 predicate(VM_Version::supports_data_cache_line_flush());
8384 match(CacheWB addr);
8385
8386 ins_cost(100);
8387 format %{"cache wb $addr" %}
8388 ins_encode %{
8389 assert($addr->index_position() < 0, "should be");
8390 assert($addr$$disp == 0, "should be");
8391 __ cache_wb(Address($addr$$base$$Register, 0));
8392 %}
8393 ins_pipe(pipe_slow); // XXX
8394 %}
8395
8396 instruct cacheWBPreSync()
8397 %{
8398 predicate(VM_Version::supports_data_cache_line_flush());
8399 match(CacheWBPreSync);
8400
8401 ins_cost(100);
8402 format %{"cache wb presync" %}
8403 ins_encode %{
8404 __ cache_wbsync(true);
8405 %}
8406 ins_pipe(pipe_slow); // XXX
8407 %}
8408
8409 instruct cacheWBPostSync()
8410 %{
8411 predicate(VM_Version::supports_data_cache_line_flush());
8412 match(CacheWBPostSync);
8413
8414 ins_cost(100);
8415 format %{"cache wb postsync" %}
8416 ins_encode %{
8417 __ cache_wbsync(false);
8418 %}
8419 ins_pipe(pipe_slow); // XXX
8420 %}
8421
8422 //----------BSWAP Instructions-------------------------------------------------
8423 instruct bytes_reverse_int(rRegI dst) %{
8424 match(Set dst (ReverseBytesI dst));
8425
8426 format %{ "bswapl $dst" %}
8427 ins_encode %{
8428 __ bswapl($dst$$Register);
8429 %}
8430 ins_pipe( ialu_reg );
8431 %}
8432
8433 instruct bytes_reverse_long(rRegL dst) %{
8434 match(Set dst (ReverseBytesL dst));
8435
8436 format %{ "bswapq $dst" %}
8437 ins_encode %{
8438 __ bswapq($dst$$Register);
8439 %}
8440 ins_pipe( ialu_reg);
8441 %}
8442
8443 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8444 match(Set dst (ReverseBytesUS dst));
8445 effect(KILL cr);
8446
8447 format %{ "bswapl $dst\n\t"
8448 "shrl $dst,16\n\t" %}
8449 ins_encode %{
8450 __ bswapl($dst$$Register);
8451 __ shrl($dst$$Register, 16);
8452 %}
8453 ins_pipe( ialu_reg );
8454 %}
8455
8456 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8457 match(Set dst (ReverseBytesS dst));
8458 effect(KILL cr);
8459
8460 format %{ "bswapl $dst\n\t"
8461 "sar $dst,16\n\t" %}
8462 ins_encode %{
8463 __ bswapl($dst$$Register);
8464 __ sarl($dst$$Register, 16);
8465 %}
8466 ins_pipe( ialu_reg );
8467 %}
8468
8469 //---------- Zeros Count Instructions ------------------------------------------
8470
8471 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8472 predicate(UseCountLeadingZerosInstruction);
8473 match(Set dst (CountLeadingZerosI src));
8474 effect(KILL cr);
8475
8476 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8477 ins_encode %{
8478 __ lzcntl($dst$$Register, $src$$Register);
8479 %}
8480 ins_pipe(ialu_reg);
8481 %}
8482
8483 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8484 predicate(UseCountLeadingZerosInstruction);
8485 match(Set dst (CountLeadingZerosI (LoadI src)));
8486 effect(KILL cr);
8487 ins_cost(175);
8488 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8489 ins_encode %{
8490 __ lzcntl($dst$$Register, $src$$Address);
8491 %}
8492 ins_pipe(ialu_reg_mem);
8493 %}
8494
8495 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8496 predicate(!UseCountLeadingZerosInstruction);
8497 match(Set dst (CountLeadingZerosI src));
8498 effect(KILL cr);
8499
8500 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8501 "jnz skip\n\t"
8502 "movl $dst, -1\n"
8503 "skip:\n\t"
8504 "negl $dst\n\t"
8505 "addl $dst, 31" %}
8506 ins_encode %{
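    // bsrl returns the index of the highest set bit, so CLZ = 31 - index,
    // computed below as negate-then-add. A zero input sets ZF: dst is then
    // forced to -1 so the same arithmetic yields 31 - (-1) = 32.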
8507 Register Rdst = $dst$$Register;
8508 Register Rsrc = $src$$Register;
8509 Label skip;
8510 __ bsrl(Rdst, Rsrc);
8511 __ jccb(Assembler::notZero, skip);
8512 __ movl(Rdst, -1);
8513 __ bind(skip);
8514 __ negl(Rdst);
8515 __ addl(Rdst, BitsPerInt - 1);
8516 %}
8517 ins_pipe(ialu_reg);
8518 %}
8519
8520 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8521 predicate(UseCountLeadingZerosInstruction);
8522 match(Set dst (CountLeadingZerosL src));
8523 effect(KILL cr);
8524
8525 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8526 ins_encode %{
8527 __ lzcntq($dst$$Register, $src$$Register);
8528 %}
8529 ins_pipe(ialu_reg);
8530 %}
8531
8532 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8533 predicate(UseCountLeadingZerosInstruction);
8534 match(Set dst (CountLeadingZerosL (LoadL src)));
8535 effect(KILL cr);
8536 ins_cost(175);
8537 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8538 ins_encode %{
8539 __ lzcntq($dst$$Register, $src$$Address);
8540 %}
8541 ins_pipe(ialu_reg_mem);
8542 %}
8543
8544 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8545 predicate(!UseCountLeadingZerosInstruction);
8546 match(Set dst (CountLeadingZerosL src));
8547 effect(KILL cr);
8548
8549 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8550 "jnz skip\n\t"
8551 "movl $dst, -1\n"
8552 "skip:\n\t"
8553 "negl $dst\n\t"
8554 "addl $dst, 63" %}
8555 ins_encode %{
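    // Same idea as the int case: CLZ = 63 - bsrq index; a zero input is
    // forced to -1 so the negate-and-add yields 64.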
8556 Register Rdst = $dst$$Register;
8557 Register Rsrc = $src$$Register;
8558 Label skip;
8559 __ bsrq(Rdst, Rsrc);
8560 __ jccb(Assembler::notZero, skip);
8561 __ movl(Rdst, -1);
8562 __ bind(skip);
8563 __ negl(Rdst);
8564 __ addl(Rdst, BitsPerLong - 1);
8565 %}
8566 ins_pipe(ialu_reg);
8567 %}
8568
8569 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8570 predicate(UseCountTrailingZerosInstruction);
8571 match(Set dst (CountTrailingZerosI src));
8572 effect(KILL cr);
8573
8574 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8575 ins_encode %{
8576 __ tzcntl($dst$$Register, $src$$Register);
8577 %}
8578 ins_pipe(ialu_reg);
8579 %}
8580
8581 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8582 predicate(UseCountTrailingZerosInstruction);
8583 match(Set dst (CountTrailingZerosI (LoadI src)));
8584 effect(KILL cr);
8585 ins_cost(175);
8586 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8587 ins_encode %{
8588 __ tzcntl($dst$$Register, $src$$Address);
8589 %}
8590 ins_pipe(ialu_reg_mem);
8591 %}
8592
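// Fallback when TZCNT is unavailable: for x != 0 the BSF result is exactly
// the trailing-zero count, and a zero input sets ZF with an undefined
// destination, so the defined result (32 here, 64 in the long form) is
// materialized explicitly after the branch.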
8593 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8594 predicate(!UseCountTrailingZerosInstruction);
8595 match(Set dst (CountTrailingZerosI src));
8596 effect(KILL cr);
8597
8598 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8599 "jnz done\n\t"
8600 "movl $dst, 32\n"
8601 "done:" %}
8602 ins_encode %{
8603 Register Rdst = $dst$$Register;
8604 Label done;
8605 __ bsfl(Rdst, $src$$Register);
8606 __ jccb(Assembler::notZero, done);
8607 __ movl(Rdst, BitsPerInt);
8608 __ bind(done);
8609 %}
8610 ins_pipe(ialu_reg);
8611 %}
8612
8613 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8614 predicate(UseCountTrailingZerosInstruction);
8615 match(Set dst (CountTrailingZerosL src));
8616 effect(KILL cr);
8617
8618 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8619 ins_encode %{
8620 __ tzcntq($dst$$Register, $src$$Register);
8621 %}
8622 ins_pipe(ialu_reg);
8623 %}
8624
8625 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8626 predicate(UseCountTrailingZerosInstruction);
8627 match(Set dst (CountTrailingZerosL (LoadL src)));
8628 effect(KILL cr);
8629 ins_cost(175);
8630 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8631 ins_encode %{
8632 __ tzcntq($dst$$Register, $src$$Address);
8633 %}
8634 ins_pipe(ialu_reg_mem);
8635 %}
8636
8637 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8638 predicate(!UseCountTrailingZerosInstruction);
8639 match(Set dst (CountTrailingZerosL src));
8640 effect(KILL cr);
8641
8642 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8643 "jnz done\n\t"
8644 "movl $dst, 64\n"
8645 "done:" %}
8646 ins_encode %{
8647 Register Rdst = $dst$$Register;
8648 Label done;
8649 __ bsfq(Rdst, $src$$Register);
8650 __ jccb(Assembler::notZero, done);
8651 __ movl(Rdst, BitsPerLong);
8652 __ bind(done);
8653 %}
8654 ins_pipe(ialu_reg);
8655 %}
8656
//---------- Reverse Operation Instructions ------------------------------------
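// Without GFNI the bits are reversed by a scalar shift-and-mask swap
// network; with GFNI a single GF2P8AFFINEQB against a fixed bit-reversal
// matrix flips the bits within each byte, leaving only a byte swap.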
8658 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8659 predicate(!VM_Version::supports_gfni());
8660 match(Set dst (ReverseI src));
8661 effect(TEMP dst, TEMP rtmp, KILL cr);
8662 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8663 ins_encode %{
8664 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8665 %}
8666 ins_pipe( ialu_reg );
8667 %}
8668
8669 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8670 predicate(VM_Version::supports_gfni());
8671 match(Set dst (ReverseI src));
8672 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8673 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8674 ins_encode %{
8675 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8676 %}
8677 ins_pipe( ialu_reg );
8678 %}
8679
8680 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8681 predicate(!VM_Version::supports_gfni());
8682 match(Set dst (ReverseL src));
8683 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8684 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8685 ins_encode %{
8686 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8687 %}
8688 ins_pipe( ialu_reg );
8689 %}
8690
8691 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8692 predicate(VM_Version::supports_gfni());
8693 match(Set dst (ReverseL src));
8694 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8695 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8696 ins_encode %{
8697 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8698 %}
8699 ins_pipe( ialu_reg );
8700 %}
8701
8702 //---------- Population Count Instructions -------------------------------------
8703
8704 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8705 predicate(UsePopCountInstruction);
8706 match(Set dst (PopCountI src));
8707 effect(KILL cr);
8708
8709 format %{ "popcnt $dst, $src" %}
8710 ins_encode %{
8711 __ popcntl($dst$$Register, $src$$Register);
8712 %}
8713 ins_pipe(ialu_reg);
8714 %}
8715
8716 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8717 predicate(UsePopCountInstruction);
8718 match(Set dst (PopCountI (LoadI mem)));
8719 effect(KILL cr);
8720
8721 format %{ "popcnt $dst, $mem" %}
8722 ins_encode %{
8723 __ popcntl($dst$$Register, $mem$$Address);
8724 %}
8725 ins_pipe(ialu_reg);
8726 %}
8727
8728 // Note: Long.bitCount(long) returns an int.
8729 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8730 predicate(UsePopCountInstruction);
8731 match(Set dst (PopCountL src));
8732 effect(KILL cr);
8733
8734 format %{ "popcnt $dst, $src" %}
8735 ins_encode %{
8736 __ popcntq($dst$$Register, $src$$Register);
8737 %}
8738 ins_pipe(ialu_reg);
8739 %}
8740
8741 // Note: Long.bitCount(long) returns an int.
8742 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8743 predicate(UsePopCountInstruction);
8744 match(Set dst (PopCountL (LoadL mem)));
8745 effect(KILL cr);
8746
8747 format %{ "popcnt $dst, $mem" %}
8748 ins_encode %{
8749 __ popcntq($dst$$Register, $mem$$Address);
8750 %}
8751 ins_pipe(ialu_reg);
%}

8755 //----------MemBar Instructions-----------------------------------------------
8756 // Memory barrier flavors
8757
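// x86 implements TSO (total store order): the only hardware reordering is a
// load passing an earlier store to a different location. Acquire, release,
// and store-store barriers therefore encode empty, and only the StoreLoad
// case (MemBarVolatile) costs an instruction: a locked add to the top of
// the stack, which is cheaper than MFENCE on most processors.
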
8758 instruct membar_acquire()
8759 %{
8760 match(MemBarAcquire);
8761 match(LoadFence);
8762 ins_cost(0);
8763
8764 size(0);
8765 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8766 ins_encode();
8767 ins_pipe(empty);
8768 %}
8769
8770 instruct membar_acquire_lock()
8771 %{
8772 match(MemBarAcquireLock);
8773 ins_cost(0);
8774
8775 size(0);
8776 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8777 ins_encode();
8778 ins_pipe(empty);
8779 %}
8780
8781 instruct membar_release()
8782 %{
8783 match(MemBarRelease);
8784 match(StoreFence);
8785 ins_cost(0);
8786
8787 size(0);
8788 format %{ "MEMBAR-release ! (empty encoding)" %}
8789 ins_encode();
8790 ins_pipe(empty);
8791 %}
8792
8793 instruct membar_release_lock()
8794 %{
8795 match(MemBarReleaseLock);
8796 ins_cost(0);
8797
8798 size(0);
8799 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8800 ins_encode();
8801 ins_pipe(empty);
8802 %}
8803
8804 instruct membar_volatile(rFlagsReg cr) %{
8805 match(MemBarVolatile);
8806 effect(KILL cr);
8807 ins_cost(400);
8808
8809 format %{
8810 $$template
8811 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8812 %}
8813 ins_encode %{
8814 __ membar(Assembler::StoreLoad);
8815 %}
8816 ins_pipe(pipe_slow);
8817 %}
8818
8819 instruct unnecessary_membar_volatile()
8820 %{
8821 match(MemBarVolatile);
8822 predicate(Matcher::post_store_load_barrier(n));
8823 ins_cost(0);
8824
8825 size(0);
8826 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8827 ins_encode();
8828 ins_pipe(empty);
8829 %}
8830
8831 instruct membar_storestore() %{
8832 match(MemBarStoreStore);
8833 match(StoreStoreFence);
8834 ins_cost(0);
8835
8836 size(0);
8837 format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
8839 ins_pipe(empty);
8840 %}
8841
8842 //----------Move Instructions--------------------------------------------------
8843
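// The register-to-register casts below are bit-preserving, so each encoding
// elides the move when the allocator already placed src and dst in the same
// register.
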
8844 instruct castX2P(rRegP dst, rRegL src)
8845 %{
8846 match(Set dst (CastX2P src));
8847
8848 format %{ "movq $dst, $src\t# long->ptr" %}
8849 ins_encode %{
8850 if ($dst$$reg != $src$$reg) {
8851 __ movptr($dst$$Register, $src$$Register);
8852 }
8853 %}
8854 ins_pipe(ialu_reg_reg); // XXX
8855 %}
8856
8857 instruct castI2N(rRegN dst, rRegI src)
8858 %{
8859 match(Set dst (CastI2N src));
8860
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
8862 ins_encode %{
8863 if ($dst$$reg != $src$$reg) {
8864 __ movl($dst$$Register, $src$$Register);
8865 }
8866 %}
8867 ins_pipe(ialu_reg_reg); // XXX
8868 %}
8869
8870 instruct castN2X(rRegL dst, rRegN src)
8871 %{
8872 match(Set dst (CastP2X src));
8873
8874 format %{ "movq $dst, $src\t# ptr -> long" %}
8875 ins_encode %{
8876 if ($dst$$reg != $src$$reg) {
8877 __ movptr($dst$$Register, $src$$Register);
8878 }
8879 %}
8880 ins_pipe(ialu_reg_reg); // XXX
8881 %}
8882
8883 instruct castP2X(rRegL dst, rRegP src)
8884 %{
8885 match(Set dst (CastP2X src));
8886
8887 format %{ "movq $dst, $src\t# ptr -> long" %}
8888 ins_encode %{
8889 if ($dst$$reg != $src$$reg) {
8890 __ movptr($dst$$Register, $src$$Register);
8891 }
8892 %}
8893 ins_pipe(ialu_reg_reg); // XXX
8894 %}
8895
// Convert an oop into an int for vector alignment masking
8897 instruct convP2I(rRegI dst, rRegP src)
8898 %{
8899 match(Set dst (ConvL2I (CastP2X src)));
8900
8901 format %{ "movl $dst, $src\t# ptr -> int" %}
8902 ins_encode %{
8903 __ movl($dst$$Register, $src$$Register);
8904 %}
8905 ins_pipe(ialu_reg_reg); // XXX
8906 %}
8907
// Convert a compressed oop into an int for vector alignment masking
// in the case of 32-bit oops (heap < 4Gb).
8910 instruct convN2I(rRegI dst, rRegN src)
8911 %{
8912 predicate(CompressedOops::shift() == 0);
8913 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8914
8915 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8916 ins_encode %{
8917 __ movl($dst$$Register, $src$$Register);
8918 %}
8919 ins_pipe(ialu_reg_reg); // XXX
8920 %}
8921
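// Compressed oops: with a non-zero base and shift the encode computes
// (oop - base) >> shift and the decode its inverse, with null mapping to
// null in both directions; when the heap fits below 4Gb both degenerate to
// plain 32-bit moves. The _not_null variants may omit the null check.
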
8922 // Convert oop pointer into compressed form
8923 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8924 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8925 match(Set dst (EncodeP src));
8926 effect(KILL cr);
8927 format %{ "encode_heap_oop $dst,$src" %}
8928 ins_encode %{
8929 Register s = $src$$Register;
8930 Register d = $dst$$Register;
8931 if (s != d) {
8932 __ movq(d, s);
8933 }
8934 __ encode_heap_oop(d);
8935 %}
8936 ins_pipe(ialu_reg_long);
8937 %}
8938
8939 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8940 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8941 match(Set dst (EncodeP src));
8942 effect(KILL cr);
8943 format %{ "encode_heap_oop_not_null $dst,$src" %}
8944 ins_encode %{
8945 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8946 %}
8947 ins_pipe(ialu_reg_long);
8948 %}
8949
8950 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8951 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8952 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8953 match(Set dst (DecodeN src));
8954 effect(KILL cr);
8955 format %{ "decode_heap_oop $dst,$src" %}
8956 ins_encode %{
8957 Register s = $src$$Register;
8958 Register d = $dst$$Register;
8959 if (s != d) {
8960 __ movq(d, s);
8961 }
8962 __ decode_heap_oop(d);
8963 %}
8964 ins_pipe(ialu_reg_long);
8965 %}
8966
8967 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8968 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8969 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8970 match(Set dst (DecodeN src));
8971 effect(KILL cr);
8972 format %{ "decode_heap_oop_not_null $dst,$src" %}
8973 ins_encode %{
8974 Register s = $src$$Register;
8975 Register d = $dst$$Register;
8976 if (s != d) {
8977 __ decode_heap_oop_not_null(d, s);
8978 } else {
8979 __ decode_heap_oop_not_null(d);
8980 }
8981 %}
8982 ins_pipe(ialu_reg_long);
8983 %}
8984
8985 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8986 match(Set dst (EncodePKlass src));
8987 effect(TEMP dst, KILL cr);
8988 format %{ "encode_and_move_klass_not_null $dst,$src" %}
8989 ins_encode %{
8990 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
8991 %}
8992 ins_pipe(ialu_reg_long);
8993 %}
8994
8995 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8996 match(Set dst (DecodeNKlass src));
8997 effect(TEMP dst, KILL cr);
8998 format %{ "decode_and_move_klass_not_null $dst,$src" %}
8999 ins_encode %{
9000 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9001 %}
9002 ins_pipe(ialu_reg_long);
9003 %}
9004
9005 //----------Conditional Move---------------------------------------------------
9006 // Jump
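// A Jump node implements tableswitch dispatch: the jump table lives in the
// constant section, $constantaddress resolves to its base, and an indirect
// jmp indexes it by the (scaled, possibly offset) switch value.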
9007 // dummy instruction for generating temp registers
9008 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9009 match(Jump (LShiftL switch_val shift));
9010 ins_cost(350);
9011 predicate(false);
9012 effect(TEMP dest);
9013
9014 format %{ "leaq $dest, [$constantaddress]\n\t"
9015 "jmp [$dest + $switch_val << $shift]\n\t" %}
9016 ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs
    // r10 for that and the compiler treats r10 as allocatable, so we build
    // the dispatch address by hand.
9020 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9021 // ArrayAddress dispatch(table, index);
9022 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9023 __ lea($dest$$Register, $constantaddress);
9024 __ jmp(dispatch);
9025 %}
9026 ins_pipe(pipe_jmp);
9027 %}
9028
9029 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9030 match(Jump (AddL (LShiftL switch_val shift) offset));
9031 ins_cost(350);
9032 effect(TEMP dest);
9033
9034 format %{ "leaq $dest, [$constantaddress]\n\t"
9035 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9036 ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs
    // r10 for that and the compiler treats r10 as allocatable, so we build
    // the dispatch address by hand.
9040 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9041 // ArrayAddress dispatch(table, index);
9042 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9043 __ lea($dest$$Register, $constantaddress);
9044 __ jmp(dispatch);
9045 %}
9046 ins_pipe(pipe_jmp);
9047 %}
9048
9049 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9050 match(Jump switch_val);
9051 ins_cost(350);
9052 effect(TEMP dest);
9053
9054 format %{ "leaq $dest, [$constantaddress]\n\t"
9055 "jmp [$dest + $switch_val]\n\t" %}
9056 ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs
    // r10 for that and the compiler treats r10 as allocatable, so we build
    // the dispatch address by hand.
9060 // Address index(noreg, switch_reg, Address::times_1);
9061 // ArrayAddress dispatch(table, index);
9062 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9063 __ lea($dest$$Register, $constantaddress);
9064 __ jmp(dispatch);
9065 %}
9066 ins_pipe(pipe_jmp);
9067 %}
9068
9069 // Conditional move
9070 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9071 %{
9072 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9073 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9074
9075 ins_cost(100); // XXX
9076 format %{ "setbn$cop $dst\t# signed, int" %}
9077 ins_encode %{
9078 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9079 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9080 %}
9081 ins_pipe(ialu_reg);
9082 %}
9083
9084 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9085 %{
9086 predicate(!UseAPX);
9087 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9088
9089 ins_cost(200); // XXX
9090 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9091 ins_encode %{
9092 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9093 %}
9094 ins_pipe(pipe_cmov_reg);
9095 %}
9096
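// The _ndd variants use the APX "new data destination" encoding: the
// destination is distinct from both sources, saving the copy the
// two-operand cmov forms need when dst must first be made equal to one
// input.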
9097 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9098 %{
9099 predicate(UseAPX);
9100 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9101
9102 ins_cost(200);
9103 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9104 ins_encode %{
9105 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9106 %}
9107 ins_pipe(pipe_cmov_reg);
9108 %}
9109
9110 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9111 %{
9112 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9113 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9114
9115 ins_cost(100); // XXX
9116 format %{ "setbn$cop $dst\t# unsigned, int" %}
9117 ins_encode %{
9118 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9119 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9120 %}
9121 ins_pipe(ialu_reg);
9122 %}
9123
9124 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9125 predicate(!UseAPX);
9126 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9127
9128 ins_cost(200); // XXX
9129 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9130 ins_encode %{
9131 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9132 %}
9133 ins_pipe(pipe_cmov_reg);
9134 %}
9135
9136 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9137 predicate(UseAPX);
9138 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9139
9140 ins_cost(200);
9141 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9142 ins_encode %{
9143 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9144 %}
9145 ins_pipe(pipe_cmov_reg);
9146 %}
9147
9148 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9149 %{
9150 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9151 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9152
9153 ins_cost(100); // XXX
9154 format %{ "setbn$cop $dst\t# unsigned, int" %}
9155 ins_encode %{
9156 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9157 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9158 %}
9159 ins_pipe(ialu_reg);
9160 %}
9161
9162 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9163 predicate(!UseAPX);
9164 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9165 ins_cost(200);
9166 expand %{
9167 cmovI_regU(cop, cr, dst, src);
9168 %}
9169 %}
9170
9171 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
9172 predicate(UseAPX);
9173 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9174 ins_cost(200);
9175 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9176 ins_encode %{
9177 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9178 %}
9179 ins_pipe(pipe_cmov_reg);
9180 %}
9181
9182 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9183 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9184 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9185
9186 ins_cost(200); // XXX
9187 format %{ "cmovpl $dst, $src\n\t"
9188 "cmovnel $dst, $src" %}
9189 ins_encode %{
9190 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9191 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9192 %}
9193 ins_pipe(pipe_cmov_reg);
9194 %}
9195
9196 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9197 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9198 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9199 effect(TEMP dst);
9200
9201 ins_cost(200);
9202 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9203 "cmovnel $dst, $src2" %}
9204 ins_encode %{
9205 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9206 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9207 %}
9208 ins_pipe(pipe_cmov_reg);
9209 %}
9210
9211 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9212 // inputs of the CMove
9213 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9214 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9215 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9216 effect(TEMP dst);
9217
9218 ins_cost(200); // XXX
9219 format %{ "cmovpl $dst, $src\n\t"
9220 "cmovnel $dst, $src" %}
9221 ins_encode %{
9222 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9223 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9224 %}
9225 ins_pipe(pipe_cmov_reg);
9226 %}
9227
// We need this special handling only for the eq/ne comparisons, since
// NaN == NaN is false and the parity flag is set if either operand is a NaN.
9230 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9231 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9232 match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
9233 effect(TEMP dst);
9234
9235 ins_cost(200);
9236 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9237 "cmovnel $dst, $src2" %}
9238 ins_encode %{
9239 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9240 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9241 %}
9242 ins_pipe(pipe_cmov_reg);
9243 %}
9244
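// Concretely: ucomiss/ucomisd of two NaNs produces the unordered result
// ZF = PF = CF = 1, so a bare cmove on the equal condition would wrongly
// move; the leading cmov on parity routes the unordered case first.
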
9245 // Conditional move
9246 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9247 predicate(!UseAPX);
9248 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9249
9250 ins_cost(250); // XXX
9251 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9252 ins_encode %{
9253 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9254 %}
9255 ins_pipe(pipe_cmov_mem);
9256 %}
9257
9258 // Conditional move
9259 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9260 %{
9261 predicate(UseAPX);
9262 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9263
9264 ins_cost(250);
9265 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9266 ins_encode %{
9267 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9268 %}
9269 ins_pipe(pipe_cmov_mem);
9270 %}
9271
9272 // Conditional move
9273 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9274 %{
9275 predicate(!UseAPX);
9276 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9277
9278 ins_cost(250); // XXX
9279 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9280 ins_encode %{
9281 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9282 %}
9283 ins_pipe(pipe_cmov_mem);
9284 %}
9285
9286 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9287 predicate(!UseAPX);
9288 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9289 ins_cost(250);
9290 expand %{
9291 cmovI_memU(cop, cr, dst, src);
9292 %}
9293 %}
9294
9295 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9296 %{
9297 predicate(UseAPX);
9298 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9299
9300 ins_cost(250);
9301 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9302 ins_encode %{
9303 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9304 %}
9305 ins_pipe(pipe_cmov_mem);
9306 %}
9307
9308 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
9309 %{
9310 predicate(UseAPX);
9311 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9312 ins_cost(250);
9313 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9314 ins_encode %{
9315 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9316 %}
9317 ins_pipe(pipe_cmov_mem);
9318 %}
9319
9320 // Conditional move
9321 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9322 %{
9323 predicate(!UseAPX);
9324 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9325
9326 ins_cost(200); // XXX
9327 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9328 ins_encode %{
9329 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9330 %}
9331 ins_pipe(pipe_cmov_reg);
9332 %}
9333
9334 // Conditional move ndd
9335 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9336 %{
9337 predicate(UseAPX);
9338 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9339
9340 ins_cost(200);
9341 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9342 ins_encode %{
9343 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9344 %}
9345 ins_pipe(pipe_cmov_reg);
9346 %}
9347
9348 // Conditional move
9349 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9350 %{
9351 predicate(!UseAPX);
9352 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9353
9354 ins_cost(200); // XXX
9355 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9356 ins_encode %{
9357 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9358 %}
9359 ins_pipe(pipe_cmov_reg);
9360 %}
9361
9362 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9363 predicate(!UseAPX);
9364 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9365 ins_cost(200);
9366 expand %{
9367 cmovN_regU(cop, cr, dst, src);
9368 %}
9369 %}
9370
9371 // Conditional move ndd
9372 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9373 %{
9374 predicate(UseAPX);
9375 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9376
9377 ins_cost(200);
9378 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9379 ins_encode %{
9380 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9381 %}
9382 ins_pipe(pipe_cmov_reg);
9383 %}
9384
9385 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
9386 predicate(UseAPX);
9387 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9388 ins_cost(200);
9389 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9390 ins_encode %{
9391 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9392 %}
9393 ins_pipe(pipe_cmov_reg);
9394 %}
9395
9396 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9397 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9398 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9399
9400 ins_cost(200); // XXX
9401 format %{ "cmovpl $dst, $src\n\t"
9402 "cmovnel $dst, $src" %}
9403 ins_encode %{
9404 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9405 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9406 %}
9407 ins_pipe(pipe_cmov_reg);
9408 %}
9409
9410 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9411 // inputs of the CMove
9412 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9413 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9414 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9415
9416 ins_cost(200); // XXX
9417 format %{ "cmovpl $dst, $src\n\t"
9418 "cmovnel $dst, $src" %}
9419 ins_encode %{
9420 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9421 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9422 %}
9423 ins_pipe(pipe_cmov_reg);
9424 %}
9425
9426 // Conditional move
9427 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9428 %{
9429 predicate(!UseAPX);
9430 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9431
9432 ins_cost(200); // XXX
9433 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9434 ins_encode %{
9435 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9436 %}
9437 ins_pipe(pipe_cmov_reg); // XXX
9438 %}
9439
9440 // Conditional move ndd
9441 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9442 %{
9443 predicate(UseAPX);
9444 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9445
9446 ins_cost(200);
9447 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9448 ins_encode %{
9449 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9450 %}
9451 ins_pipe(pipe_cmov_reg);
9452 %}
9453
9454 // Conditional move
9455 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9456 %{
9457 predicate(!UseAPX);
9458 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9459
9460 ins_cost(200); // XXX
9461 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9462 ins_encode %{
9463 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9464 %}
9465 ins_pipe(pipe_cmov_reg); // XXX
9466 %}
9467
9468 // Conditional move ndd
9469 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9470 %{
9471 predicate(UseAPX);
9472 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9473
9474 ins_cost(200);
9475 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9476 ins_encode %{
9477 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9478 %}
9479 ins_pipe(pipe_cmov_reg);
9480 %}
9481
9482 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9483 predicate(!UseAPX);
9484 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9485 ins_cost(200);
9486 expand %{
9487 cmovP_regU(cop, cr, dst, src);
9488 %}
9489 %}
9490
9491 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
9492 predicate(UseAPX);
9493 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9494 ins_cost(200);
9495 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9496 ins_encode %{
9497 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9498 %}
9499 ins_pipe(pipe_cmov_reg);
9500 %}
9501
9502 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9503 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9504 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9505
9506 ins_cost(200); // XXX
9507 format %{ "cmovpq $dst, $src\n\t"
9508 "cmovneq $dst, $src" %}
9509 ins_encode %{
9510 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9511 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9512 %}
9513 ins_pipe(pipe_cmov_reg);
9514 %}
9515
9516 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9517 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9518 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9519 effect(TEMP dst);
9520
9521 ins_cost(200);
9522 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9523 "cmovneq $dst, $src2" %}
9524 ins_encode %{
9525 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9526 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9527 %}
9528 ins_pipe(pipe_cmov_reg);
9529 %}
9530
9531 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9532 // inputs of the CMove
9533 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9534 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9535 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9536
9537 ins_cost(200); // XXX
9538 format %{ "cmovpq $dst, $src\n\t"
9539 "cmovneq $dst, $src" %}
9540 ins_encode %{
9541 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9542 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9543 %}
9544 ins_pipe(pipe_cmov_reg);
9545 %}
9546
9547 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9548 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9549 match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
9550 effect(TEMP dst);
9551
9552 ins_cost(200);
9553 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9554 "cmovneq $dst, $src2" %}
9555 ins_encode %{
9556 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9557 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9558 %}
9559 ins_pipe(pipe_cmov_reg);
9560 %}
9561
9562 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9563 %{
9564 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9565 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9566
9567 ins_cost(100); // XXX
9568 format %{ "setbn$cop $dst\t# signed, long" %}
9569 ins_encode %{
9570 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9571 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9572 %}
9573 ins_pipe(ialu_reg);
9574 %}
9575
9576 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9577 %{
9578 predicate(!UseAPX);
9579 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9580
9581 ins_cost(200); // XXX
9582 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9583 ins_encode %{
9584 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9585 %}
9586 ins_pipe(pipe_cmov_reg); // XXX
9587 %}
9588
9589 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9590 %{
9591 predicate(UseAPX);
9592 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9593
9594 ins_cost(200);
9595 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9596 ins_encode %{
9597 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9598 %}
9599 ins_pipe(pipe_cmov_reg);
9600 %}
9601
9602 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9603 %{
9604 predicate(!UseAPX);
9605 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9606
9607 ins_cost(200); // XXX
9608 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9609 ins_encode %{
9610 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9611 %}
9612 ins_pipe(pipe_cmov_mem); // XXX
9613 %}
9614
9615 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9616 %{
9617 predicate(UseAPX);
9618 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9619
9620 ins_cost(200);
9621 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9622 ins_encode %{
9623 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9624 %}
9625 ins_pipe(pipe_cmov_mem);
9626 %}
9627
9628 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9629 %{
9630 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9631 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9632
9633 ins_cost(100); // XXX
9634 format %{ "setbn$cop $dst\t# unsigned, long" %}
9635 ins_encode %{
9636 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9637 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9638 %}
9639 ins_pipe(ialu_reg);
9640 %}
9641
9642 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9643 %{
9644 predicate(!UseAPX);
9645 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9646
9647 ins_cost(200); // XXX
9648 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9649 ins_encode %{
9650 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9651 %}
9652 ins_pipe(pipe_cmov_reg); // XXX
9653 %}
9654
9655 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9656 %{
9657 predicate(UseAPX);
9658 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9659
9660 ins_cost(200);
9661 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9662 ins_encode %{
9663 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9664 %}
9665 ins_pipe(pipe_cmov_reg);
9666 %}
9667
9668 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9669 %{
9670 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9671 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9672
9673 ins_cost(100); // XXX
9674 format %{ "setbn$cop $dst\t# unsigned, long" %}
9675 ins_encode %{
9676 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9677 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9678 %}
9679 ins_pipe(ialu_reg);
9680 %}
9681
9682 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9683 predicate(!UseAPX);
9684 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9685 ins_cost(200);
9686 expand %{
9687 cmovL_regU(cop, cr, dst, src);
9688 %}
9689 %}
9690
9691 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
9692 %{
9693 predicate(UseAPX);
9694 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9695 ins_cost(200);
9696 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9697 ins_encode %{
9698 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9699 %}
9700 ins_pipe(pipe_cmov_reg);
9701 %}
9702
9703 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9704 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9705 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9706
9707 ins_cost(200); // XXX
9708 format %{ "cmovpq $dst, $src\n\t"
9709 "cmovneq $dst, $src" %}
9710 ins_encode %{
9711 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9712 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9713 %}
9714 ins_pipe(pipe_cmov_reg);
9715 %}
9716
9717 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9718 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9719 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9720 effect(TEMP dst);
9721
9722 ins_cost(200);
9723 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9724 "cmovneq $dst, $src2" %}
9725 ins_encode %{
9726 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9727 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9728 %}
9729 ins_pipe(pipe_cmov_reg);
9730 %}
9731
9732 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9733 // inputs of the CMove
9734 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9735 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9736 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9737
9738 ins_cost(200); // XXX
9739 format %{ "cmovpq $dst, $src\n\t"
9740 "cmovneq $dst, $src" %}
9741 ins_encode %{
9742 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9743 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9744 %}
9745 ins_pipe(pipe_cmov_reg);
9746 %}
9747
9748 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9749 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9750 match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
9751 effect(TEMP dst);
9752
9753 ins_cost(200);
9754 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9755 "cmovneq $dst, $src2" %}
9756 ins_encode %{
9757 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9758 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9759 %}
9760 ins_pipe(pipe_cmov_reg);
9761 %}
9762
9763 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9764 %{
9765 predicate(!UseAPX);
9766 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9767
9768 ins_cost(200); // XXX
9769 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9770 ins_encode %{
9771 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9772 %}
9773 ins_pipe(pipe_cmov_mem); // XXX
9774 %}
9775
9776 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9777 predicate(!UseAPX);
9778 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9779 ins_cost(200);
9780 expand %{
9781 cmovL_memU(cop, cr, dst, src);
9782 %}
9783 %}
9784
9785 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9786 %{
9787 predicate(UseAPX);
9788 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9789
9790 ins_cost(200);
9791 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9792 ins_encode %{
9793 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9794 %}
9795 ins_pipe(pipe_cmov_mem);
9796 %}
9797
9798 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
9799 %{
9800 predicate(UseAPX);
9801 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9802 ins_cost(200);
9803 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9804 ins_encode %{
9805 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9806 %}
9807 ins_pipe(pipe_cmov_mem);
9808 %}
9809
9810 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9811 %{
9812 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9813
9814 ins_cost(200); // XXX
9815 format %{ "jn$cop skip\t# signed cmove float\n\t"
9816 "movss $dst, $src\n"
9817 "skip:" %}
9818 ins_encode %{
9819 Label Lskip;
9820 // Invert sense of branch from sense of CMOV
9821 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9822 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9823 __ bind(Lskip);
9824 %}
9825 ins_pipe(pipe_slow);
9826 %}
9827
9828 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9829 %{
9830 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9831
9832 ins_cost(200); // XXX
9833 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9834 "movss $dst, $src\n"
9835 "skip:" %}
9836 ins_encode %{
9837 Label Lskip;
9838 // Invert sense of branch from sense of CMOV
9839 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9840 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9841 __ bind(Lskip);
9842 %}
9843 ins_pipe(pipe_slow);
9844 %}
9845
9846 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9847 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9848 ins_cost(200);
9849 expand %{
9850 cmovF_regU(cop, cr, dst, src);
9851 %}
9852 %}
9853
9854 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9855 %{
9856 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9857
9858 ins_cost(200); // XXX
9859 format %{ "jn$cop skip\t# signed cmove double\n\t"
9860 "movsd $dst, $src\n"
9861 "skip:" %}
9862 ins_encode %{
9863 Label Lskip;
9864 // Invert sense of branch from sense of CMOV
9865 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9866 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9867 __ bind(Lskip);
9868 %}
9869 ins_pipe(pipe_slow);
9870 %}
9871
9872 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9873 %{
9874 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9875
9876 ins_cost(200); // XXX
9877 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9878 "movsd $dst, $src\n"
9879 "skip:" %}
9880 ins_encode %{
9881 Label Lskip;
9882 // Invert sense of branch from sense of CMOV
9883 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9884 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9885 __ bind(Lskip);
9886 %}
9887 ins_pipe(pipe_slow);
9888 %}
9889
9890 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9891 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9892 ins_cost(200);
9893 expand %{
9894 cmovD_regU(cop, cr, dst, src);
9895 %}
9896 %}
9897
9898 //----------Arithmetic Instructions--------------------------------------------
9899 //----------Addition Instructions----------------------------------------------
9900
9901 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9902 %{
9903 predicate(!UseAPX);
9904 match(Set dst (AddI dst src));
9905 effect(KILL cr);
9906 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9907 format %{ "addl $dst, $src\t# int" %}
9908 ins_encode %{
9909 __ addl($dst$$Register, $src$$Register);
9910 %}
9911 ins_pipe(ialu_reg_reg);
9912 %}
9913
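// In the ndd encodings the trailing 'false' argument leaves the APX NF
// (no-flags) bit clear, so the instruction still updates rFlags exactly as
// the KILL cr effect advertises.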
9914 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9915 %{
9916 predicate(UseAPX);
9917 match(Set dst (AddI src1 src2));
9918 effect(KILL cr);
9919 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9920
9921 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9922 ins_encode %{
9923 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9924 %}
9925 ins_pipe(ialu_reg_reg);
9926 %}
9927
9928 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9929 %{
9930 predicate(!UseAPX);
9931 match(Set dst (AddI dst src));
9932 effect(KILL cr);
9933 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9934
9935 format %{ "addl $dst, $src\t# int" %}
9936 ins_encode %{
9937 __ addl($dst$$Register, $src$$constant);
9938 %}
9939 ins_pipe( ialu_reg );
9940 %}
9941
9942 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9943 %{
9944 predicate(UseAPX);
9945 match(Set dst (AddI src1 src2));
9946 effect(KILL cr);
9947 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9948
9949 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9950 ins_encode %{
9951 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9952 %}
9953 ins_pipe( ialu_reg );
9954 %}
9955
9956 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9957 %{
9958 predicate(UseAPX);
9959 match(Set dst (AddI (LoadI src1) src2));
9960 effect(KILL cr);
9961 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9962
9963 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9964 ins_encode %{
9965 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9966 %}
9967 ins_pipe( ialu_reg );
9968 %}
9969
9970 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9971 %{
9972 predicate(!UseAPX);
9973 match(Set dst (AddI dst (LoadI src)));
9974 effect(KILL cr);
9975 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9976
9977 ins_cost(150); // XXX
9978 format %{ "addl $dst, $src\t# int" %}
9979 ins_encode %{
9980 __ addl($dst$$Register, $src$$Address);
9981 %}
9982 ins_pipe(ialu_reg_mem);
9983 %}
9984
9985 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
9986 %{
9987 predicate(UseAPX);
9988 match(Set dst (AddI src1 (LoadI src2)));
9989 effect(KILL cr);
9990 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9991
9992 ins_cost(150);
9993 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9994 ins_encode %{
9995 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
9996 %}
9997 ins_pipe(ialu_reg_mem);
9998 %}
9999
10000 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10001 %{
10002 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10003 effect(KILL cr);
10004 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10005
10006 ins_cost(150); // XXX
10007 format %{ "addl $dst, $src\t# int" %}
10008 ins_encode %{
10009 __ addl($dst$$Address, $src$$Register);
10010 %}
10011 ins_pipe(ialu_mem_reg);
10012 %}
10013
10014 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10015 %{
10016 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10017 effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
10022 format %{ "addl $dst, $src\t# int" %}
10023 ins_encode %{
10024 __ addl($dst$$Address, $src$$constant);
10025 %}
10026 ins_pipe(ialu_mem_imm);
10027 %}
10028
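// INC/DEC update all arithmetic flags except CF, which can create a
// partial-flags dependency on some processors; UseIncDec gates them, and
// when enabled they save the immediate byte of the equivalent addl/subl.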
10029 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10030 %{
10031 predicate(!UseAPX && UseIncDec);
10032 match(Set dst (AddI dst src));
10033 effect(KILL cr);
10034
10035 format %{ "incl $dst\t# int" %}
10036 ins_encode %{
10037 __ incrementl($dst$$Register);
10038 %}
10039 ins_pipe(ialu_reg);
10040 %}
10041
10042 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10043 %{
10044 predicate(UseAPX && UseIncDec);
10045 match(Set dst (AddI src val));
10046 effect(KILL cr);
10047
10048 format %{ "eincl $dst, $src\t# int ndd" %}
10049 ins_encode %{
10050 __ eincl($dst$$Register, $src$$Register, false);
10051 %}
10052 ins_pipe(ialu_reg);
10053 %}
10054
10055 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10056 %{
10057 predicate(UseAPX && UseIncDec);
10058 match(Set dst (AddI (LoadI src) val));
10059 effect(KILL cr);
10060
10061 format %{ "eincl $dst, $src\t# int ndd" %}
10062 ins_encode %{
10063 __ eincl($dst$$Register, $src$$Address, false);
10064 %}
10065 ins_pipe(ialu_reg);
10066 %}
10067
10068 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10069 %{
10070 predicate(UseIncDec);
10071 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10072 effect(KILL cr);
10073
10074 ins_cost(125); // XXX
10075 format %{ "incl $dst\t# int" %}
10076 ins_encode %{
10077 __ incrementl($dst$$Address);
10078 %}
10079 ins_pipe(ialu_mem_imm);
10080 %}
10081
// SubI with a constant input is canonicalized to AddI of the negated
// constant, so a decrement arrives here as an AddI with a -1 immediate.
10083 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10084 %{
10085 predicate(!UseAPX && UseIncDec);
10086 match(Set dst (AddI dst src));
10087 effect(KILL cr);
10088
10089 format %{ "decl $dst\t# int" %}
10090 ins_encode %{
10091 __ decrementl($dst$$Register);
10092 %}
10093 ins_pipe(ialu_reg);
10094 %}
10095
10096 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10097 %{
10098 predicate(UseAPX && UseIncDec);
10099 match(Set dst (AddI src val));
10100 effect(KILL cr);
10101
10102 format %{ "edecl $dst, $src\t# int ndd" %}
10103 ins_encode %{
10104 __ edecl($dst$$Register, $src$$Register, false);
10105 %}
10106 ins_pipe(ialu_reg);
10107 %}
10108
10109 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10110 %{
10111 predicate(UseAPX && UseIncDec);
10112 match(Set dst (AddI (LoadI src) val));
10113 effect(KILL cr);
10114
10115 format %{ "edecl $dst, $src\t# int ndd" %}
10116 ins_encode %{
10117 __ edecl($dst$$Register, $src$$Address, false);
10118 %}
10119 ins_pipe(ialu_reg);
10120 %}
10121
// SubI with a constant input is canonicalized to AddI of the negated
// constant, so a decrement arrives here as an AddI with a -1 immediate.
10123 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10124 %{
10125 predicate(UseIncDec);
10126 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10127 effect(KILL cr);
10128
10129 ins_cost(125); // XXX
10130 format %{ "decl $dst\t# int" %}
10131 ins_encode %{
10132 __ decrementl($dst$$Address);
10133 %}
10134 ins_pipe(ialu_mem_imm);
10135 %}
10136
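// LEA folds base + index*scale + disp into one flag-free instruction, so
// these forms match shift/add trees directly; they are predicated on
// fast_2op_lea/fast_3op_lea because a three-component LEA has extra latency
// on several microarchitectures.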
10137 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10138 %{
10139 predicate(VM_Version::supports_fast_2op_lea());
10140 match(Set dst (AddI (LShiftI index scale) disp));
10141
10142 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10143 ins_encode %{
10144 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10145 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10146 %}
10147 ins_pipe(ialu_reg_reg);
10148 %}
10149
10150 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10151 %{
10152 predicate(VM_Version::supports_fast_3op_lea());
10153 match(Set dst (AddI (AddI base index) disp));
10154
10155 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10156 ins_encode %{
10157 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10158 %}
10159 ins_pipe(ialu_reg_reg);
10160 %}
10161
10162 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10163 %{
10164 predicate(VM_Version::supports_fast_2op_lea());
10165 match(Set dst (AddI base (LShiftI index scale)));
10166
10167 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10168 ins_encode %{
10169 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10170 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10171 %}
10172 ins_pipe(ialu_reg_reg);
10173 %}
10174
10175 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10176 %{
10177 predicate(VM_Version::supports_fast_3op_lea());
10178 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10179
10180 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10181 ins_encode %{
10182 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10183 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10184 %}
10185 ins_pipe(ialu_reg_reg);
10186 %}
10187
10188 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10189 %{
10190 predicate(!UseAPX);
10191 match(Set dst (AddL dst src));
10192 effect(KILL cr);
10193 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10194
10195 format %{ "addq $dst, $src\t# long" %}
10196 ins_encode %{
10197 __ addq($dst$$Register, $src$$Register);
10198 %}
10199 ins_pipe(ialu_reg_reg);
10200 %}
10201
10202 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10203 %{
10204 predicate(UseAPX);
10205 match(Set dst (AddL src1 src2));
10206 effect(KILL cr);
10207 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10208
10209 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10210 ins_encode %{
10211 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10212 %}
10213 ins_pipe(ialu_reg_reg);
10214 %}
10215
10216 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10217 %{
10218 predicate(!UseAPX);
10219 match(Set dst (AddL dst src));
10220 effect(KILL cr);
10221 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10222
10223 format %{ "addq $dst, $src\t# long" %}
10224 ins_encode %{
10225 __ addq($dst$$Register, $src$$constant);
10226 %}
10227 ins_pipe( ialu_reg );
10228 %}
10229
10230 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10231 %{
10232 predicate(UseAPX);
10233 match(Set dst (AddL src1 src2));
10234 effect(KILL cr);
10235 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10236
10237 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10238 ins_encode %{
10239 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10240 %}
10241 ins_pipe( ialu_reg );
10242 %}
10243
10244 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10245 %{
10246 predicate(UseAPX);
10247 match(Set dst (AddL (LoadL src1) src2));
10248 effect(KILL cr);
10249 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10250
10251 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10252 ins_encode %{
10253 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10254 %}
10255 ins_pipe( ialu_reg );
10256 %}
10257
10258 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10259 %{
10260 predicate(!UseAPX);
10261 match(Set dst (AddL dst (LoadL src)));
10262 effect(KILL cr);
10263 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10264
10265 ins_cost(150); // XXX
10266 format %{ "addq $dst, $src\t# long" %}
10267 ins_encode %{
10268 __ addq($dst$$Register, $src$$Address);
10269 %}
10270 ins_pipe(ialu_reg_mem);
10271 %}
10272
10273 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10274 %{
10275 predicate(UseAPX);
10276 match(Set dst (AddL src1 (LoadL src2)));
10277 effect(KILL cr);
10278 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10279
10280 ins_cost(150);
10281 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10282 ins_encode %{
10283 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10284 %}
10285 ins_pipe(ialu_reg_mem);
10286 %}
10287
10288 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10289 %{
10290 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10291 effect(KILL cr);
10292 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10293
10294 ins_cost(150); // XXX
10295 format %{ "addq $dst, $src\t# long" %}
10296 ins_encode %{
10297 __ addq($dst$$Address, $src$$Register);
10298 %}
10299 ins_pipe(ialu_mem_reg);
10300 %}
10301
10302 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10303 %{
10304 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10305 effect(KILL cr);
10306 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10307
10308 ins_cost(125); // XXX
10309 format %{ "addq $dst, $src\t# long" %}
10310 ins_encode %{
10311 __ addq($dst$$Address, $src$$constant);
10312 %}
10313 ins_pipe(ialu_mem_imm);
10314 %}
10315
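// inc/dec update OF/SF/ZF/AF/PF but leave CF untouched, which is why the
// inc/dec rules below carry no flag() declaration; UseIncDec selects
// between inc/dec and add/sub of +/-1.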
10316 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10317 %{
10318 predicate(!UseAPX && UseIncDec);
10319 match(Set dst (AddL dst src));
10320 effect(KILL cr);
10321
10322 format %{ "incq $dst\t# long" %}
10323 ins_encode %{
10324 __ incrementq($dst$$Register);
10325 %}
10326 ins_pipe(ialu_reg);
10327 %}
10328
10329 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10330 %{
10331 predicate(UseAPX && UseIncDec);
10332 match(Set dst (AddL src val));
10333 effect(KILL cr);
10334
10335 format %{ "eincq $dst, $src\t# long ndd" %}
10336 ins_encode %{
10337 __ eincq($dst$$Register, $src$$Register, false);
10338 %}
10339 ins_pipe(ialu_reg);
10340 %}
10341
10342 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10343 %{
10344 predicate(UseAPX && UseIncDec);
10345 match(Set dst (AddL (LoadL src) val));
10346 effect(KILL cr);
10347
10348 format %{ "eincq $dst, $src\t# long ndd" %}
10349 ins_encode %{
10350 __ eincq($dst$$Register, $src$$Address, false);
10351 %}
10352 ins_pipe(ialu_reg);
10353 %}
10354
10355 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10356 %{
10357 predicate(UseIncDec);
10358 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10359 effect(KILL cr);
10360
10361 ins_cost(125); // XXX
10362 format %{ "incq $dst\t# long" %}
10363 ins_encode %{
10364 __ incrementq($dst$$Address);
10365 %}
10366 ins_pipe(ialu_mem_imm);
10367 %}
10368
10369 // XXX why does that use AddL
10370 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10371 %{
10372 predicate(!UseAPX && UseIncDec);
10373 match(Set dst (AddL dst src));
10374 effect(KILL cr);
10375
10376 format %{ "decq $dst\t# long" %}
10377 ins_encode %{
10378 __ decrementq($dst$$Register);
10379 %}
10380 ins_pipe(ialu_reg);
10381 %}
10382
10383 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10384 %{
10385 predicate(UseAPX && UseIncDec);
10386 match(Set dst (AddL src val));
10387 effect(KILL cr);
10388
10389 format %{ "edecq $dst, $src\t# long ndd" %}
10390 ins_encode %{
10391 __ edecq($dst$$Register, $src$$Register, false);
10392 %}
10393 ins_pipe(ialu_reg);
10394 %}
10395
10396 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10397 %{
10398 predicate(UseAPX && UseIncDec);
10399 match(Set dst (AddL (LoadL src) val));
10400 effect(KILL cr);
10401
10402 format %{ "edecq $dst, $src\t# long ndd" %}
10403 ins_encode %{
10404 __ edecq($dst$$Register, $src$$Address, false);
10405 %}
10406 ins_pipe(ialu_reg);
10407 %}
10408
10409 // XXX why does that use AddL
10410 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10411 %{
10412 predicate(UseIncDec);
10413 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10414 effect(KILL cr);
10415
10416 ins_cost(125); // XXX
10417 format %{ "decq $dst\t# long" %}
10418 ins_encode %{
10419 __ decrementq($dst$$Address);
10420 %}
10421 ins_pipe(ialu_mem_imm);
10422 %}
10423
10424 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10425 %{
10426 predicate(VM_Version::supports_fast_2op_lea());
10427 match(Set dst (AddL (LShiftL index scale) disp));
10428
10429 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10430 ins_encode %{
10431 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10432 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10433 %}
10434 ins_pipe(ialu_reg_reg);
10435 %}
10436
10437 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10438 %{
10439 predicate(VM_Version::supports_fast_3op_lea());
10440 match(Set dst (AddL (AddL base index) disp));
10441
10442 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10443 ins_encode %{
10444 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10445 %}
10446 ins_pipe(ialu_reg_reg);
10447 %}
10448
10449 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10450 %{
10451 predicate(VM_Version::supports_fast_2op_lea());
10452 match(Set dst (AddL base (LShiftL index scale)));
10453
10454 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10455 ins_encode %{
10456 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10457 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10458 %}
10459 ins_pipe(ialu_reg_reg);
10460 %}
10461
10462 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10463 %{
10464 predicate(VM_Version::supports_fast_3op_lea());
10465 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10466
10467 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10468 ins_encode %{
10469 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10470 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10471 %}
10472 ins_pipe(ialu_reg_reg);
10473 %}
10474
10475 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10476 %{
10477 match(Set dst (AddP dst src));
10478 effect(KILL cr);
10479 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10480
10481 format %{ "addq $dst, $src\t# ptr" %}
10482 ins_encode %{
10483 __ addq($dst$$Register, $src$$Register);
10484 %}
10485 ins_pipe(ialu_reg_reg);
10486 %}
10487
10488 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10489 %{
10490 match(Set dst (AddP dst src));
10491 effect(KILL cr);
10492 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10493
10494 format %{ "addq $dst, $src\t# ptr" %}
10495 ins_encode %{
10496 __ addq($dst$$Register, $src$$constant);
10497 %}
10498 ins_pipe( ialu_reg );
10499 %}
10500
10501 // XXX addP mem ops ????
10502
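// The cast rules below only carry type information for the optimizer and
// normally emit no code (size(0)). When VerifyConstraintCasts is enabled,
// the checked variants instead emit a runtime check that the value lies in
// the range implied by the node's type.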
10503 instruct checkCastPP(rRegP dst)
10504 %{
10505 match(Set dst (CheckCastPP dst));
10506
10507 size(0);
10508 format %{ "# checkcastPP of $dst" %}
10509 ins_encode(/* empty encoding */);
10510 ins_pipe(empty);
10511 %}
10512
10513 instruct castPP(rRegP dst)
10514 %{
10515 match(Set dst (CastPP dst));
10516
10517 size(0);
10518 format %{ "# castPP of $dst" %}
10519 ins_encode(/* empty encoding */);
10520 ins_pipe(empty);
10521 %}
10522
10523 instruct castII(rRegI dst)
10524 %{
10525 predicate(VerifyConstraintCasts == 0);
10526 match(Set dst (CastII dst));
10527
10528 size(0);
10529 format %{ "# castII of $dst" %}
10530 ins_encode(/* empty encoding */);
10531 ins_cost(0);
10532 ins_pipe(empty);
10533 %}
10534
10535 instruct castII_checked(rRegI dst, rFlagsReg cr)
10536 %{
10537 predicate(VerifyConstraintCasts > 0);
10538 match(Set dst (CastII dst));
10539
10540 effect(KILL cr);
10541 format %{ "# cast_checked_II $dst" %}
10542 ins_encode %{
10543 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10544 %}
10545 ins_pipe(pipe_slow);
10546 %}
10547
10548 instruct castLL(rRegL dst)
10549 %{
10550 predicate(VerifyConstraintCasts == 0);
10551 match(Set dst (CastLL dst));
10552
10553 size(0);
10554 format %{ "# castLL of $dst" %}
10555 ins_encode(/* empty encoding */);
10556 ins_cost(0);
10557 ins_pipe(empty);
10558 %}
10559
10560 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10561 %{
10562 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10563 match(Set dst (CastLL dst));
10564
10565 effect(KILL cr);
10566 format %{ "# cast_checked_LL $dst" %}
10567 ins_encode %{
10568 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10569 %}
10570 ins_pipe(pipe_slow);
10571 %}
10572
10573 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10574 %{
10575 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10576 match(Set dst (CastLL dst));
10577
10578 effect(KILL cr, TEMP tmp);
10579 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10580 ins_encode %{
10581 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10582 %}
10583 ins_pipe(pipe_slow);
10584 %}
10585
10586 instruct castFF(regF dst)
10587 %{
10588 match(Set dst (CastFF dst));
10589
10590 size(0);
10591 format %{ "# castFF of $dst" %}
10592 ins_encode(/* empty encoding */);
10593 ins_cost(0);
10594 ins_pipe(empty);
10595 %}
10596
10597 instruct castHH(regF dst)
10598 %{
10599 match(Set dst (CastHH dst));
10600
10601 size(0);
10602 format %{ "# castHH of $dst" %}
10603 ins_encode(/* empty encoding */);
10604 ins_cost(0);
10605 ins_pipe(empty);
10606 %}
10607
10608 instruct castDD(regD dst)
10609 %{
10610 match(Set dst (CastDD dst));
10611
10612 size(0);
10613 format %{ "# castDD of $dst" %}
10614 ins_encode(/* empty encoding */);
10615 ins_cost(0);
10616 ins_pipe(empty);
10617 %}
10618
10619 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
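// cmpxchg implicitly compares the memory operand against rax and, on
// failure, loads the current memory value into rax. Hence oldval is fixed
// to rax and marked KILL in the rules below.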
10620 instruct compareAndSwapP(rRegI res,
10621 memory mem_ptr,
10622 rax_RegP oldval, rRegP newval,
10623 rFlagsReg cr)
10624 %{
10625 predicate(n->as_LoadStore()->barrier_data() == 0);
10626 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10627 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10628 effect(KILL cr, KILL oldval);
10629
10630 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10631 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10632 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10633 ins_encode %{
10634 __ lock();
10635 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10636 __ setcc(Assembler::equal, $res$$Register);
10637 %}
10638 ins_pipe( pipe_cmpxchg );
10639 %}
10640
10641 instruct compareAndSwapL(rRegI res,
10642 memory mem_ptr,
10643 rax_RegL oldval, rRegL newval,
10644 rFlagsReg cr)
10645 %{
10646 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10647 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10648 effect(KILL cr, KILL oldval);
10649
10650 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10651 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10652 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10653 ins_encode %{
10654 __ lock();
10655 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10656 __ setcc(Assembler::equal, $res$$Register);
10657 %}
10658 ins_pipe( pipe_cmpxchg );
10659 %}
10660
10661 instruct compareAndSwapI(rRegI res,
10662 memory mem_ptr,
10663 rax_RegI oldval, rRegI newval,
10664 rFlagsReg cr)
10665 %{
10666 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10667 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10668 effect(KILL cr, KILL oldval);
10669
10670 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10671 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10672 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10673 ins_encode %{
10674 __ lock();
10675 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10676 __ setcc(Assembler::equal, $res$$Register);
10677 %}
10678 ins_pipe( pipe_cmpxchg );
10679 %}
10680
10681 instruct compareAndSwapB(rRegI res,
10682 memory mem_ptr,
10683 rax_RegI oldval, rRegI newval,
10684 rFlagsReg cr)
10685 %{
10686 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10687 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10688 effect(KILL cr, KILL oldval);
10689
10690 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10691 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10692 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10693 ins_encode %{
10694 __ lock();
10695 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10696 __ setcc(Assembler::equal, $res$$Register);
10697 %}
10698 ins_pipe( pipe_cmpxchg );
10699 %}
10700
10701 instruct compareAndSwapS(rRegI res,
10702 memory mem_ptr,
10703 rax_RegI oldval, rRegI newval,
10704 rFlagsReg cr)
10705 %{
10706 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10707 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10708 effect(KILL cr, KILL oldval);
10709
10710 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10711 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10712 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10713 ins_encode %{
10714 __ lock();
10715 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10716 __ setcc(Assembler::equal, $res$$Register);
10717 %}
10718 ins_pipe( pipe_cmpxchg );
10719 %}
10720
10721 instruct compareAndSwapN(rRegI res,
10722 memory mem_ptr,
10723 rax_RegN oldval, rRegN newval,
10724 rFlagsReg cr) %{
10725 predicate(n->as_LoadStore()->barrier_data() == 0);
10726 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10727 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10728 effect(KILL cr, KILL oldval);
10729
10730 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10731 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10732 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10733 ins_encode %{
10734 __ lock();
10735 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10736 __ setcc(Assembler::equal, $res$$Register);
10737 %}
10738 ins_pipe( pipe_cmpxchg );
10739 %}
10740
10741 instruct compareAndExchangeB(
10742 memory mem_ptr,
10743 rax_RegI oldval, rRegI newval,
10744 rFlagsReg cr)
10745 %{
10746 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10747 effect(KILL cr);
10748
10749 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10750 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10751 ins_encode %{
10752 __ lock();
10753 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10754 %}
10755 ins_pipe( pipe_cmpxchg );
10756 %}
10757
10758 instruct compareAndExchangeS(
10759 memory mem_ptr,
10760 rax_RegI oldval, rRegI newval,
10761 rFlagsReg cr)
10762 %{
10763 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10764 effect(KILL cr);
10765
10766 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10767 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10768 ins_encode %{
10769 __ lock();
10770 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10771 %}
10772 ins_pipe( pipe_cmpxchg );
10773 %}
10774
10775 instruct compareAndExchangeI(
10776 memory mem_ptr,
10777 rax_RegI oldval, rRegI newval,
10778 rFlagsReg cr)
10779 %{
10780 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10781 effect(KILL cr);
10782
10783 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10784 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10785 ins_encode %{
10786 __ lock();
10787 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10788 %}
10789 ins_pipe( pipe_cmpxchg );
10790 %}
10791
10792 instruct compareAndExchangeL(
10793 memory mem_ptr,
10794 rax_RegL oldval, rRegL newval,
10795 rFlagsReg cr)
10796 %{
10797 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10798 effect(KILL cr);
10799
10800 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10801 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10802 ins_encode %{
10803 __ lock();
10804 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10805 %}
10806 ins_pipe( pipe_cmpxchg );
10807 %}
10808
10809 instruct compareAndExchangeN(
10810 memory mem_ptr,
10811 rax_RegN oldval, rRegN newval,
10812 rFlagsReg cr) %{
10813 predicate(n->as_LoadStore()->barrier_data() == 0);
10814 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10815 effect(KILL cr);
10816
10817 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10818 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10819 ins_encode %{
10820 __ lock();
10821 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10822 %}
10823 ins_pipe( pipe_cmpxchg );
10824 %}
10825
10826 instruct compareAndExchangeP(
10827 memory mem_ptr,
10828 rax_RegP oldval, rRegP newval,
10829 rFlagsReg cr)
10830 %{
10831 predicate(n->as_LoadStore()->barrier_data() == 0);
10832 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10833 effect(KILL cr);
10834
10835 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10836 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10837 ins_encode %{
10838 __ lock();
10839 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10840 %}
10841 ins_pipe( pipe_cmpxchg );
10842 %}
10843
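// When the fetched result is unused (result_not_used()), a locked add is
// emitted instead of xadd: it needs no result register and also accepts an
// immediate operand.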
10844 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10845 predicate(n->as_LoadStore()->result_not_used());
10846 match(Set dummy (GetAndAddB mem add));
10847 effect(KILL cr);
10848 format %{ "addb_lock $mem, $add" %}
10849 ins_encode %{
10850 __ lock();
10851 __ addb($mem$$Address, $add$$Register);
10852 %}
10853 ins_pipe(pipe_cmpxchg);
10854 %}
10855
10856 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10857 predicate(n->as_LoadStore()->result_not_used());
10858 match(Set dummy (GetAndAddB mem add));
10859 effect(KILL cr);
10860 format %{ "addb_lock $mem, $add" %}
10861 ins_encode %{
10862 __ lock();
10863 __ addb($mem$$Address, $add$$constant);
10864 %}
10865 ins_pipe(pipe_cmpxchg);
10866 %}
10867
10868 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10869 predicate(!n->as_LoadStore()->result_not_used());
10870 match(Set newval (GetAndAddB mem newval));
10871 effect(KILL cr);
10872 format %{ "xaddb_lock $mem, $newval" %}
10873 ins_encode %{
10874 __ lock();
10875 __ xaddb($mem$$Address, $newval$$Register);
10876 %}
10877 ins_pipe(pipe_cmpxchg);
10878 %}
10879
10880 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10881 predicate(n->as_LoadStore()->result_not_used());
10882 match(Set dummy (GetAndAddS mem add));
10883 effect(KILL cr);
10884 format %{ "addw_lock $mem, $add" %}
10885 ins_encode %{
10886 __ lock();
10887 __ addw($mem$$Address, $add$$Register);
10888 %}
10889 ins_pipe(pipe_cmpxchg);
10890 %}
10891
10892 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10893 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10894 match(Set dummy (GetAndAddS mem add));
10895 effect(KILL cr);
10896 format %{ "addw_lock $mem, $add" %}
10897 ins_encode %{
10898 __ lock();
10899 __ addw($mem$$Address, $add$$constant);
10900 %}
10901 ins_pipe(pipe_cmpxchg);
10902 %}
10903
10904 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10905 predicate(!n->as_LoadStore()->result_not_used());
10906 match(Set newval (GetAndAddS mem newval));
10907 effect(KILL cr);
10908 format %{ "xaddw_lock $mem, $newval" %}
10909 ins_encode %{
10910 __ lock();
10911 __ xaddw($mem$$Address, $newval$$Register);
10912 %}
10913 ins_pipe(pipe_cmpxchg);
10914 %}
10915
10916 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10917 predicate(n->as_LoadStore()->result_not_used());
10918 match(Set dummy (GetAndAddI mem add));
10919 effect(KILL cr);
10920 format %{ "addl_lock $mem, $add" %}
10921 ins_encode %{
10922 __ lock();
10923 __ addl($mem$$Address, $add$$Register);
10924 %}
10925 ins_pipe(pipe_cmpxchg);
10926 %}
10927
10928 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10929 predicate(n->as_LoadStore()->result_not_used());
10930 match(Set dummy (GetAndAddI mem add));
10931 effect(KILL cr);
10932 format %{ "addl_lock $mem, $add" %}
10933 ins_encode %{
10934 __ lock();
10935 __ addl($mem$$Address, $add$$constant);
10936 %}
10937 ins_pipe(pipe_cmpxchg);
10938 %}
10939
10940 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10941 predicate(!n->as_LoadStore()->result_not_used());
10942 match(Set newval (GetAndAddI mem newval));
10943 effect(KILL cr);
10944 format %{ "xaddl_lock $mem, $newval" %}
10945 ins_encode %{
10946 __ lock();
10947 __ xaddl($mem$$Address, $newval$$Register);
10948 %}
10949 ins_pipe(pipe_cmpxchg);
10950 %}
10951
10952 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10953 predicate(n->as_LoadStore()->result_not_used());
10954 match(Set dummy (GetAndAddL mem add));
10955 effect(KILL cr);
10956 format %{ "addq_lock $mem, $add" %}
10957 ins_encode %{
10958 __ lock();
10959 __ addq($mem$$Address, $add$$Register);
10960 %}
10961 ins_pipe(pipe_cmpxchg);
10962 %}
10963
10964 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10965 predicate(n->as_LoadStore()->result_not_used());
10966 match(Set dummy (GetAndAddL mem add));
10967 effect(KILL cr);
10968 format %{ "addq_lock $mem, $add" %}
10969 ins_encode %{
10970 __ lock();
10971 __ addq($mem$$Address, $add$$constant);
10972 %}
10973 ins_pipe(pipe_cmpxchg);
10974 %}
10975
10976 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10977 predicate(!n->as_LoadStore()->result_not_used());
10978 match(Set newval (GetAndAddL mem newval));
10979 effect(KILL cr);
10980 format %{ "xaddq_lock $mem, $newval" %}
10981 ins_encode %{
10982 __ lock();
10983 __ xaddq($mem$$Address, $newval$$Register);
10984 %}
10985 ins_pipe(pipe_cmpxchg);
10986 %}
10987
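// xchg with a memory operand locks the bus implicitly, so no lock() prefix
// is needed for the GetAndSet rules below.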
10988 instruct xchgB( memory mem, rRegI newval) %{
10989 match(Set newval (GetAndSetB mem newval));
10990 format %{ "XCHGB $newval,[$mem]" %}
10991 ins_encode %{
10992 __ xchgb($newval$$Register, $mem$$Address);
10993 %}
10994 ins_pipe( pipe_cmpxchg );
10995 %}
10996
10997 instruct xchgS( memory mem, rRegI newval) %{
10998 match(Set newval (GetAndSetS mem newval));
10999 format %{ "XCHGW $newval,[$mem]" %}
11000 ins_encode %{
11001 __ xchgw($newval$$Register, $mem$$Address);
11002 %}
11003 ins_pipe( pipe_cmpxchg );
11004 %}
11005
11006 instruct xchgI( memory mem, rRegI newval) %{
11007 match(Set newval (GetAndSetI mem newval));
11008 format %{ "XCHGL $newval,[$mem]" %}
11009 ins_encode %{
11010 __ xchgl($newval$$Register, $mem$$Address);
11011 %}
11012 ins_pipe( pipe_cmpxchg );
11013 %}
11014
11015 instruct xchgL( memory mem, rRegL newval) %{
11016 match(Set newval (GetAndSetL mem newval));
11017 format %{ "XCHGL $newval,[$mem]" %}
11018 ins_encode %{
11019 __ xchgq($newval$$Register, $mem$$Address);
11020 %}
11021 ins_pipe( pipe_cmpxchg );
11022 %}
11023
11024 instruct xchgP( memory mem, rRegP newval) %{
11025 match(Set newval (GetAndSetP mem newval));
11026 predicate(n->as_LoadStore()->barrier_data() == 0);
11027 format %{ "XCHGQ $newval,[$mem]" %}
11028 ins_encode %{
11029 __ xchgq($newval$$Register, $mem$$Address);
11030 %}
11031 ins_pipe( pipe_cmpxchg );
11032 %}
11033
11034 instruct xchgN( memory mem, rRegN newval) %{
11035 predicate(n->as_LoadStore()->barrier_data() == 0);
11036 match(Set newval (GetAndSetN mem newval));
11037 format %{ "XCHGL $newval,$mem]" %}
11038 ins_encode %{
11039 __ xchgl($newval$$Register, $mem$$Address);
11040 %}
11041 ins_pipe( pipe_cmpxchg );
11042 %}
11043
11044 //----------Abs Instructions-------------------------------------------
11045
11046 // Integer Absolute Instructions
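// Branchless abs: dst = 0 - src sets the flags, then cmov replaces dst with
// src if the negation is negative, i.e. if src was positive. MIN_VALUE
// negates to itself with OF set, so it is left unchanged, matching Java's
// Math.abs.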
11047 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11048 %{
11049 match(Set dst (AbsI src));
11050 effect(TEMP dst, KILL cr);
11051 format %{ "xorl $dst, $dst\t# abs int\n\t"
11052 "subl $dst, $src\n\t"
11053 "cmovll $dst, $src" %}
11054 ins_encode %{
11055 __ xorl($dst$$Register, $dst$$Register);
11056 __ subl($dst$$Register, $src$$Register);
11057 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11058 %}
11059
11060 ins_pipe(ialu_reg_reg);
11061 %}
11062
11063 // Long Absolute Instructions
11064 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11065 %{
11066 match(Set dst (AbsL src));
11067 effect(TEMP dst, KILL cr);
11068 format %{ "xorl $dst, $dst\t# abs long\n\t"
11069 "subq $dst, $src\n\t"
11070 "cmovlq $dst, $src" %}
11071 ins_encode %{
11072 __ xorl($dst$$Register, $dst$$Register);
11073 __ subq($dst$$Register, $src$$Register);
11074 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11075 %}
11076
11077 ins_pipe(ialu_reg_reg);
11078 %}
11079
11080 //----------Subtraction Instructions-------------------------------------------
11081
11082 // Integer Subtraction Instructions
11083 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11084 %{
11085 predicate(!UseAPX);
11086 match(Set dst (SubI dst src));
11087 effect(KILL cr);
11088 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11089
11090 format %{ "subl $dst, $src\t# int" %}
11091 ins_encode %{
11092 __ subl($dst$$Register, $src$$Register);
11093 %}
11094 ins_pipe(ialu_reg_reg);
11095 %}
11096
11097 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11098 %{
11099 predicate(UseAPX);
11100 match(Set dst (SubI src1 src2));
11101 effect(KILL cr);
11102 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11103
11104 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11105 ins_encode %{
11106 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11107 %}
11108 ins_pipe(ialu_reg_reg);
11109 %}
11110
11111 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11112 %{
11113 predicate(UseAPX);
11114 match(Set dst (SubI src1 src2));
11115 effect(KILL cr);
11116 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11117
11118 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11119 ins_encode %{
11120 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11121 %}
11122 ins_pipe(ialu_reg_reg);
11123 %}
11124
11125 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11126 %{
11127 predicate(UseAPX);
11128 match(Set dst (SubI (LoadI src1) src2));
11129 effect(KILL cr);
11130 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11131
11132 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11133 ins_encode %{
11134 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11135 %}
11136 ins_pipe(ialu_reg_reg);
11137 %}
11138
11139 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11140 %{
11141 predicate(!UseAPX);
11142 match(Set dst (SubI dst (LoadI src)));
11143 effect(KILL cr);
11144 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11145
11146 ins_cost(150);
11147 format %{ "subl $dst, $src\t# int" %}
11148 ins_encode %{
11149 __ subl($dst$$Register, $src$$Address);
11150 %}
11151 ins_pipe(ialu_reg_mem);
11152 %}
11153
11154 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11155 %{
11156 predicate(UseAPX);
11157 match(Set dst (SubI src1 (LoadI src2)));
11158 effect(KILL cr);
11159 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11160
11161 ins_cost(150);
11162 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11163 ins_encode %{
11164 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11165 %}
11166 ins_pipe(ialu_reg_mem);
11167 %}
11168
11169 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11170 %{
11171 predicate(UseAPX);
11172 match(Set dst (SubI (LoadI src1) src2));
11173 effect(KILL cr);
11174 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11175
11176 ins_cost(150);
11177 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11178 ins_encode %{
11179 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11180 %}
11181 ins_pipe(ialu_reg_mem);
11182 %}
11183
11184 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11185 %{
11186 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11187 effect(KILL cr);
11188 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11189
11190 ins_cost(150);
11191 format %{ "subl $dst, $src\t# int" %}
11192 ins_encode %{
11193 __ subl($dst$$Address, $src$$Register);
11194 %}
11195 ins_pipe(ialu_mem_reg);
11196 %}
11197
11198 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11199 %{
11200 predicate(!UseAPX);
11201 match(Set dst (SubL dst src));
11202 effect(KILL cr);
11203 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11204
11205 format %{ "subq $dst, $src\t# long" %}
11206 ins_encode %{
11207 __ subq($dst$$Register, $src$$Register);
11208 %}
11209 ins_pipe(ialu_reg_reg);
11210 %}
11211
11212 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11213 %{
11214 predicate(UseAPX);
11215 match(Set dst (SubL src1 src2));
11216 effect(KILL cr);
11217 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11218
11219 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11220 ins_encode %{
11221 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11222 %}
11223 ins_pipe(ialu_reg_reg);
11224 %}
11225
11226 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11227 %{
11228 predicate(UseAPX);
11229 match(Set dst (SubL src1 src2));
11230 effect(KILL cr);
11231 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11232
11233 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11234 ins_encode %{
11235 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11236 %}
11237 ins_pipe(ialu_reg_reg);
11238 %}
11239
11240 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11241 %{
11242 predicate(UseAPX);
11243 match(Set dst (SubL (LoadL src1) src2));
11244 effect(KILL cr);
11245 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11246
11247 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11248 ins_encode %{
11249 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11250 %}
11251 ins_pipe(ialu_reg_reg);
11252 %}
11253
11254 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11255 %{
11256 predicate(!UseAPX);
11257 match(Set dst (SubL dst (LoadL src)));
11258 effect(KILL cr);
11259 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11260
11261 ins_cost(150);
11262 format %{ "subq $dst, $src\t# long" %}
11263 ins_encode %{
11264 __ subq($dst$$Register, $src$$Address);
11265 %}
11266 ins_pipe(ialu_reg_mem);
11267 %}
11268
11269 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11270 %{
11271 predicate(UseAPX);
11272 match(Set dst (SubL src1 (LoadL src2)));
11273 effect(KILL cr);
11274 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11275
11276 ins_cost(150);
11277 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11278 ins_encode %{
11279 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11280 %}
11281 ins_pipe(ialu_reg_mem);
11282 %}
11283
11284 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11285 %{
11286 predicate(UseAPX);
11287 match(Set dst (SubL (LoadL src1) src2));
11288 effect(KILL cr);
11289 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11290
11291 ins_cost(150);
11292 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11293 ins_encode %{
11294 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11295 %}
11296 ins_pipe(ialu_reg_mem);
11297 %}
11298
11299 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11300 %{
11301 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11302 effect(KILL cr);
11303 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11304
11305 ins_cost(150);
11306 format %{ "subq $dst, $src\t# long" %}
11307 ins_encode %{
11308 __ subq($dst$$Address, $src$$Register);
11309 %}
11310 ins_pipe(ialu_mem_reg);
11311 %}
11312
11313 // Subtract from a pointer
11314 // XXX hmpf???
11315 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11316 %{
11317 match(Set dst (AddP dst (SubI zero src)));
11318 effect(KILL cr);
11319
11320 format %{ "subq $dst, $src\t# ptr - int" %}
11321 ins_encode %{
11322 __ subq($dst$$Register, $src$$Register);
11323 %}
11324 ins_pipe(ialu_reg_reg);
11325 %}
11326
11327 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11328 %{
11329 predicate(!UseAPX);
11330 match(Set dst (SubI zero dst));
11331 effect(KILL cr);
11332 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11333
11334 format %{ "negl $dst\t# int" %}
11335 ins_encode %{
11336 __ negl($dst$$Register);
11337 %}
11338 ins_pipe(ialu_reg);
11339 %}
11340
11341 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11342 %{
11343 predicate(UseAPX);
11344 match(Set dst (SubI zero src));
11345 effect(KILL cr);
11346 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11347
11348 format %{ "enegl $dst, $src\t# int ndd" %}
11349 ins_encode %{
11350 __ enegl($dst$$Register, $src$$Register, false);
11351 %}
11352 ins_pipe(ialu_reg);
11353 %}
11354
11355 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11356 %{
11357 predicate(!UseAPX);
11358 match(Set dst (NegI dst));
11359 effect(KILL cr);
11360 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11361
11362 format %{ "negl $dst\t# int" %}
11363 ins_encode %{
11364 __ negl($dst$$Register);
11365 %}
11366 ins_pipe(ialu_reg);
11367 %}
11368
11369 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11370 %{
11371 predicate(UseAPX);
11372 match(Set dst (NegI src));
11373 effect(KILL cr);
11374 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11375
11376 format %{ "enegl $dst, $src\t# int ndd" %}
11377 ins_encode %{
11378 __ enegl($dst$$Register, $src$$Register, false);
11379 %}
11380 ins_pipe(ialu_reg);
11381 %}
11382
11383 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11384 %{
11385 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11386 effect(KILL cr);
11387 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11388
11389 format %{ "negl $dst\t# int" %}
11390 ins_encode %{
11391 __ negl($dst$$Address);
11392 %}
11393 ins_pipe(ialu_reg);
11394 %}
11395
11396 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11397 %{
11398 predicate(!UseAPX);
11399 match(Set dst (SubL zero dst));
11400 effect(KILL cr);
11401 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11402
11403 format %{ "negq $dst\t# long" %}
11404 ins_encode %{
11405 __ negq($dst$$Register);
11406 %}
11407 ins_pipe(ialu_reg);
11408 %}
11409
11410 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11411 %{
11412 predicate(UseAPX);
11413 match(Set dst (SubL zero src));
11414 effect(KILL cr);
11415 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11416
11417 format %{ "enegq $dst, $src\t# long ndd" %}
11418 ins_encode %{
11419 __ enegq($dst$$Register, $src$$Register, false);
11420 %}
11421 ins_pipe(ialu_reg);
11422 %}
11423
11424 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11425 %{
11426 predicate(!UseAPX);
11427 match(Set dst (NegL dst));
11428 effect(KILL cr);
11429 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11430
11431 format %{ "negq $dst\t# int" %}
11432 ins_encode %{
11433 __ negq($dst$$Register);
11434 %}
11435 ins_pipe(ialu_reg);
11436 %}
11437
11438 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11439 %{
11440 predicate(UseAPX);
11441 match(Set dst (NegL src));
11442 effect(KILL cr);
11443 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11444
11445 format %{ "enegq $dst, $src\t# long ndd" %}
11446 ins_encode %{
11447 __ enegq($dst$$Register, $src$$Register, false);
11448 %}
11449 ins_pipe(ialu_reg);
11450 %}
11451
11452 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11453 %{
11454 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11455 effect(KILL cr);
11456 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11457
11458 format %{ "negq $dst\t# long" %}
11459 ins_encode %{
11460 __ negq($dst$$Address);
11461 %}
11462 ins_pipe(ialu_reg);
11463 %}
11464
11465 //----------Multiplication/Division Instructions-------------------------------
11466 // Integer Multiplication Instructions
11467 // Multiply Register
11468
11469 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11470 %{
11471 predicate(!UseAPX);
11472 match(Set dst (MulI dst src));
11473 effect(KILL cr);
11474
11475 ins_cost(300);
11476 format %{ "imull $dst, $src\t# int" %}
11477 ins_encode %{
11478 __ imull($dst$$Register, $src$$Register);
11479 %}
11480 ins_pipe(ialu_reg_reg_alu0);
11481 %}
11482
11483 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11484 %{
11485 predicate(UseAPX);
11486 match(Set dst (MulI src1 src2));
11487 effect(KILL cr);
11488
11489 ins_cost(300);
11490 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11491 ins_encode %{
11492 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11493 %}
11494 ins_pipe(ialu_reg_reg_alu0);
11495 %}
11496
11497 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11498 %{
11499 match(Set dst (MulI src imm));
11500 effect(KILL cr);
11501
11502 ins_cost(300);
11503 format %{ "imull $dst, $src, $imm\t# int" %}
11504 ins_encode %{
11505 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11506 %}
11507 ins_pipe(ialu_reg_reg_alu0);
11508 %}
11509
11510 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11511 %{
11512 predicate(!UseAPX);
11513 match(Set dst (MulI dst (LoadI src)));
11514 effect(KILL cr);
11515
11516 ins_cost(350);
11517 format %{ "imull $dst, $src\t# int" %}
11518 ins_encode %{
11519 __ imull($dst$$Register, $src$$Address);
11520 %}
11521 ins_pipe(ialu_reg_mem_alu0);
11522 %}
11523
11524 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11525 %{
11526 predicate(UseAPX);
11527 match(Set dst (MulI src1 (LoadI src2)));
11528 effect(KILL cr);
11529
11530 ins_cost(350);
11531 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11532 ins_encode %{
11533 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11534 %}
11535 ins_pipe(ialu_reg_mem_alu0);
11536 %}
11537
11538 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11539 %{
11540 match(Set dst (MulI (LoadI src) imm));
11541 effect(KILL cr);
11542
11543 ins_cost(300);
11544 format %{ "imull $dst, $src, $imm\t# int" %}
11545 ins_encode %{
11546 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11547 %}
11548 ins_pipe(ialu_reg_mem_alu0);
11549 %}
11550
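// MulAddS2I computes dst*src1 + src2*src3; with no dedicated scalar
// instruction, the rule expands into two imuls and an add.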
11551 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11552 %{
11553 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11554 effect(KILL cr, KILL src2);
11555
11556 expand %{ mulI_rReg(dst, src1, cr);
11557 mulI_rReg(src2, src3, cr);
11558 addI_rReg(dst, src2, cr); %}
11559 %}
11560
11561 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11562 %{
11563 predicate(!UseAPX);
11564 match(Set dst (MulL dst src));
11565 effect(KILL cr);
11566
11567 ins_cost(300);
11568 format %{ "imulq $dst, $src\t# long" %}
11569 ins_encode %{
11570 __ imulq($dst$$Register, $src$$Register);
11571 %}
11572 ins_pipe(ialu_reg_reg_alu0);
11573 %}
11574
11575 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11576 %{
11577 predicate(UseAPX);
11578 match(Set dst (MulL src1 src2));
11579 effect(KILL cr);
11580
11581 ins_cost(300);
11582 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11583 ins_encode %{
11584 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11585 %}
11586 ins_pipe(ialu_reg_reg_alu0);
11587 %}
11588
11589 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11590 %{
11591 match(Set dst (MulL src imm));
11592 effect(KILL cr);
11593
11594 ins_cost(300);
11595 format %{ "imulq $dst, $src, $imm\t# long" %}
11596 ins_encode %{
11597 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11598 %}
11599 ins_pipe(ialu_reg_reg_alu0);
11600 %}
11601
11602 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11603 %{
11604 predicate(!UseAPX);
11605 match(Set dst (MulL dst (LoadL src)));
11606 effect(KILL cr);
11607
11608 ins_cost(350);
11609 format %{ "imulq $dst, $src\t# long" %}
11610 ins_encode %{
11611 __ imulq($dst$$Register, $src$$Address);
11612 %}
11613 ins_pipe(ialu_reg_mem_alu0);
11614 %}
11615
11616 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11617 %{
11618 predicate(UseAPX);
11619 match(Set dst (MulL src1 (LoadL src2)));
11620 effect(KILL cr);
11621
11622 ins_cost(350);
11623 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11624 ins_encode %{
11625 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11626 %}
11627 ins_pipe(ialu_reg_mem_alu0);
11628 %}
11629
11630 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11631 %{
11632 match(Set dst (MulL (LoadL src) imm));
11633 effect(KILL cr);
11634
11635 ins_cost(300);
11636 format %{ "imulq $dst, $src, $imm\t# long" %}
11637 ins_encode %{
11638 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11639 %}
11640 ins_pipe(ialu_reg_mem_alu0);
11641 %}
11642
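// One-operand imulq/mulq compute the 128-bit product of rax and the operand
// into rdx:rax, so dst is fixed to rdx (the high half) and rax is USE_KILLed.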
11643 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11644 %{
11645 match(Set dst (MulHiL src rax));
11646 effect(USE_KILL rax, KILL cr);
11647
11648 ins_cost(300);
11649 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11650 ins_encode %{
11651 __ imulq($src$$Register);
11652 %}
11653 ins_pipe(ialu_reg_reg_alu0);
11654 %}
11655
11656 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11657 %{
11658 match(Set dst (UMulHiL src rax));
11659 effect(USE_KILL rax, KILL cr);
11660
11661 ins_cost(300);
11662 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11663 ins_encode %{
11664 __ mulq($src$$Register);
11665 %}
11666 ins_pipe(ialu_reg_reg_alu0);
11667 %}
11668
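// x86 idiv raises #DE on MIN_VALUE / -1, while Java defines
// MIN_VALUE / -1 == MIN_VALUE and MIN_VALUE % -1 == 0. The cdql_enc/cdqq_enc
// encodings therefore test for a MIN_VALUE dividend and a -1 divisor first
// and in that case skip idiv, leaving rax (quotient) as MIN_VALUE and
// zeroing rdx (remainder). The same encodings serve the div, mod and divmod
// rules below.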
11669 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11670 rFlagsReg cr)
11671 %{
11672 match(Set rax (DivI rax div));
11673 effect(KILL rdx, KILL cr);
11674
11675 ins_cost(30*100+10*100); // XXX
11676 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11677 "jne,s normal\n\t"
11678 "xorl rdx, rdx\n\t"
11679 "cmpl $div, -1\n\t"
11680 "je,s done\n"
11681 "normal: cdql\n\t"
11682 "idivl $div\n"
11683 "done:" %}
11684 ins_encode(cdql_enc(div));
11685 ins_pipe(ialu_reg_reg_alu0);
11686 %}
11687
11688 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11689 rFlagsReg cr)
11690 %{
11691 match(Set rax (DivL rax div));
11692 effect(KILL rdx, KILL cr);
11693
11694 ins_cost(30*100+10*100); // XXX
11695 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11696 "cmpq rax, rdx\n\t"
11697 "jne,s normal\n\t"
11698 "xorl rdx, rdx\n\t"
11699 "cmpq $div, -1\n\t"
11700 "je,s done\n"
11701 "normal: cdqq\n\t"
11702 "idivq $div\n"
11703 "done:" %}
11704 ins_encode(cdqq_enc(div));
11705 ins_pipe(ialu_reg_reg_alu0);
11706 %}
11707
11708 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11709 %{
11710 match(Set rax (UDivI rax div));
11711 effect(KILL rdx, KILL cr);
11712
11713 ins_cost(300);
11714 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11715 ins_encode %{
11716 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11717 %}
11718 ins_pipe(ialu_reg_reg_alu0);
11719 %}
11720
11721 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11722 %{
11723 match(Set rax (UDivL rax div));
11724 effect(KILL rdx, KILL cr);
11725
11726 ins_cost(300);
11727 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11728 ins_encode %{
11729 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11730 %}
11731 ins_pipe(ialu_reg_reg_alu0);
11732 %}
11733
11734 // Integer DIVMOD with Register, both quotient and mod results
11735 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11736 rFlagsReg cr)
11737 %{
11738 match(DivModI rax div);
11739 effect(KILL cr);
11740
11741 ins_cost(30*100+10*100); // XXX
11742 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11743 "jne,s normal\n\t"
11744 "xorl rdx, rdx\n\t"
11745 "cmpl $div, -1\n\t"
11746 "je,s done\n"
11747 "normal: cdql\n\t"
11748 "idivl $div\n"
11749 "done:" %}
11750 ins_encode(cdql_enc(div));
11751 ins_pipe(pipe_slow);
11752 %}
11753
11754 // Long DIVMOD with Register, both quotient and mod results
11755 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11756 rFlagsReg cr)
11757 %{
11758 match(DivModL rax div);
11759 effect(KILL cr);
11760
11761 ins_cost(30*100+10*100); // XXX
11762 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11763 "cmpq rax, rdx\n\t"
11764 "jne,s normal\n\t"
11765 "xorl rdx, rdx\n\t"
11766 "cmpq $div, -1\n\t"
11767 "je,s done\n"
11768 "normal: cdqq\n\t"
11769 "idivq $div\n"
11770 "done:" %}
11771 ins_encode(cdqq_enc(div));
11772 ins_pipe(pipe_slow);
11773 %}
11774
11775 // Unsigned integer DIVMOD with Register, both quotient and mod results
11776 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11777 no_rax_rdx_RegI div, rFlagsReg cr)
11778 %{
11779 match(UDivModI rax div);
11780 effect(TEMP tmp, KILL cr);
11781
11782 ins_cost(300);
11783 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11784 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11785 %}
11786 ins_encode %{
11787 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11788 %}
11789 ins_pipe(pipe_slow);
11790 %}
11791
11792 // Unsigned long DIVMOD with Register, both quotient and mod results
11793 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11794 no_rax_rdx_RegL div, rFlagsReg cr)
11795 %{
11796 match(UDivModL rax div);
11797 effect(TEMP tmp, KILL cr);
11798
11799 ins_cost(300);
11800 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11801 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11802 %}
11803 ins_encode %{
11804 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11805 %}
11806 ins_pipe(pipe_slow);
11807 %}
11808
11809 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11810 rFlagsReg cr)
11811 %{
11812 match(Set rdx (ModI rax div));
11813 effect(KILL rax, KILL cr);
11814
11815 ins_cost(300); // XXX
11816 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11817 "jne,s normal\n\t"
11818 "xorl rdx, rdx\n\t"
11819 "cmpl $div, -1\n\t"
11820 "je,s done\n"
11821 "normal: cdql\n\t"
11822 "idivl $div\n"
11823 "done:" %}
11824 ins_encode(cdql_enc(div));
11825 ins_pipe(ialu_reg_reg_alu0);
11826 %}
11827
11828 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11829 rFlagsReg cr)
11830 %{
11831 match(Set rdx (ModL rax div));
11832 effect(KILL rax, KILL cr);
11833
11834 ins_cost(300); // XXX
11835 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11836 "cmpq rax, rdx\n\t"
11837 "jne,s normal\n\t"
11838 "xorl rdx, rdx\n\t"
11839 "cmpq $div, -1\n\t"
11840 "je,s done\n"
11841 "normal: cdqq\n\t"
11842 "idivq $div\n"
11843 "done:" %}
11844 ins_encode(cdqq_enc(div));
11845 ins_pipe(ialu_reg_reg_alu0);
11846 %}
11847
11848 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11849 %{
11850 match(Set rdx (UModI rax div));
11851 effect(KILL rax, KILL cr);
11852
11853 ins_cost(300);
11854 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11855 ins_encode %{
11856 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11857 %}
11858 ins_pipe(ialu_reg_reg_alu0);
11859 %}
11860
11861 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11862 %{
11863 match(Set rdx (UModL rax div));
11864 effect(KILL rax, KILL cr);
11865
11866 ins_cost(300);
11867 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11868 ins_encode %{
11869 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11870 %}
11871 ins_pipe(ialu_reg_reg_alu0);
11872 %}
11873
11874 // Integer Shift Instructions
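// Variable shift counts need no explicit masking: the hardware masks 32-bit
// counts to 5 bits and 64-bit counts to 6 bits, which matches the Java
// semantics of int and long shifts.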
11875 // Shift Left by one, two, three
11876 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11877 %{
11878 predicate(!UseAPX);
11879 match(Set dst (LShiftI dst shift));
11880 effect(KILL cr);
11881
11882 format %{ "sall $dst, $shift" %}
11883 ins_encode %{
11884 __ sall($dst$$Register, $shift$$constant);
11885 %}
11886 ins_pipe(ialu_reg);
11887 %}
11888
11889 // Shift Left by one, two, three
11890 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11891 %{
11892 predicate(UseAPX);
11893 match(Set dst (LShiftI src shift));
11894 effect(KILL cr);
11895
11896 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11897 ins_encode %{
11898 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11899 %}
11900 ins_pipe(ialu_reg);
11901 %}
11902
11903 // Shift Left by 8-bit immediate
11904 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11905 %{
11906 predicate(!UseAPX);
11907 match(Set dst (LShiftI dst shift));
11908 effect(KILL cr);
11909
11910 format %{ "sall $dst, $shift" %}
11911 ins_encode %{
11912 __ sall($dst$$Register, $shift$$constant);
11913 %}
11914 ins_pipe(ialu_reg);
11915 %}
11916
11917 // Shift Left by 8-bit immediate
11918 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11919 %{
11920 predicate(UseAPX);
11921 match(Set dst (LShiftI src shift));
11922 effect(KILL cr);
11923
11924 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11925 ins_encode %{
11926 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11927 %}
11928 ins_pipe(ialu_reg);
11929 %}
11930
11931 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11932 %{
11933 predicate(UseAPX);
11934 match(Set dst (LShiftI (LoadI src) shift));
11935 effect(KILL cr);
11936
11937 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11938 ins_encode %{
11939 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11940 %}
11941 ins_pipe(ialu_reg);
11942 %}
11943
11944 // Shift Left by 8-bit immediate
11945 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11946 %{
11947 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11948 effect(KILL cr);
11949
11950 format %{ "sall $dst, $shift" %}
11951 ins_encode %{
11952 __ sall($dst$$Address, $shift$$constant);
11953 %}
11954 ins_pipe(ialu_mem_imm);
11955 %}
11956
11957 // Shift Left by variable
11958 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11959 %{
11960 predicate(!VM_Version::supports_bmi2());
11961 match(Set dst (LShiftI dst shift));
11962 effect(KILL cr);
11963
11964 format %{ "sall $dst, $shift" %}
11965 ins_encode %{
11966 __ sall($dst$$Register);
11967 %}
11968 ins_pipe(ialu_reg_reg);
11969 %}
11970
11971 // Shift Left by variable
11972 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11973 %{
11974 predicate(!VM_Version::supports_bmi2());
11975 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11976 effect(KILL cr);
11977
11978 format %{ "sall $dst, $shift" %}
11979 ins_encode %{
11980 __ sall($dst$$Address);
11981 %}
11982 ins_pipe(ialu_mem_reg);
11983 %}
11984
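// BMI2 shlx/sarx/shrx take the shift count in any register (not just CL)
// and do not touch EFLAGS, hence no rFlagsReg effect on these rules.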
11985 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11986 %{
11987 predicate(VM_Version::supports_bmi2());
11988 match(Set dst (LShiftI src shift));
11989
11990 format %{ "shlxl $dst, $src, $shift" %}
11991 ins_encode %{
11992 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11993 %}
11994 ins_pipe(ialu_reg_reg);
11995 %}
11996
11997 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11998 %{
11999 predicate(VM_Version::supports_bmi2());
12000 match(Set dst (LShiftI (LoadI src) shift));
12001 ins_cost(175);
12002 format %{ "shlxl $dst, $src, $shift" %}
12003 ins_encode %{
12004 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12005 %}
12006 ins_pipe(ialu_reg_mem);
12007 %}
12008
12009 // Arithmetic Shift Right by 8-bit immediate
12010 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12011 %{
12012 predicate(!UseAPX);
12013 match(Set dst (RShiftI dst shift));
12014 effect(KILL cr);
12015
12016 format %{ "sarl $dst, $shift" %}
12017 ins_encode %{
12018 __ sarl($dst$$Register, $shift$$constant);
12019 %}
12020 ins_pipe(ialu_mem_imm);
12021 %}
12022
12023 // Arithmetic Shift Right by 8-bit immediate
12024 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12025 %{
12026 predicate(UseAPX);
12027 match(Set dst (RShiftI src shift));
12028 effect(KILL cr);
12029
12030 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12031 ins_encode %{
12032 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12033 %}
12034 ins_pipe(ialu_mem_imm);
12035 %}
12036
12037 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12038 %{
12039 predicate(UseAPX);
12040 match(Set dst (RShiftI (LoadI src) shift));
12041 effect(KILL cr);
12042
12043 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12044 ins_encode %{
12045 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12046 %}
12047 ins_pipe(ialu_mem_imm);
12048 %}
12049
12050 // Arithmetic Shift Right by 8-bit immediate
12051 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12052 %{
12053 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12054 effect(KILL cr);
12055
12056 format %{ "sarl $dst, $shift" %}
12057 ins_encode %{
12058 __ sarl($dst$$Address, $shift$$constant);
12059 %}
12060 ins_pipe(ialu_mem_imm);
12061 %}
12062
12063 // Arithmetic Shift Right by variable
12064 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12065 %{
12066 predicate(!VM_Version::supports_bmi2());
12067 match(Set dst (RShiftI dst shift));
12068 effect(KILL cr);
12069
12070 format %{ "sarl $dst, $shift" %}
12071 ins_encode %{
12072 __ sarl($dst$$Register);
12073 %}
12074 ins_pipe(ialu_reg_reg);
12075 %}
12076
12077 // Arithmetic Shift Right by variable
12078 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12079 %{
12080 predicate(!VM_Version::supports_bmi2());
12081 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12082 effect(KILL cr);
12083
12084 format %{ "sarl $dst, $shift" %}
12085 ins_encode %{
12086 __ sarl($dst$$Address);
12087 %}
12088 ins_pipe(ialu_mem_reg);
12089 %}
12090
12091 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12092 %{
12093 predicate(VM_Version::supports_bmi2());
12094 match(Set dst (RShiftI src shift));
12095
12096 format %{ "sarxl $dst, $src, $shift" %}
12097 ins_encode %{
12098 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12099 %}
12100 ins_pipe(ialu_reg_reg);
12101 %}
12102
12103 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12104 %{
12105 predicate(VM_Version::supports_bmi2());
12106 match(Set dst (RShiftI (LoadI src) shift));
12107 ins_cost(175);
12108 format %{ "sarxl $dst, $src, $shift" %}
12109 ins_encode %{
12110 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12111 %}
12112 ins_pipe(ialu_reg_mem);
12113 %}
12114
12115 // Logical Shift Right by 8-bit immediate
12116 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12117 %{
12118 predicate(!UseAPX);
12119 match(Set dst (URShiftI dst shift));
12120 effect(KILL cr);
12121
12122 format %{ "shrl $dst, $shift" %}
12123 ins_encode %{
12124 __ shrl($dst$$Register, $shift$$constant);
12125 %}
12126 ins_pipe(ialu_reg);
12127 %}
12128
12129 // Logical Shift Right by 8-bit immediate
12130 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12131 %{
12132 predicate(UseAPX);
12133 match(Set dst (URShiftI src shift));
12134 effect(KILL cr);
12135
12136 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12137 ins_encode %{
12138 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12139 %}
12140 ins_pipe(ialu_reg);
12141 %}
12142
12143 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12144 %{
12145 predicate(UseAPX);
12146 match(Set dst (URShiftI (LoadI src) shift));
12147 effect(KILL cr);
12148
12149 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12150 ins_encode %{
12151 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12152 %}
12153 ins_pipe(ialu_reg);
12154 %}
12155
12156 // Logical Shift Right by 8-bit immediate
12157 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12158 %{
12159 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12160 effect(KILL cr);
12161
12162 format %{ "shrl $dst, $shift" %}
12163 ins_encode %{
12164 __ shrl($dst$$Address, $shift$$constant);
12165 %}
12166 ins_pipe(ialu_mem_imm);
12167 %}
12168
12169 // Logical Shift Right by variable
12170 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12171 %{
12172 predicate(!VM_Version::supports_bmi2());
12173 match(Set dst (URShiftI dst shift));
12174 effect(KILL cr);
12175
12176 format %{ "shrl $dst, $shift" %}
12177 ins_encode %{
12178 __ shrl($dst$$Register);
12179 %}
12180 ins_pipe(ialu_reg_reg);
12181 %}
12182
12183 // Logical Shift Right by variable
12184 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12185 %{
12186 predicate(!VM_Version::supports_bmi2());
12187 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12188 effect(KILL cr);
12189
12190 format %{ "shrl $dst, $shift" %}
12191 ins_encode %{
12192 __ shrl($dst$$Address);
12193 %}
12194 ins_pipe(ialu_mem_reg);
12195 %}
12196
12197 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12198 %{
12199 predicate(VM_Version::supports_bmi2());
12200 match(Set dst (URShiftI src shift));
12201
12202 format %{ "shrxl $dst, $src, $shift" %}
12203 ins_encode %{
12204 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12205 %}
12206 ins_pipe(ialu_reg_reg);
12207 %}
12208
12209 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12210 %{
12211 predicate(VM_Version::supports_bmi2());
12212 match(Set dst (URShiftI (LoadI src) shift));
12213 ins_cost(175);
12214 format %{ "shrxl $dst, $src, $shift" %}
12215 ins_encode %{
12216 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12217 %}
12218 ins_pipe(ialu_reg_mem);
12219 %}
12220
12221 // Long Shift Instructions
12222 // Shift Left by one, two, three
12223 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12224 %{
12225 predicate(!UseAPX);
12226 match(Set dst (LShiftL dst shift));
12227 effect(KILL cr);
12228
12229 format %{ "salq $dst, $shift" %}
12230 ins_encode %{
12231 __ salq($dst$$Register, $shift$$constant);
12232 %}
12233 ins_pipe(ialu_reg);
12234 %}
12235
12236 // Shift Left by one, two, three
12237 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12238 %{
12239 predicate(UseAPX);
12240 match(Set dst (LShiftL src shift));
12241 effect(KILL cr);
12242
12243 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12244 ins_encode %{
12245 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12246 %}
12247 ins_pipe(ialu_reg);
12248 %}
12249
12250 // Shift Left by 8-bit immediate
12251 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12252 %{
12253 predicate(!UseAPX);
12254 match(Set dst (LShiftL dst shift));
12255 effect(KILL cr);
12256
12257 format %{ "salq $dst, $shift" %}
12258 ins_encode %{
12259 __ salq($dst$$Register, $shift$$constant);
12260 %}
12261 ins_pipe(ialu_reg);
12262 %}
12263
12264 // Shift Left by 8-bit immediate
12265 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12266 %{
12267 predicate(UseAPX);
12268 match(Set dst (LShiftL src shift));
12269 effect(KILL cr);
12270
12271 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12272 ins_encode %{
12273 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12274 %}
12275 ins_pipe(ialu_reg);
12276 %}
12277
12278 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12279 %{
12280 predicate(UseAPX);
12281 match(Set dst (LShiftL (LoadL src) shift));
12282 effect(KILL cr);
12283
12284 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12285 ins_encode %{
12286 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12287 %}
12288 ins_pipe(ialu_reg);
12289 %}
12290
12291 // Shift Left by 8-bit immediate
12292 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12293 %{
12294 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12295 effect(KILL cr);
12296
12297 format %{ "salq $dst, $shift" %}
12298 ins_encode %{
12299 __ salq($dst$$Address, $shift$$constant);
12300 %}
12301 ins_pipe(ialu_mem_imm);
12302 %}
12303
12304 // Shift Left by variable
12305 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12306 %{
12307 predicate(!VM_Version::supports_bmi2());
12308 match(Set dst (LShiftL dst shift));
12309 effect(KILL cr);
12310
12311 format %{ "salq $dst, $shift" %}
12312 ins_encode %{
12313 __ salq($dst$$Register);
12314 %}
12315 ins_pipe(ialu_reg_reg);
12316 %}
12317
12318 // Shift Left by variable
12319 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12320 %{
12321 predicate(!VM_Version::supports_bmi2());
12322 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12323 effect(KILL cr);
12324
12325 format %{ "salq $dst, $shift" %}
12326 ins_encode %{
12327 __ salq($dst$$Address);
12328 %}
12329 ins_pipe(ialu_mem_reg);
12330 %}
12331
12332 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12333 %{
12334 predicate(VM_Version::supports_bmi2());
12335 match(Set dst (LShiftL src shift));
12336
12337 format %{ "shlxq $dst, $src, $shift" %}
12338 ins_encode %{
12339 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12340 %}
12341 ins_pipe(ialu_reg_reg);
12342 %}
12343
12344 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12345 %{
12346 predicate(VM_Version::supports_bmi2());
12347 match(Set dst (LShiftL (LoadL src) shift));
12348 ins_cost(175);
12349 format %{ "shlxq $dst, $src, $shift" %}
12350 ins_encode %{
12351 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12352 %}
12353 ins_pipe(ialu_reg_mem);
12354 %}
12355
// Arithmetic Shift Right by immediate (count masked to 6 bits)
12357 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12358 %{
12359 predicate(!UseAPX);
12360 match(Set dst (RShiftL dst shift));
12361 effect(KILL cr);
12362
12363 format %{ "sarq $dst, $shift" %}
12364 ins_encode %{
12365 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12366 %}
12367 ins_pipe(ialu_mem_imm);
12368 %}
12369
// Arithmetic Shift Right by immediate (count masked to 6 bits)
12371 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12372 %{
12373 predicate(UseAPX);
12374 match(Set dst (RShiftL src shift));
12375 effect(KILL cr);
12376
12377 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12378 ins_encode %{
12379 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12380 %}
12381 ins_pipe(ialu_mem_imm);
12382 %}
12383
12384 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12385 %{
12386 predicate(UseAPX);
12387 match(Set dst (RShiftL (LoadL src) shift));
12388 effect(KILL cr);
12389
12390 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12391 ins_encode %{
12392 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12393 %}
12394 ins_pipe(ialu_mem_imm);
12395 %}
12396
// Arithmetic Shift Right by immediate (count masked to 6 bits)
12398 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12399 %{
12400 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12401 effect(KILL cr);
12402
12403 format %{ "sarq $dst, $shift" %}
12404 ins_encode %{
12405 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12406 %}
12407 ins_pipe(ialu_mem_imm);
12408 %}
12409
12410 // Arithmetic Shift Right by variable
12411 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12412 %{
12413 predicate(!VM_Version::supports_bmi2());
12414 match(Set dst (RShiftL dst shift));
12415 effect(KILL cr);
12416
12417 format %{ "sarq $dst, $shift" %}
12418 ins_encode %{
12419 __ sarq($dst$$Register);
12420 %}
12421 ins_pipe(ialu_reg_reg);
12422 %}
12423
12424 // Arithmetic Shift Right by variable
12425 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12426 %{
12427 predicate(!VM_Version::supports_bmi2());
12428 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12429 effect(KILL cr);
12430
12431 format %{ "sarq $dst, $shift" %}
12432 ins_encode %{
12433 __ sarq($dst$$Address);
12434 %}
12435 ins_pipe(ialu_mem_reg);
12436 %}
12437
12438 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12439 %{
12440 predicate(VM_Version::supports_bmi2());
12441 match(Set dst (RShiftL src shift));
12442
12443 format %{ "sarxq $dst, $src, $shift" %}
12444 ins_encode %{
12445 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12446 %}
12447 ins_pipe(ialu_reg_reg);
12448 %}
12449
12450 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12451 %{
12452 predicate(VM_Version::supports_bmi2());
12453 match(Set dst (RShiftL (LoadL src) shift));
12454 ins_cost(175);
12455 format %{ "sarxq $dst, $src, $shift" %}
12456 ins_encode %{
12457 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12458 %}
12459 ins_pipe(ialu_reg_mem);
12460 %}
12461
12462 // Logical Shift Right by 8-bit immediate
12463 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12464 %{
12465 predicate(!UseAPX);
12466 match(Set dst (URShiftL dst shift));
12467 effect(KILL cr);
12468
12469 format %{ "shrq $dst, $shift" %}
12470 ins_encode %{
12471 __ shrq($dst$$Register, $shift$$constant);
12472 %}
12473 ins_pipe(ialu_reg);
12474 %}
12475
12476 // Logical Shift Right by 8-bit immediate
12477 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12478 %{
12479 predicate(UseAPX);
12480 match(Set dst (URShiftL src shift));
12481 effect(KILL cr);
12482
12483 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12484 ins_encode %{
12485 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12486 %}
12487 ins_pipe(ialu_reg);
12488 %}
12489
12490 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12491 %{
12492 predicate(UseAPX);
12493 match(Set dst (URShiftL (LoadL src) shift));
12494 effect(KILL cr);
12495
12496 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12497 ins_encode %{
12498 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12499 %}
12500 ins_pipe(ialu_reg);
12501 %}
12502
12503 // Logical Shift Right by 8-bit immediate
12504 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12505 %{
12506 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12507 effect(KILL cr);
12508
12509 format %{ "shrq $dst, $shift" %}
12510 ins_encode %{
12511 __ shrq($dst$$Address, $shift$$constant);
12512 %}
12513 ins_pipe(ialu_mem_imm);
12514 %}
12515
12516 // Logical Shift Right by variable
12517 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12518 %{
12519 predicate(!VM_Version::supports_bmi2());
12520 match(Set dst (URShiftL dst shift));
12521 effect(KILL cr);
12522
12523 format %{ "shrq $dst, $shift" %}
12524 ins_encode %{
12525 __ shrq($dst$$Register);
12526 %}
12527 ins_pipe(ialu_reg_reg);
12528 %}
12529
12530 // Logical Shift Right by variable
12531 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12532 %{
12533 predicate(!VM_Version::supports_bmi2());
12534 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12535 effect(KILL cr);
12536
12537 format %{ "shrq $dst, $shift" %}
12538 ins_encode %{
12539 __ shrq($dst$$Address);
12540 %}
12541 ins_pipe(ialu_mem_reg);
12542 %}
12543
12544 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12545 %{
12546 predicate(VM_Version::supports_bmi2());
12547 match(Set dst (URShiftL src shift));
12548
12549 format %{ "shrxq $dst, $src, $shift" %}
12550 ins_encode %{
12551 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12552 %}
12553 ins_pipe(ialu_reg_reg);
12554 %}
12555
12556 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12557 %{
12558 predicate(VM_Version::supports_bmi2());
12559 match(Set dst (URShiftL (LoadL src) shift));
12560 ins_cost(175);
12561 format %{ "shrxq $dst, $src, $shift" %}
12562 ins_encode %{
12563 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12564 %}
12565 ins_pipe(ialu_reg_mem);
12566 %}
12567
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12570 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12571 %{
12572 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12573
12574 format %{ "movsbl $dst, $src\t# i2b" %}
12575 ins_encode %{
12576 __ movsbl($dst$$Register, $src$$Register);
12577 %}
12578 ins_pipe(ialu_reg_reg);
12579 %}
12580
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12583 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12584 %{
12585 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12586
12587 format %{ "movswl $dst, $src\t# i2s" %}
12588 ins_encode %{
12589 __ movswl($dst$$Register, $src$$Register);
12590 %}
12591 ins_pipe(ialu_reg_reg);
12592 %}
12593
12594 // ROL/ROR instructions
12595
// Rotate Left by constant.
12597 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12598 %{
12599 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12600 match(Set dst (RotateLeft dst shift));
12601 effect(KILL cr);
12602 format %{ "roll $dst, $shift" %}
12603 ins_encode %{
12604 __ roll($dst$$Register, $shift$$constant);
12605 %}
12606 ins_pipe(ialu_reg);
12607 %}
12608
12609 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12610 %{
12611 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12612 match(Set dst (RotateLeft src shift));
12613 format %{ "rolxl $dst, $src, $shift" %}
12614 ins_encode %{
12615 int shift = 32 - ($shift$$constant & 31);
12616 __ rorxl($dst$$Register, $src$$Register, shift);
12617 %}
12618 ins_pipe(ialu_reg_reg);
12619 %}
12620
12621 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12622 %{
12623 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12624 match(Set dst (RotateLeft (LoadI src) shift));
12625 ins_cost(175);
12626 format %{ "rolxl $dst, $src, $shift" %}
12627 ins_encode %{
12628 int shift = 32 - ($shift$$constant & 31);
12629 __ rorxl($dst$$Register, $src$$Address, shift);
12630 %}
12631 ins_pipe(ialu_reg_mem);
12632 %}
12633
12634 // Rotate Left by variable
12635 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12636 %{
12637 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12638 match(Set dst (RotateLeft dst shift));
12639 effect(KILL cr);
12640 format %{ "roll $dst, $shift" %}
12641 ins_encode %{
12642 __ roll($dst$$Register);
12643 %}
12644 ins_pipe(ialu_reg_reg);
12645 %}
12646
12647 // Rotate Left by variable
12648 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12649 %{
12650 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12651 match(Set dst (RotateLeft src shift));
12652 effect(KILL cr);
12653
12654 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12655 ins_encode %{
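    // the rotate count is implicit in CL (the rcx-pinned shift operand); the
    // two-register eroll form used here is the rotate-by-CL NDD encoding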
12656 __ eroll($dst$$Register, $src$$Register, false);
12657 %}
12658 ins_pipe(ialu_reg_reg);
12659 %}
12660
12661 // Rotate Right by constant.
12662 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12663 %{
12664 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12665 match(Set dst (RotateRight dst shift));
12666 effect(KILL cr);
12667 format %{ "rorl $dst, $shift" %}
12668 ins_encode %{
12669 __ rorl($dst$$Register, $shift$$constant);
12670 %}
12671 ins_pipe(ialu_reg);
12672 %}
12673
12674 // Rotate Right by constant.
12675 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12676 %{
12677 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12678 match(Set dst (RotateRight src shift));
12679 format %{ "rorxl $dst, $src, $shift" %}
12680 ins_encode %{
12681 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12682 %}
12683 ins_pipe(ialu_reg_reg);
12684 %}
12685
12686 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12687 %{
12688 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12689 match(Set dst (RotateRight (LoadI src) shift));
12690 ins_cost(175);
12691 format %{ "rorxl $dst, $src, $shift" %}
12692 ins_encode %{
12693 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12694 %}
12695 ins_pipe(ialu_reg_mem);
12696 %}
12697
12698 // Rotate Right by variable
12699 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12700 %{
12701 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12702 match(Set dst (RotateRight dst shift));
12703 effect(KILL cr);
12704 format %{ "rorl $dst, $shift" %}
12705 ins_encode %{
12706 __ rorl($dst$$Register);
12707 %}
12708 ins_pipe(ialu_reg_reg);
12709 %}
12710
12711 // Rotate Right by variable
12712 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12713 %{
12714 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12715 match(Set dst (RotateRight src shift));
12716 effect(KILL cr);
12717
12718 format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
12719 ins_encode %{
12720 __ erorl($dst$$Register, $src$$Register, false);
12721 %}
12722 ins_pipe(ialu_reg_reg);
12723 %}
12724
12725 // Rotate Left by constant.
12726 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12727 %{
12728 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12729 match(Set dst (RotateLeft dst shift));
12730 effect(KILL cr);
12731 format %{ "rolq $dst, $shift" %}
12732 ins_encode %{
12733 __ rolq($dst$$Register, $shift$$constant);
12734 %}
12735 ins_pipe(ialu_reg);
12736 %}
12737
12738 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12739 %{
12740 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12741 match(Set dst (RotateLeft src shift));
12742 format %{ "rolxq $dst, $src, $shift" %}
12743 ins_encode %{
12744 int shift = 64 - ($shift$$constant & 63);
12745 __ rorxq($dst$$Register, $src$$Register, shift);
12746 %}
12747 ins_pipe(ialu_reg_reg);
12748 %}
12749
12750 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12751 %{
12752 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12753 match(Set dst (RotateLeft (LoadL src) shift));
12754 ins_cost(175);
12755 format %{ "rolxq $dst, $src, $shift" %}
12756 ins_encode %{
12757 int shift = 64 - ($shift$$constant & 63);
12758 __ rorxq($dst$$Register, $src$$Address, shift);
12759 %}
12760 ins_pipe(ialu_reg_mem);
12761 %}
12762
12763 // Rotate Left by variable
12764 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12765 %{
12766 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12767 match(Set dst (RotateLeft dst shift));
12768 effect(KILL cr);
12769 format %{ "rolq $dst, $shift" %}
12770 ins_encode %{
12771 __ rolq($dst$$Register);
12772 %}
12773 ins_pipe(ialu_reg_reg);
12774 %}
12775
12776 // Rotate Left by variable
12777 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12778 %{
12779 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12780 match(Set dst (RotateLeft src shift));
12781 effect(KILL cr);
12782
12783 format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
12784 ins_encode %{
12785 __ erolq($dst$$Register, $src$$Register, false);
12786 %}
12787 ins_pipe(ialu_reg_reg);
12788 %}
12789
12790 // Rotate Right by constant.
12791 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12792 %{
12793 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12794 match(Set dst (RotateRight dst shift));
12795 effect(KILL cr);
12796 format %{ "rorq $dst, $shift" %}
12797 ins_encode %{
12798 __ rorq($dst$$Register, $shift$$constant);
12799 %}
12800 ins_pipe(ialu_reg);
12801 %}
12802
// Rotate Right by constant.
12804 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12805 %{
12806 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12807 match(Set dst (RotateRight src shift));
12808 format %{ "rorxq $dst, $src, $shift" %}
12809 ins_encode %{
12810 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12811 %}
12812 ins_pipe(ialu_reg_reg);
12813 %}
12814
12815 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12816 %{
12817 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12818 match(Set dst (RotateRight (LoadL src) shift));
12819 ins_cost(175);
12820 format %{ "rorxq $dst, $src, $shift" %}
12821 ins_encode %{
12822 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12823 %}
12824 ins_pipe(ialu_reg_mem);
12825 %}
12826
12827 // Rotate Right by variable
12828 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12829 %{
12830 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12831 match(Set dst (RotateRight dst shift));
12832 effect(KILL cr);
12833 format %{ "rorq $dst, $shift" %}
12834 ins_encode %{
12835 __ rorq($dst$$Register);
12836 %}
12837 ins_pipe(ialu_reg_reg);
12838 %}
12839
12840 // Rotate Right by variable
12841 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12842 %{
12843 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12844 match(Set dst (RotateRight src shift));
12845 effect(KILL cr);
12846
12847 format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
12848 ins_encode %{
12849 __ erorq($dst$$Register, $src$$Register, false);
12850 %}
12851 ins_pipe(ialu_reg_reg);
12852 %}
12853
12854 //----------------------------- CompressBits/ExpandBits ------------------------
12855
12856 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12857 predicate(n->bottom_type()->isa_long());
12858 match(Set dst (CompressBits src mask));
12859 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12860 ins_encode %{
12861 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12862 %}
12863 ins_pipe( pipe_slow );
12864 %}
12865
12866 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12867 predicate(n->bottom_type()->isa_long());
12868 match(Set dst (ExpandBits src mask));
12869 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12870 ins_encode %{
12871 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12872 %}
12873 ins_pipe( pipe_slow );
12874 %}
12875
12876 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12877 predicate(n->bottom_type()->isa_long());
12878 match(Set dst (CompressBits src (LoadL mask)));
12879 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12880 ins_encode %{
12881 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12882 %}
12883 ins_pipe( pipe_slow );
12884 %}
12885
12886 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12887 predicate(n->bottom_type()->isa_long());
12888 match(Set dst (ExpandBits src (LoadL mask)));
12889 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12890 ins_encode %{
12891 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12892 %}
12893 ins_pipe( pipe_slow );
12894 %}
12895
12896
12897 // Logical Instructions
12898
12899 // Integer Logical Instructions
12900
12901 // And Instructions
12902 // And Register with Register
12903 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12904 %{
12905 predicate(!UseAPX);
12906 match(Set dst (AndI dst src));
12907 effect(KILL cr);
12908 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12909
12910 format %{ "andl $dst, $src\t# int" %}
12911 ins_encode %{
12912 __ andl($dst$$Register, $src$$Register);
12913 %}
12914 ins_pipe(ialu_reg_reg);
12915 %}
12916
12917 // And Register with Register using New Data Destination (NDD)
12918 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12919 %{
12920 predicate(UseAPX);
12921 match(Set dst (AndI src1 src2));
12922 effect(KILL cr);
12923 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12924
12925 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12926 ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12930 ins_pipe(ialu_reg_reg);
12931 %}
12932
12933 // And Register with Immediate 255
12934 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12935 %{
12936 match(Set dst (AndI src mask));
12937
12938 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12939 ins_encode %{
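    // movzx leaves the flags untouched, so this AND form needs no KILL cr;
    // zero-extending the low byte is exactly src & 0xFF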
12940 __ movzbl($dst$$Register, $src$$Register);
12941 %}
12942 ins_pipe(ialu_reg);
12943 %}
12944
12945 // And Register with Immediate 255 and promote to long
12946 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12947 %{
12948 match(Set dst (ConvI2L (AndI src mask)));
12949
12950 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12951 ins_encode %{
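    // a 32-bit movzbl also zeroes the upper 32 bits of the destination, so the
    // ConvI2L comes for free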
12952 __ movzbl($dst$$Register, $src$$Register);
12953 %}
12954 ins_pipe(ialu_reg);
12955 %}
12956
12957 // And Register with Immediate 65535
12958 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12959 %{
12960 match(Set dst (AndI src mask));
12961
12962 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12963 ins_encode %{
12964 __ movzwl($dst$$Register, $src$$Register);
12965 %}
12966 ins_pipe(ialu_reg);
12967 %}
12968
12969 // And Register with Immediate 65535 and promote to long
12970 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12971 %{
12972 match(Set dst (ConvI2L (AndI src mask)));
12973
12974 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12975 ins_encode %{
12976 __ movzwl($dst$$Register, $src$$Register);
12977 %}
12978 ins_pipe(ialu_reg);
12979 %}
12980
12981 // Can skip int2long conversions after AND with small bitmask
12982 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12983 %{
12984 predicate(VM_Version::supports_bmi2());
12985 ins_cost(125);
12986 effect(TEMP tmp, KILL cr);
12987 match(Set dst (ConvI2L (AndI src mask)));
12988 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12989 ins_encode %{
12990 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12991 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12992 %}
12993 ins_pipe(ialu_reg_reg);
12994 %}
12995
12996 // And Register with Immediate
12997 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12998 %{
12999 predicate(!UseAPX);
13000 match(Set dst (AndI dst src));
13001 effect(KILL cr);
13002 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13003
13004 format %{ "andl $dst, $src\t# int" %}
13005 ins_encode %{
13006 __ andl($dst$$Register, $src$$constant);
13007 %}
13008 ins_pipe(ialu_reg);
13009 %}
13010
13011 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13012 %{
13013 predicate(UseAPX);
13014 match(Set dst (AndI src1 src2));
13015 effect(KILL cr);
13016 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13017
13018 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13019 ins_encode %{
13020 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13021 %}
13022 ins_pipe(ialu_reg);
13023 %}
13024
13025 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13026 %{
13027 predicate(UseAPX);
13028 match(Set dst (AndI (LoadI src1) src2));
13029 effect(KILL cr);
13030 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13031
13032 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13033 ins_encode %{
13034 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13035 %}
13036 ins_pipe(ialu_reg);
13037 %}
13038
13039 // And Register with Memory
13040 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13041 %{
13042 predicate(!UseAPX);
13043 match(Set dst (AndI dst (LoadI src)));
13044 effect(KILL cr);
13045 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13046
13047 ins_cost(150);
13048 format %{ "andl $dst, $src\t# int" %}
13049 ins_encode %{
13050 __ andl($dst$$Register, $src$$Address);
13051 %}
13052 ins_pipe(ialu_reg_mem);
13053 %}
13054
13055 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13056 %{
13057 predicate(UseAPX);
13058 match(Set dst (AndI src1 (LoadI src2)));
13059 effect(KILL cr);
13060 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13061
13062 ins_cost(150);
13063 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13064 ins_encode %{
13065 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13066 %}
13067 ins_pipe(ialu_reg_mem);
13068 %}
13069
13070 // And Memory with Register
13071 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13072 %{
13073 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13074 effect(KILL cr);
13075 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13076
13077 ins_cost(150);
13078 format %{ "andb $dst, $src\t# byte" %}
13079 ins_encode %{
13080 __ andb($dst$$Address, $src$$Register);
13081 %}
13082 ins_pipe(ialu_mem_reg);
13083 %}
13084
13085 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13086 %{
13087 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13088 effect(KILL cr);
13089 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13090
13091 ins_cost(150);
13092 format %{ "andl $dst, $src\t# int" %}
13093 ins_encode %{
13094 __ andl($dst$$Address, $src$$Register);
13095 %}
13096 ins_pipe(ialu_mem_reg);
13097 %}
13098
13099 // And Memory with Immediate
13100 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13101 %{
13102 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13103 effect(KILL cr);
13104 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13105
13106 ins_cost(125);
13107 format %{ "andl $dst, $src\t# int" %}
13108 ins_encode %{
13109 __ andl($dst$$Address, $src$$constant);
13110 %}
13111 ins_pipe(ialu_mem_imm);
13112 %}
13113
13114 // BMI1 instructions
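// The rules below recognize the canonical bit-trick shapes and fuse two ALU
// operations into a single instruction; as plain C identities:
//
//   andn(a, b)  == ~a & b         // AndI (XorI a -1) b
//   blsi(x)     ==  x & -x        // isolate lowest set bit
//   blsmsk(x)   ==  x ^ (x - 1)   // mask up to and including lowest set bit
//   blsr(x)     ==  x & (x - 1)   // clear lowest set bit
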
13115 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13116 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13117 predicate(UseBMI1Instructions);
13118 effect(KILL cr);
13119 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13120
13121 ins_cost(125);
13122 format %{ "andnl $dst, $src1, $src2" %}
13123
13124 ins_encode %{
13125 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13126 %}
13127 ins_pipe(ialu_reg_mem);
13128 %}
13129
13130 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13131 match(Set dst (AndI (XorI src1 minus_1) src2));
13132 predicate(UseBMI1Instructions);
13133 effect(KILL cr);
13134 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13135
13136 format %{ "andnl $dst, $src1, $src2" %}
13137
13138 ins_encode %{
13139 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13140 %}
13141 ins_pipe(ialu_reg);
13142 %}
13143
13144 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13145 match(Set dst (AndI (SubI imm_zero src) src));
13146 predicate(UseBMI1Instructions);
13147 effect(KILL cr);
13148 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13149
13150 format %{ "blsil $dst, $src" %}
13151
13152 ins_encode %{
13153 __ blsil($dst$$Register, $src$$Register);
13154 %}
13155 ins_pipe(ialu_reg);
13156 %}
13157
13158 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13159 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13160 predicate(UseBMI1Instructions);
13161 effect(KILL cr);
13162 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13163
13164 ins_cost(125);
13165 format %{ "blsil $dst, $src" %}
13166
13167 ins_encode %{
13168 __ blsil($dst$$Register, $src$$Address);
13169 %}
13170 ins_pipe(ialu_reg_mem);
13171 %}
13172
13173 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13174 %{
13175 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13176 predicate(UseBMI1Instructions);
13177 effect(KILL cr);
13178 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13179
13180 ins_cost(125);
13181 format %{ "blsmskl $dst, $src" %}
13182
13183 ins_encode %{
13184 __ blsmskl($dst$$Register, $src$$Address);
13185 %}
13186 ins_pipe(ialu_reg_mem);
13187 %}
13188
13189 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13190 %{
13191 match(Set dst (XorI (AddI src minus_1) src));
13192 predicate(UseBMI1Instructions);
13193 effect(KILL cr);
13194 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13195
13196 format %{ "blsmskl $dst, $src" %}
13197
13198 ins_encode %{
13199 __ blsmskl($dst$$Register, $src$$Register);
13200 %}
13201
13202 ins_pipe(ialu_reg);
13203 %}
13204
13205 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13206 %{
13207 match(Set dst (AndI (AddI src minus_1) src) );
13208 predicate(UseBMI1Instructions);
13209 effect(KILL cr);
13210 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13211
13212 format %{ "blsrl $dst, $src" %}
13213
13214 ins_encode %{
13215 __ blsrl($dst$$Register, $src$$Register);
13216 %}
13217
  ins_pipe(ialu_reg);
13219 %}
13220
13221 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13222 %{
13223 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13224 predicate(UseBMI1Instructions);
13225 effect(KILL cr);
13226 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13227
13228 ins_cost(125);
13229 format %{ "blsrl $dst, $src" %}
13230
13231 ins_encode %{
13232 __ blsrl($dst$$Register, $src$$Address);
13233 %}
13234
  ins_pipe(ialu_reg_mem);
13236 %}
13237
13238 // Or Instructions
13239 // Or Register with Register
13240 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13241 %{
13242 predicate(!UseAPX);
13243 match(Set dst (OrI dst src));
13244 effect(KILL cr);
13245 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13246
13247 format %{ "orl $dst, $src\t# int" %}
13248 ins_encode %{
13249 __ orl($dst$$Register, $src$$Register);
13250 %}
13251 ins_pipe(ialu_reg_reg);
13252 %}
13253
13254 // Or Register with Register using New Data Destination (NDD)
13255 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13256 %{
13257 predicate(UseAPX);
13258 match(Set dst (OrI src1 src2));
13259 effect(KILL cr);
13260 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13261
13262 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13263 ins_encode %{
13264 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13265 %}
13266 ins_pipe(ialu_reg_reg);
13267 %}
13268
13269 // Or Register with Immediate
13270 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13271 %{
13272 predicate(!UseAPX);
13273 match(Set dst (OrI dst src));
13274 effect(KILL cr);
13275 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13276
13277 format %{ "orl $dst, $src\t# int" %}
13278 ins_encode %{
13279 __ orl($dst$$Register, $src$$constant);
13280 %}
13281 ins_pipe(ialu_reg);
13282 %}
13283
13284 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13285 %{
13286 predicate(UseAPX);
13287 match(Set dst (OrI src1 src2));
13288 effect(KILL cr);
13289 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13290
13291 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13292 ins_encode %{
13293 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13294 %}
13295 ins_pipe(ialu_reg);
13296 %}
13297
13298 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13299 %{
13300 predicate(UseAPX);
13301 match(Set dst (OrI src1 src2));
13302 effect(KILL cr);
13303 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13304
13305 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13306 ins_encode %{
13307 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13308 %}
13309 ins_pipe(ialu_reg);
13310 %}
13311
13312 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13313 %{
13314 predicate(UseAPX);
13315 match(Set dst (OrI (LoadI src1) src2));
13316 effect(KILL cr);
13317 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13318
13319 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13320 ins_encode %{
13321 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13322 %}
13323 ins_pipe(ialu_reg);
13324 %}
13325
13326 // Or Register with Memory
13327 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13328 %{
13329 predicate(!UseAPX);
13330 match(Set dst (OrI dst (LoadI src)));
13331 effect(KILL cr);
13332 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13333
13334 ins_cost(150);
13335 format %{ "orl $dst, $src\t# int" %}
13336 ins_encode %{
13337 __ orl($dst$$Register, $src$$Address);
13338 %}
13339 ins_pipe(ialu_reg_mem);
13340 %}
13341
13342 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13343 %{
13344 predicate(UseAPX);
13345 match(Set dst (OrI src1 (LoadI src2)));
13346 effect(KILL cr);
13347 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13348
13349 ins_cost(150);
13350 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13351 ins_encode %{
13352 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13353 %}
13354 ins_pipe(ialu_reg_mem);
13355 %}
13356
13357 // Or Memory with Register
13358 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13359 %{
13360 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13361 effect(KILL cr);
13362 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13363
13364 ins_cost(150);
13365 format %{ "orb $dst, $src\t# byte" %}
13366 ins_encode %{
13367 __ orb($dst$$Address, $src$$Register);
13368 %}
13369 ins_pipe(ialu_mem_reg);
13370 %}
13371
13372 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13373 %{
13374 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13375 effect(KILL cr);
13376 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13377
13378 ins_cost(150);
13379 format %{ "orl $dst, $src\t# int" %}
13380 ins_encode %{
13381 __ orl($dst$$Address, $src$$Register);
13382 %}
13383 ins_pipe(ialu_mem_reg);
13384 %}
13385
13386 // Or Memory with Immediate
13387 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13388 %{
13389 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13390 effect(KILL cr);
13391 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13392
13393 ins_cost(125);
13394 format %{ "orl $dst, $src\t# int" %}
13395 ins_encode %{
13396 __ orl($dst$$Address, $src$$constant);
13397 %}
13398 ins_pipe(ialu_mem_imm);
13399 %}
13400
13401 // Xor Instructions
13402 // Xor Register with Register
13403 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13404 %{
13405 predicate(!UseAPX);
13406 match(Set dst (XorI dst src));
13407 effect(KILL cr);
13408 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13409
13410 format %{ "xorl $dst, $src\t# int" %}
13411 ins_encode %{
13412 __ xorl($dst$$Register, $src$$Register);
13413 %}
13414 ins_pipe(ialu_reg_reg);
13415 %}
13416
13417 // Xor Register with Register using New Data Destination (NDD)
13418 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13419 %{
13420 predicate(UseAPX);
13421 match(Set dst (XorI src1 src2));
13422 effect(KILL cr);
13423 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13424
13425 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13426 ins_encode %{
13427 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13428 %}
13429 ins_pipe(ialu_reg_reg);
13430 %}
13431
13432 // Xor Register with Immediate -1
13433 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13434 %{
13435 predicate(!UseAPX);
13436 match(Set dst (XorI dst imm));
13437
13438 format %{ "notl $dst" %}
13439 ins_encode %{
13440 __ notl($dst$$Register);
13441 %}
13442 ins_pipe(ialu_reg);
13443 %}
13444
13445 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13446 %{
13447 match(Set dst (XorI src imm));
13448 predicate(UseAPX);
13449
13450 format %{ "enotl $dst, $src" %}
13451 ins_encode %{
13452 __ enotl($dst$$Register, $src$$Register);
13453 %}
13454 ins_pipe(ialu_reg);
13455 %}
13456
13457 // Xor Register with Immediate
13458 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13459 %{
  // Strict predicate so that xorI_rReg_im1 is always selected, regardless of cost, when immI src is -1.
13461 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13462 match(Set dst (XorI dst src));
13463 effect(KILL cr);
13464 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13465
13466 format %{ "xorl $dst, $src\t# int" %}
13467 ins_encode %{
13468 __ xorl($dst$$Register, $src$$constant);
13469 %}
13470 ins_pipe(ialu_reg);
13471 %}
13472
13473 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13474 %{
  // Strict predicate so that xorI_rReg_im1_ndd is always selected, regardless of cost, when immI src2 is -1.
13476 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13477 match(Set dst (XorI src1 src2));
13478 effect(KILL cr);
13479 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13480
13481 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13482 ins_encode %{
13483 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13484 %}
13485 ins_pipe(ialu_reg);
13486 %}
13487
13488 // Xor Memory with Immediate
13489 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13490 %{
13491 predicate(UseAPX);
13492 match(Set dst (XorI (LoadI src1) src2));
13493 effect(KILL cr);
13494 ins_cost(150);
13495 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13496
13497 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13498 ins_encode %{
13499 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13500 %}
13501 ins_pipe(ialu_reg);
13502 %}
13503
13504 // Xor Register with Memory
13505 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13506 %{
13507 predicate(!UseAPX);
13508 match(Set dst (XorI dst (LoadI src)));
13509 effect(KILL cr);
13510 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13511
13512 ins_cost(150);
13513 format %{ "xorl $dst, $src\t# int" %}
13514 ins_encode %{
13515 __ xorl($dst$$Register, $src$$Address);
13516 %}
13517 ins_pipe(ialu_reg_mem);
13518 %}
13519
13520 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13521 %{
13522 predicate(UseAPX);
13523 match(Set dst (XorI src1 (LoadI src2)));
13524 effect(KILL cr);
13525 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13526
13527 ins_cost(150);
13528 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13529 ins_encode %{
13530 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13531 %}
13532 ins_pipe(ialu_reg_mem);
13533 %}
13534
13535 // Xor Memory with Register
13536 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13537 %{
13538 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13539 effect(KILL cr);
13540 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13541
13542 ins_cost(150);
13543 format %{ "xorb $dst, $src\t# byte" %}
13544 ins_encode %{
13545 __ xorb($dst$$Address, $src$$Register);
13546 %}
13547 ins_pipe(ialu_mem_reg);
13548 %}
13549
13550 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13551 %{
13552 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13553 effect(KILL cr);
13554 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13555
13556 ins_cost(150);
13557 format %{ "xorl $dst, $src\t# int" %}
13558 ins_encode %{
13559 __ xorl($dst$$Address, $src$$Register);
13560 %}
13561 ins_pipe(ialu_mem_reg);
13562 %}
13563
13564 // Xor Memory with Immediate
13565 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13566 %{
13567 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13568 effect(KILL cr);
13569 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13570
13571 ins_cost(125);
13572 format %{ "xorl $dst, $src\t# int" %}
13573 ins_encode %{
13574 __ xorl($dst$$Address, $src$$constant);
13575 %}
13576 ins_pipe(ialu_mem_imm);
13577 %}
13578
13579
13580 // Long Logical Instructions
13581
13582 // And Instructions
13583 // And Register with Register
13584 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13585 %{
13586 predicate(!UseAPX);
13587 match(Set dst (AndL dst src));
13588 effect(KILL cr);
13589 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13590
13591 format %{ "andq $dst, $src\t# long" %}
13592 ins_encode %{
13593 __ andq($dst$$Register, $src$$Register);
13594 %}
13595 ins_pipe(ialu_reg_reg);
13596 %}
13597
13598 // And Register with Register using New Data Destination (NDD)
13599 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13600 %{
13601 predicate(UseAPX);
13602 match(Set dst (AndL src1 src2));
13603 effect(KILL cr);
13604 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13605
13606 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13607 ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13611 ins_pipe(ialu_reg_reg);
13612 %}
13613
13614 // And Register with Immediate 255
13615 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13616 %{
13617 match(Set dst (AndL src mask));
13618
13619 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13620 ins_encode %{
    // movzbl zeroes the upper 32 bits and does not need REX.W
13622 __ movzbl($dst$$Register, $src$$Register);
13623 %}
13624 ins_pipe(ialu_reg);
13625 %}
13626
13627 // And Register with Immediate 65535
13628 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13629 %{
13630 match(Set dst (AndL src mask));
13631
13632 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13633 ins_encode %{
    // movzwl zeroes the upper 32 bits and does not need REX.W
13635 __ movzwl($dst$$Register, $src$$Register);
13636 %}
13637 ins_pipe(ialu_reg);
13638 %}
13639
13640 // And Register with Immediate
13641 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13642 %{
13643 predicate(!UseAPX);
13644 match(Set dst (AndL dst src));
13645 effect(KILL cr);
13646 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13647
13648 format %{ "andq $dst, $src\t# long" %}
13649 ins_encode %{
13650 __ andq($dst$$Register, $src$$constant);
13651 %}
13652 ins_pipe(ialu_reg);
13653 %}
13654
13655 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13656 %{
13657 predicate(UseAPX);
13658 match(Set dst (AndL src1 src2));
13659 effect(KILL cr);
13660 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13661
13662 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13663 ins_encode %{
13664 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13665 %}
13666 ins_pipe(ialu_reg);
13667 %}
13668
13669 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13670 %{
13671 predicate(UseAPX);
13672 match(Set dst (AndL (LoadL src1) src2));
13673 effect(KILL cr);
13674 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13675
13676 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13677 ins_encode %{
13678 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13679 %}
13680 ins_pipe(ialu_reg);
13681 %}
13682
13683 // And Register with Memory
13684 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13685 %{
13686 predicate(!UseAPX);
13687 match(Set dst (AndL dst (LoadL src)));
13688 effect(KILL cr);
13689 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13690
13691 ins_cost(150);
13692 format %{ "andq $dst, $src\t# long" %}
13693 ins_encode %{
13694 __ andq($dst$$Register, $src$$Address);
13695 %}
13696 ins_pipe(ialu_reg_mem);
13697 %}
13698
13699 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13700 %{
13701 predicate(UseAPX);
13702 match(Set dst (AndL src1 (LoadL src2)));
13703 effect(KILL cr);
13704 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13705
13706 ins_cost(150);
13707 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13708 ins_encode %{
13709 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13710 %}
13711 ins_pipe(ialu_reg_mem);
13712 %}
13713
13714 // And Memory with Register
13715 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13716 %{
13717 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13718 effect(KILL cr);
13719 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13720
13721 ins_cost(150);
13722 format %{ "andq $dst, $src\t# long" %}
13723 ins_encode %{
13724 __ andq($dst$$Address, $src$$Register);
13725 %}
13726 ins_pipe(ialu_mem_reg);
13727 %}
13728
13729 // And Memory with Immediate
13730 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13731 %{
13732 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13733 effect(KILL cr);
13734 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13735
13736 ins_cost(125);
13737 format %{ "andq $dst, $src\t# long" %}
13738 ins_encode %{
13739 __ andq($dst$$Address, $src$$constant);
13740 %}
13741 ins_pipe(ialu_mem_imm);
13742 %}
13743
13744 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13745 %{
  // con must be a genuine 64-bit immediate whose complement is a power of 2;
  // masks that fit a sign-extended 32-bit immediate are handled just as well by andq.
13748 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13749
13750 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13751 effect(KILL cr);
13752
13753 ins_cost(125);
13754 format %{ "btrq $dst, log2(not($con))\t# long" %}
13755 ins_encode %{
13756 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13757 %}
13758 ins_pipe(ialu_mem_imm);
13759 %}
13760
13761 // BMI1 instructions
13762 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13763 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13764 predicate(UseBMI1Instructions);
13765 effect(KILL cr);
13766 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13767
13768 ins_cost(125);
13769 format %{ "andnq $dst, $src1, $src2" %}
13770
13771 ins_encode %{
13772 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13773 %}
13774 ins_pipe(ialu_reg_mem);
13775 %}
13776
13777 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13778 match(Set dst (AndL (XorL src1 minus_1) src2));
13779 predicate(UseBMI1Instructions);
13780 effect(KILL cr);
13781 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13782
13783 format %{ "andnq $dst, $src1, $src2" %}
13784
13785 ins_encode %{
13786 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13787 %}
13788 ins_pipe(ialu_reg_mem);
13789 %}
13790
13791 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13792 match(Set dst (AndL (SubL imm_zero src) src));
13793 predicate(UseBMI1Instructions);
13794 effect(KILL cr);
13795 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13796
13797 format %{ "blsiq $dst, $src" %}
13798
13799 ins_encode %{
13800 __ blsiq($dst$$Register, $src$$Register);
13801 %}
13802 ins_pipe(ialu_reg);
13803 %}
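
// BLSI isolates the lowest set bit, matching the (0 - src) & src pattern
// above. Minimal illustrative sketch (C++, not VM code):
//
//   #include <cstdint>
//   inline uint64_t blsi(uint64_t x) { return x & (0 - x); } // 0b10110 -> 0b00010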
13804
13805 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13806 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13807 predicate(UseBMI1Instructions);
13808 effect(KILL cr);
13809 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13810
13811 ins_cost(125);
13812 format %{ "blsiq $dst, $src" %}
13813
13814 ins_encode %{
13815 __ blsiq($dst$$Register, $src$$Address);
13816 %}
13817 ins_pipe(ialu_reg_mem);
13818 %}
13819
13820 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13821 %{
13822 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13823 predicate(UseBMI1Instructions);
13824 effect(KILL cr);
13825 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13826
13827 ins_cost(125);
13828 format %{ "blsmskq $dst, $src" %}
13829
13830 ins_encode %{
13831 __ blsmskq($dst$$Register, $src$$Address);
13832 %}
13833 ins_pipe(ialu_reg_mem);
13834 %}
13835
13836 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13837 %{
13838 match(Set dst (XorL (AddL src minus_1) src));
13839 predicate(UseBMI1Instructions);
13840 effect(KILL cr);
13841 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13842
13843 format %{ "blsmskq $dst, $src" %}
13844
13845 ins_encode %{
13846 __ blsmskq($dst$$Register, $src$$Register);
13847 %}
13848
13849 ins_pipe(ialu_reg);
13850 %}
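
// BLSMSK builds a mask up to and including the lowest set bit, matching the
// (src + (-1)) ^ src pattern above. Illustrative sketch (C++, not VM code):
//
//   #include <cstdint>
//   inline uint64_t blsmsk(uint64_t x) { return x ^ (x - 1); } // 0b10100 -> 0b00111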
13851
13852 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13853 %{
13854 match(Set dst (AndL (AddL src minus_1) src) );
13855 predicate(UseBMI1Instructions);
13856 effect(KILL cr);
13857 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13858
13859 format %{ "blsrq $dst, $src" %}
13860
13861 ins_encode %{
13862 __ blsrq($dst$$Register, $src$$Register);
13863 %}
13864
13865 ins_pipe(ialu_reg);
13866 %}
13867
13868 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13869 %{
13870 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13871 predicate(UseBMI1Instructions);
13872 effect(KILL cr);
13873 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13874
13875 ins_cost(125);
13876 format %{ "blsrq $dst, $src" %}
13877
13878 ins_encode %{
13879 __ blsrq($dst$$Register, $src$$Address);
13880 %}
13881
13882 ins_pipe(ialu_reg);
13883 %}
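
// BLSR clears the lowest set bit, matching (src + (-1)) & src. Illustrative
// sketch (C++, not VM code):
//
//   #include <cstdint>
//   inline uint64_t blsr(uint64_t x) { return x & (x - 1); } // 0b10110 -> 0b10100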
13884
13885 // Or Instructions
13886 // Or Register with Register
13887 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13888 %{
13889 predicate(!UseAPX);
13890 match(Set dst (OrL dst src));
13891 effect(KILL cr);
13892 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13893
13894 format %{ "orq $dst, $src\t# long" %}
13895 ins_encode %{
13896 __ orq($dst$$Register, $src$$Register);
13897 %}
13898 ins_pipe(ialu_reg_reg);
13899 %}
13900
13901 // Or Register with Register using New Data Destination (NDD)
13902 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13903 %{
13904 predicate(UseAPX);
13905 match(Set dst (OrL src1 src2));
13906 effect(KILL cr);
13907 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13908
13909 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13910 ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13914 ins_pipe(ialu_reg_reg);
13915 %}
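
// APX NDD (new data destination) forms take a third, independent destination,
// so the matcher no longer has to coerce dst == src1 the way the two-operand
// orq rule above does. Rough scalar model of eorq(dst, src1, src2) (a sketch,
// not the assembler contract):
//
//   // dst = src1 | src2; src1 and src2 are left unmodified.
//   inline uint64_t ndd_or(uint64_t src1, uint64_t src2) { return src1 | src2; }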
13916
13917 // Use any_RegP to match R15 (TLS register) without spilling.
13918 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13920 effect(KILL cr);
13921 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13922
13923 format %{ "orq $dst, $src\t# long" %}
13924 ins_encode %{
13925 __ orq($dst$$Register, $src$$Register);
13926 %}
13927 ins_pipe(ialu_reg_reg);
13928 %}
13929
13930 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13932 effect(KILL cr);
13933 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13934
13935 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13936 ins_encode %{
13937 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13938 %}
13939 ins_pipe(ialu_reg_reg);
13940 %}
13941
13942 // Or Register with Immediate
13943 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13944 %{
13945 predicate(!UseAPX);
13946 match(Set dst (OrL dst src));
13947 effect(KILL cr);
13948 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13949
13950 format %{ "orq $dst, $src\t# long" %}
13951 ins_encode %{
13952 __ orq($dst$$Register, $src$$constant);
13953 %}
13954 ins_pipe(ialu_reg);
13955 %}
13956
13957 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13958 %{
13959 predicate(UseAPX);
13960 match(Set dst (OrL src1 src2));
13961 effect(KILL cr);
13962 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13963
13964 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13965 ins_encode %{
13966 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13967 %}
13968 ins_pipe(ialu_reg);
13969 %}
13970
13971 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13972 %{
13973 predicate(UseAPX);
13974 match(Set dst (OrL src1 src2));
13975 effect(KILL cr);
13976 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13977
13978 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
13979 ins_encode %{
13980 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13981 %}
13982 ins_pipe(ialu_reg);
13983 %}
13984
// Or Memory with Immediate into Register (NDD)
13986 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13987 %{
13988 predicate(UseAPX);
13989 match(Set dst (OrL (LoadL src1) src2));
13990 effect(KILL cr);
13991 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13992
13993 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13994 ins_encode %{
13995 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13996 %}
13997 ins_pipe(ialu_reg);
13998 %}
13999
14000 // Or Register with Memory
14001 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14002 %{
14003 predicate(!UseAPX);
14004 match(Set dst (OrL dst (LoadL src)));
14005 effect(KILL cr);
14006 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14007
14008 ins_cost(150);
14009 format %{ "orq $dst, $src\t# long" %}
14010 ins_encode %{
14011 __ orq($dst$$Register, $src$$Address);
14012 %}
14013 ins_pipe(ialu_reg_mem);
14014 %}
14015
14016 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14017 %{
14018 predicate(UseAPX);
14019 match(Set dst (OrL src1 (LoadL src2)));
14020 effect(KILL cr);
14021 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14022
14023 ins_cost(150);
14024 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14025 ins_encode %{
14026 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14027 %}
14028 ins_pipe(ialu_reg_mem);
14029 %}
14030
14031 // Or Memory with Register
14032 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14033 %{
14034 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14035 effect(KILL cr);
14036 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14037
14038 ins_cost(150);
14039 format %{ "orq $dst, $src\t# long" %}
14040 ins_encode %{
14041 __ orq($dst$$Address, $src$$Register);
14042 %}
14043 ins_pipe(ialu_mem_reg);
14044 %}
14045
14046 // Or Memory with Immediate
14047 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14048 %{
14049 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14050 effect(KILL cr);
14051 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14052
14053 ins_cost(125);
14054 format %{ "orq $dst, $src\t# long" %}
14055 ins_encode %{
14056 __ orq($dst$$Address, $src$$constant);
14057 %}
14058 ins_pipe(ialu_mem_imm);
14059 %}
14060
14061 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14062 %{
14063 // con should be a pure 64-bit power of 2 immediate
14064 // because AND/OR works well enough for 8/32-bit values.
14065 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14066
14067 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14068 effect(KILL cr);
14069
14070 ins_cost(125);
14071 format %{ "btsq $dst, log2($con)\t# long" %}
14072 ins_encode %{
14073 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14074 %}
14075 ins_pipe(ialu_mem_imm);
14076 %}
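
// The btsq form is the OR-side twin of btrL_mem_imm: OR with a single-bit
// 64-bit immediate is just setting that bit. Illustrative sketch (C++, ours):
//
//   #include <cstdint>
//   inline uint64_t bts(uint64_t x, unsigned k) { return x | (1ULL << k); }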
14077
14078 // Xor Instructions
14079 // Xor Register with Register
14080 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14081 %{
14082 predicate(!UseAPX);
14083 match(Set dst (XorL dst src));
14084 effect(KILL cr);
14085 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14086
14087 format %{ "xorq $dst, $src\t# long" %}
14088 ins_encode %{
14089 __ xorq($dst$$Register, $src$$Register);
14090 %}
14091 ins_pipe(ialu_reg_reg);
14092 %}
14093
14094 // Xor Register with Register using New Data Destination (NDD)
14095 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14096 %{
14097 predicate(UseAPX);
14098 match(Set dst (XorL src1 src2));
14099 effect(KILL cr);
14100 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14101
14102 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14103 ins_encode %{
14104 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14105 %}
14106 ins_pipe(ialu_reg_reg);
14107 %}
14108
14109 // Xor Register with Immediate -1
14110 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14111 %{
14112 predicate(!UseAPX);
14113 match(Set dst (XorL dst imm));
14114
14115 format %{ "notq $dst" %}
14116 ins_encode %{
14117 __ notq($dst$$Register);
14118 %}
14119 ins_pipe(ialu_reg);
14120 %}
14121
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14123 %{
14124 predicate(UseAPX);
14125 match(Set dst (XorL src imm));
14126
14127 format %{ "enotq $dst, $src" %}
14128 ins_encode %{
14129 __ enotq($dst$$Register, $src$$Register);
14130 %}
14131 ins_pipe(ialu_reg);
14132 %}
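
// Both im1 rules rely on the identity x ^ ~0 == ~x, so a NOT is emitted and
// no flags are clobbered (note the absence of KILL cr). Sketch (C++, ours):
//
//   #include <cstdint>
//   inline uint64_t not_via_xor(uint64_t x) { return x ^ ~0ULL; } // == ~x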
14133
14134 // Xor Register with Immediate
14135 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14136 %{
  // Strict predicate check to make selection of xorL_rReg_im1 cost-agnostic when immL32 src is -1.
14138 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14139 match(Set dst (XorL dst src));
14140 effect(KILL cr);
14141 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14142
14143 format %{ "xorq $dst, $src\t# long" %}
14144 ins_encode %{
14145 __ xorq($dst$$Register, $src$$constant);
14146 %}
14147 ins_pipe(ialu_reg);
14148 %}
14149
14150 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14151 %{
  // Strict predicate check to make selection of xorL_rReg_im1_ndd cost-agnostic when immL32 src2 is -1.
14153 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14154 match(Set dst (XorL src1 src2));
14155 effect(KILL cr);
14156 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14157
14158 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14159 ins_encode %{
14160 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14161 %}
14162 ins_pipe(ialu_reg);
14163 %}
14164
// Xor Memory with Immediate into Register (NDD)
14166 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14167 %{
14168 predicate(UseAPX);
14169 match(Set dst (XorL (LoadL src1) src2));
14170 effect(KILL cr);
14171 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14172 ins_cost(150);
14173
14174 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14175 ins_encode %{
14176 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14177 %}
14178 ins_pipe(ialu_reg);
14179 %}
14180
14181 // Xor Register with Memory
14182 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14183 %{
14184 predicate(!UseAPX);
14185 match(Set dst (XorL dst (LoadL src)));
14186 effect(KILL cr);
14187 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14188
14189 ins_cost(150);
14190 format %{ "xorq $dst, $src\t# long" %}
14191 ins_encode %{
14192 __ xorq($dst$$Register, $src$$Address);
14193 %}
14194 ins_pipe(ialu_reg_mem);
14195 %}
14196
14197 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14198 %{
14199 predicate(UseAPX);
14200 match(Set dst (XorL src1 (LoadL src2)));
14201 effect(KILL cr);
14202 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14203
14204 ins_cost(150);
14205 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14206 ins_encode %{
14207 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14208 %}
14209 ins_pipe(ialu_reg_mem);
14210 %}
14211
14212 // Xor Memory with Register
14213 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14214 %{
14215 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14216 effect(KILL cr);
14217 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14218
14219 ins_cost(150);
14220 format %{ "xorq $dst, $src\t# long" %}
14221 ins_encode %{
14222 __ xorq($dst$$Address, $src$$Register);
14223 %}
14224 ins_pipe(ialu_mem_reg);
14225 %}
14226
14227 // Xor Memory with Immediate
14228 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14229 %{
14230 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14231 effect(KILL cr);
14232 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14233
14234 ins_cost(125);
14235 format %{ "xorq $dst, $src\t# long" %}
14236 ins_encode %{
14237 __ xorq($dst$$Address, $src$$constant);
14238 %}
14239 ins_pipe(ialu_mem_imm);
14240 %}
14241
14242 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14243 %{
14244 match(Set dst (CmpLTMask p q));
14245 effect(KILL cr);
14246
14247 ins_cost(400);
14248 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14249 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14250 "negl $dst" %}
14251 ins_encode %{
14252 __ cmpl($p$$Register, $q$$Register);
14253 __ setcc(Assembler::less, $dst$$Register);
14254 __ negl($dst$$Register);
14255 %}
14256 ins_pipe(pipe_slow);
14257 %}
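
// CmpLTMask yields all ones when p < q and zero otherwise. Scalar model of
// the branch-free encoding above (a sketch, not VM code):
//
//   inline int32_t cmp_lt_mask(int32_t p, int32_t q) {
//     int32_t t = (p < q) ? 1 : 0;  // cmpl + setcc(less)
//     return -t;                    // negl: 1 -> 0xffffffff, 0 -> 0
//   }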
14258
14259 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14260 %{
14261 match(Set dst (CmpLTMask dst zero));
14262 effect(KILL cr);
14263
14264 ins_cost(100);
14265 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14266 ins_encode %{
14267 __ sarl($dst$$Register, 31);
14268 %}
14269 ins_pipe(ialu_reg);
14270 %}
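
// With q == 0 the mask is just the sign of dst: an arithmetic shift by 31
// replicates the sign bit into every position. Sketch (C++; right-shifting a
// negative int is arithmetic on x86 compilers, guaranteed only since C++20):
//
//   inline int32_t lt_zero_mask(int32_t x) { return x >> 31; } // 0 or -1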
14271
14272 /* Better to save a register than avoid a branch */
14273 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14274 %{
14275 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14276 effect(KILL cr);
14277 ins_cost(300);
14278 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14279 "jge done\n\t"
14280 "addl $p,$y\n"
14281 "done: " %}
14282 ins_encode %{
14283 Register Rp = $p$$Register;
14284 Register Rq = $q$$Register;
14285 Register Ry = $y$$Register;
14286 Label done;
14287 __ subl(Rp, Rq);
14288 __ jccb(Assembler::greaterEqual, done);
14289 __ addl(Rp, Ry);
14290 __ bind(done);
14291 %}
14292 ins_pipe(pipe_cmplt);
14293 %}
14294
14295 /* Better to save a register than avoid a branch */
14296 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14297 %{
14298 match(Set y (AndI (CmpLTMask p q) y));
14299 effect(KILL cr);
14300
14301 ins_cost(300);
14302
14303 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14304 "jlt done\n\t"
14305 "xorl $y, $y\n"
14306 "done: " %}
14307 ins_encode %{
14308 Register Rp = $p$$Register;
14309 Register Rq = $q$$Register;
14310 Register Ry = $y$$Register;
14311 Label done;
14312 __ cmpl(Rp, Rq);
14313 __ jccb(Assembler::less, done);
14314 __ xorl(Ry, Ry);
14315 __ bind(done);
14316 %}
14317 ins_pipe(pipe_cmplt);
14318 %}
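
// Both fused forms above trade the branch-free mask for a short forward
// branch so the result can be updated in place without an extra register.
// Scalar model of cadd_cmpLTMask (a sketch, not VM code):
//
//   inline int32_t cadd_cmp_lt_mask(int32_t p, int32_t q, int32_t y) {
//     int32_t r = p - q;        // subl
//     if (r < 0) r += y;        // jge done; addl
//     return r;                 // == ((p < q ? -1 : 0) & y) + (p - q)
//   }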
14319
14320
14321 //---------- FP Instructions------------------------------------------------
14322
14323 // Really expensive, avoid
14324 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14325 %{
14326 match(Set cr (CmpF src1 src2));
14327
14328 ins_cost(500);
14329 format %{ "ucomiss $src1, $src2\n\t"
14330 "jnp,s exit\n\t"
14331 "pushfq\t# saw NaN, set CF\n\t"
14332 "andq [rsp], #0xffffff2b\n\t"
14333 "popfq\n"
14334 "exit:" %}
14335 ins_encode %{
14336 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14337 emit_cmpfp_fixup(masm);
14338 %}
14339 ins_pipe(pipe_slow);
14340 %}
14341
14342 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14343 match(Set cr (CmpF src1 src2));
14344
14345 ins_cost(100);
14346 format %{ "ucomiss $src1, $src2" %}
14347 ins_encode %{
14348 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14349 %}
14350 ins_pipe(pipe_slow);
14351 %}
14352
14353 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14354 match(Set cr (CmpF src1 (LoadF src2)));
14355
14356 ins_cost(100);
14357 format %{ "ucomiss $src1, $src2" %}
14358 ins_encode %{
14359 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14360 %}
14361 ins_pipe(pipe_slow);
14362 %}
14363
14364 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14365 match(Set cr (CmpF src con));
14366 ins_cost(100);
14367 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14368 ins_encode %{
14369 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14370 %}
14371 ins_pipe(pipe_slow);
14372 %}
14373
14374 // Really expensive, avoid
14375 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14376 %{
14377 match(Set cr (CmpD src1 src2));
14378
14379 ins_cost(500);
14380 format %{ "ucomisd $src1, $src2\n\t"
14381 "jnp,s exit\n\t"
14382 "pushfq\t# saw NaN, set CF\n\t"
14383 "andq [rsp], #0xffffff2b\n\t"
14384 "popfq\n"
14385 "exit:" %}
14386 ins_encode %{
14387 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14388 emit_cmpfp_fixup(masm);
14389 %}
14390 ins_pipe(pipe_slow);
14391 %}
14392
14393 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14394 match(Set cr (CmpD src1 src2));
14395
14396 ins_cost(100);
14397 format %{ "ucomisd $src1, $src2 test" %}
14398 ins_encode %{
14399 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14400 %}
14401 ins_pipe(pipe_slow);
14402 %}
14403
14404 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14405 match(Set cr (CmpD src1 (LoadD src2)));
14406
14407 ins_cost(100);
14408 format %{ "ucomisd $src1, $src2" %}
14409 ins_encode %{
14410 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14411 %}
14412 ins_pipe(pipe_slow);
14413 %}
14414
14415 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14416 match(Set cr (CmpD src con));
14417 ins_cost(100);
14418 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14419 ins_encode %{
14420 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14421 %}
14422 ins_pipe(pipe_slow);
14423 %}
14424
14425 // Compare into -1,0,1
14426 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14427 %{
14428 match(Set dst (CmpF3 src1 src2));
14429 effect(KILL cr);
14430
14431 ins_cost(275);
14432 format %{ "ucomiss $src1, $src2\n\t"
14433 "movl $dst, #-1\n\t"
14434 "jp,s done\n\t"
14435 "jb,s done\n\t"
14436 "setne $dst\n\t"
14437 "movzbl $dst, $dst\n"
14438 "done:" %}
14439 ins_encode %{
14440 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14441 emit_cmpfp3(masm, $dst$$Register);
14442 %}
14443 ins_pipe(pipe_slow);
14444 %}
14445
14446 // Compare into -1,0,1
14447 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14448 %{
14449 match(Set dst (CmpF3 src1 (LoadF src2)));
14450 effect(KILL cr);
14451
14452 ins_cost(275);
14453 format %{ "ucomiss $src1, $src2\n\t"
14454 "movl $dst, #-1\n\t"
14455 "jp,s done\n\t"
14456 "jb,s done\n\t"
14457 "setne $dst\n\t"
14458 "movzbl $dst, $dst\n"
14459 "done:" %}
14460 ins_encode %{
14461 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14462 emit_cmpfp3(masm, $dst$$Register);
14463 %}
14464 ins_pipe(pipe_slow);
14465 %}
14466
14467 // Compare into -1,0,1
14468 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14469 match(Set dst (CmpF3 src con));
14470 effect(KILL cr);
14471
14472 ins_cost(275);
14473 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14474 "movl $dst, #-1\n\t"
14475 "jp,s done\n\t"
14476 "jb,s done\n\t"
14477 "setne $dst\n\t"
14478 "movzbl $dst, $dst\n"
14479 "done:" %}
14480 ins_encode %{
14481 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14482 emit_cmpfp3(masm, $dst$$Register);
14483 %}
14484 ins_pipe(pipe_slow);
14485 %}
14486
14487 // Compare into -1,0,1
14488 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14489 %{
14490 match(Set dst (CmpD3 src1 src2));
14491 effect(KILL cr);
14492
14493 ins_cost(275);
14494 format %{ "ucomisd $src1, $src2\n\t"
14495 "movl $dst, #-1\n\t"
14496 "jp,s done\n\t"
14497 "jb,s done\n\t"
14498 "setne $dst\n\t"
14499 "movzbl $dst, $dst\n"
14500 "done:" %}
14501 ins_encode %{
14502 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14503 emit_cmpfp3(masm, $dst$$Register);
14504 %}
14505 ins_pipe(pipe_slow);
14506 %}
14507
14508 // Compare into -1,0,1
14509 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14510 %{
14511 match(Set dst (CmpD3 src1 (LoadD src2)));
14512 effect(KILL cr);
14513
14514 ins_cost(275);
14515 format %{ "ucomisd $src1, $src2\n\t"
14516 "movl $dst, #-1\n\t"
14517 "jp,s done\n\t"
14518 "jb,s done\n\t"
14519 "setne $dst\n\t"
14520 "movzbl $dst, $dst\n"
14521 "done:" %}
14522 ins_encode %{
14523 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14524 emit_cmpfp3(masm, $dst$$Register);
14525 %}
14526 ins_pipe(pipe_slow);
14527 %}
14528
14529 // Compare into -1,0,1
14530 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14531 match(Set dst (CmpD3 src con));
14532 effect(KILL cr);
14533
14534 ins_cost(275);
14535 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14536 "movl $dst, #-1\n\t"
14537 "jp,s done\n\t"
14538 "jb,s done\n\t"
14539 "setne $dst\n\t"
14540 "movzbl $dst, $dst\n"
14541 "done:" %}
14542 ins_encode %{
14543 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14544 emit_cmpfp3(masm, $dst$$Register);
14545 %}
14546 ins_pipe(pipe_slow);
14547 %}
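
// All six Cmp{F,D}3 rules funnel through emit_cmpfp3, which implements the
// Java fcmpl/dcmpl result: -1 for less *or unordered* (the jp/jb paths keep
// the preloaded -1), 0 for equal, +1 for greater. Java-semantics sketch
// (C++, ours):
//
//   inline int cmpfp3(double a, double b) {
//     if (a != a || b != b) return -1; // unordered (NaN): parity set
//     if (a < b)            return -1; // below
//     return (a > b) ? 1 : 0;          // setne + movzbl
//   }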
14548
14549 //----------Arithmetic Conversion Instructions---------------------------------
14550
14551 instruct convF2D_reg_reg(regD dst, regF src)
14552 %{
14553 match(Set dst (ConvF2D src));
14554
14555 format %{ "cvtss2sd $dst, $src" %}
14556 ins_encode %{
14557 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14558 %}
14559 ins_pipe(pipe_slow); // XXX
14560 %}
14561
14562 instruct convF2D_reg_mem(regD dst, memory src)
14563 %{
14564 predicate(UseAVX == 0);
14565 match(Set dst (ConvF2D (LoadF src)));
14566
14567 format %{ "cvtss2sd $dst, $src" %}
14568 ins_encode %{
14569 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14570 %}
14571 ins_pipe(pipe_slow); // XXX
14572 %}
14573
14574 instruct convD2F_reg_reg(regF dst, regD src)
14575 %{
14576 match(Set dst (ConvD2F src));
14577
14578 format %{ "cvtsd2ss $dst, $src" %}
14579 ins_encode %{
14580 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14581 %}
14582 ins_pipe(pipe_slow); // XXX
14583 %}
14584
14585 instruct convD2F_reg_mem(regF dst, memory src)
14586 %{
14587 predicate(UseAVX == 0);
14588 match(Set dst (ConvD2F (LoadD src)));
14589
14590 format %{ "cvtsd2ss $dst, $src" %}
14591 ins_encode %{
14592 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14593 %}
14594 ins_pipe(pipe_slow); // XXX
14595 %}
14596
14597 // XXX do mem variants
14598 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14599 %{
14600 predicate(!VM_Version::supports_avx10_2());
14601 match(Set dst (ConvF2I src));
14602 effect(KILL cr);
14603 format %{ "convert_f2i $dst, $src" %}
14604 ins_encode %{
14605 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14606 %}
14607 ins_pipe(pipe_slow);
14608 %}
14609
14610 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14611 %{
14612 predicate(VM_Version::supports_avx10_2());
14613 match(Set dst (ConvF2I src));
14614 format %{ "evcvttss2sisl $dst, $src" %}
14615 ins_encode %{
14616 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14617 %}
14618 ins_pipe(pipe_slow);
14619 %}
14620
14621 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14622 %{
14623 predicate(VM_Version::supports_avx10_2());
14624 match(Set dst (ConvF2I (LoadF src)));
14625 format %{ "evcvttss2sisl $dst, $src" %}
14626 ins_encode %{
14627 __ evcvttss2sisl($dst$$Register, $src$$Address);
14628 %}
14629 ins_pipe(pipe_slow);
14630 %}
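
// Note: the *_avx10 variants need neither KILL cr nor the convertF2I fixup
// stub because, as we read the AVX10.2 saturating converts, NaN and
// out-of-range inputs already produce the Java-mandated result instead of
// the legacy "integer indefinite" value that the fixup path corrects.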
14631
14632 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14633 %{
14634 predicate(!VM_Version::supports_avx10_2());
14635 match(Set dst (ConvF2L src));
14636 effect(KILL cr);
14637 format %{ "convert_f2l $dst, $src"%}
14638 ins_encode %{
14639 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14640 %}
14641 ins_pipe(pipe_slow);
14642 %}
14643
14644 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14645 %{
14646 predicate(VM_Version::supports_avx10_2());
14647 match(Set dst (ConvF2L src));
14648 format %{ "evcvttss2sisq $dst, $src" %}
14649 ins_encode %{
14650 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14651 %}
14652 ins_pipe(pipe_slow);
14653 %}
14654
14655 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14656 %{
14657 predicate(VM_Version::supports_avx10_2());
14658 match(Set dst (ConvF2L (LoadF src)));
14659 format %{ "evcvttss2sisq $dst, $src" %}
14660 ins_encode %{
14661 __ evcvttss2sisq($dst$$Register, $src$$Address);
14662 %}
14663 ins_pipe(pipe_slow);
14664 %}
14665
14666 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14667 %{
14668 predicate(!VM_Version::supports_avx10_2());
14669 match(Set dst (ConvD2I src));
14670 effect(KILL cr);
14671 format %{ "convert_d2i $dst, $src"%}
14672 ins_encode %{
14673 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14674 %}
14675 ins_pipe(pipe_slow);
14676 %}
14677
14678 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14679 %{
14680 predicate(VM_Version::supports_avx10_2());
14681 match(Set dst (ConvD2I src));
14682 format %{ "evcvttsd2sisl $dst, $src" %}
14683 ins_encode %{
14684 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14685 %}
14686 ins_pipe(pipe_slow);
14687 %}
14688
14689 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14690 %{
14691 predicate(VM_Version::supports_avx10_2());
14692 match(Set dst (ConvD2I (LoadD src)));
14693 format %{ "evcvttsd2sisl $dst, $src" %}
14694 ins_encode %{
14695 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14696 %}
14697 ins_pipe(pipe_slow);
14698 %}
14699
14700 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14701 %{
14702 predicate(!VM_Version::supports_avx10_2());
14703 match(Set dst (ConvD2L src));
14704 effect(KILL cr);
14705 format %{ "convert_d2l $dst, $src"%}
14706 ins_encode %{
14707 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14708 %}
14709 ins_pipe(pipe_slow);
14710 %}
14711
14712 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14713 %{
14714 predicate(VM_Version::supports_avx10_2());
14715 match(Set dst (ConvD2L src));
14716 format %{ "evcvttsd2sisq $dst, $src" %}
14717 ins_encode %{
14718 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14719 %}
14720 ins_pipe(pipe_slow);
14721 %}
14722
14723 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14724 %{
14725 predicate(VM_Version::supports_avx10_2());
14726 match(Set dst (ConvD2L (LoadD src)));
14727 format %{ "evcvttsd2sisq $dst, $src" %}
14728 ins_encode %{
14729 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14730 %}
14731 ins_pipe(pipe_slow);
14732 %}
14733
14734 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14735 %{
14736 match(Set dst (RoundD src));
14737 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14738 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14739 ins_encode %{
14740 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14741 %}
14742 ins_pipe(pipe_slow);
14743 %}
14744
14745 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14746 %{
14747 match(Set dst (RoundF src));
14748 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14749 format %{ "round_float $dst,$src" %}
14750 ins_encode %{
14751 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14752 %}
14753 ins_pipe(pipe_slow);
14754 %}
14755
14756 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14757 %{
14758 predicate(!UseXmmI2F);
14759 match(Set dst (ConvI2F src));
14760
14761 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14762 ins_encode %{
14763 if (UseAVX > 0) {
14764 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14765 }
14766 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14767 %}
14768 ins_pipe(pipe_slow); // XXX
14769 %}
14770
14771 instruct convI2F_reg_mem(regF dst, memory src)
14772 %{
14773 predicate(UseAVX == 0);
14774 match(Set dst (ConvI2F (LoadI src)));
14775
14776 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14777 ins_encode %{
14778 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14779 %}
14780 ins_pipe(pipe_slow); // XXX
14781 %}
14782
14783 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14784 %{
14785 predicate(!UseXmmI2D);
14786 match(Set dst (ConvI2D src));
14787
14788 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14789 ins_encode %{
14790 if (UseAVX > 0) {
14791 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14792 }
14793 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14794 %}
14795 ins_pipe(pipe_slow); // XXX
14796 %}
14797
14798 instruct convI2D_reg_mem(regD dst, memory src)
14799 %{
14800 predicate(UseAVX == 0);
14801 match(Set dst (ConvI2D (LoadI src)));
14802
14803 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14804 ins_encode %{
14805 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14806 %}
14807 ins_pipe(pipe_slow); // XXX
14808 %}
14809
14810 instruct convXI2F_reg(regF dst, rRegI src)
14811 %{
14812 predicate(UseXmmI2F);
14813 match(Set dst (ConvI2F src));
14814
14815 format %{ "movdl $dst, $src\n\t"
14816 "cvtdq2psl $dst, $dst\t# i2f" %}
14817 ins_encode %{
14818 __ movdl($dst$$XMMRegister, $src$$Register);
14819 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14820 %}
14821 ins_pipe(pipe_slow); // XXX
14822 %}
14823
14824 instruct convXI2D_reg(regD dst, rRegI src)
14825 %{
14826 predicate(UseXmmI2D);
14827 match(Set dst (ConvI2D src));
14828
14829 format %{ "movdl $dst, $src\n\t"
14830 "cvtdq2pdl $dst, $dst\t# i2d" %}
14831 ins_encode %{
14832 __ movdl($dst$$XMMRegister, $src$$Register);
14833 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14834 %}
14835 ins_pipe(pipe_slow); // XXX
14836 %}
14837
14838 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14839 %{
14840 match(Set dst (ConvL2F src));
14841
14842 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14843 ins_encode %{
14844 if (UseAVX > 0) {
14845 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14846 }
14847 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14848 %}
14849 ins_pipe(pipe_slow); // XXX
14850 %}
14851
14852 instruct convL2F_reg_mem(regF dst, memory src)
14853 %{
14854 predicate(UseAVX == 0);
14855 match(Set dst (ConvL2F (LoadL src)));
14856
14857 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14858 ins_encode %{
14859 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14860 %}
14861 ins_pipe(pipe_slow); // XXX
14862 %}
14863
14864 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14865 %{
14866 match(Set dst (ConvL2D src));
14867
14868 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14869 ins_encode %{
14870 if (UseAVX > 0) {
14871 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14872 }
14873 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14874 %}
14875 ins_pipe(pipe_slow); // XXX
14876 %}
14877
14878 instruct convL2D_reg_mem(regD dst, memory src)
14879 %{
14880 predicate(UseAVX == 0);
14881 match(Set dst (ConvL2D (LoadL src)));
14882
14883 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14884 ins_encode %{
14885 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14886 %}
14887 ins_pipe(pipe_slow); // XXX
14888 %}
14889
14890 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14891 %{
14892 match(Set dst (ConvI2L src));
14893
14894 ins_cost(125);
14895 format %{ "movslq $dst, $src\t# i2l" %}
14896 ins_encode %{
14897 __ movslq($dst$$Register, $src$$Register);
14898 %}
14899 ins_pipe(ialu_reg_reg);
14900 %}
14901
14902 // Zero-extend convert int to long
14903 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14904 %{
14905 match(Set dst (AndL (ConvI2L src) mask));
14906
14907 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14908 ins_encode %{
14909 if ($dst$$reg != $src$$reg) {
14910 __ movl($dst$$Register, $src$$Register);
14911 }
14912 %}
14913 ins_pipe(ialu_reg_reg);
14914 %}
14915
14916 // Zero-extend convert int to long
14917 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14918 %{
14919 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14920
14921 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14922 ins_encode %{
14923 __ movl($dst$$Register, $src$$Address);
14924 %}
14925 ins_pipe(ialu_reg_mem);
14926 %}
14927
14928 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14929 %{
14930 match(Set dst (AndL src mask));
14931
14932 format %{ "movl $dst, $src\t# zero-extend long" %}
14933 ins_encode %{
14934 __ movl($dst$$Register, $src$$Register);
14935 %}
14936 ins_pipe(ialu_reg_reg);
14937 %}
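
// All three rules above lean on the x86-64 rule that a 32-bit movl zeroes
// bits 63:32 of its destination, so zero-extension costs at most one plain
// move (and nothing at all when dst == src and the value was produced by a
// 32-bit op). Sketch (C++, ours):
//
//   #include <cstdint>
//   inline uint64_t zext32(uint32_t x) { return (uint64_t)x; } // movl dst, src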
14938
14939 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14940 %{
14941 match(Set dst (ConvL2I src));
14942
14943 format %{ "movl $dst, $src\t# l2i" %}
14944 ins_encode %{
14945 __ movl($dst$$Register, $src$$Register);
14946 %}
14947 ins_pipe(ialu_reg_reg);
14948 %}
14949
14950
14951 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14952 match(Set dst (MoveF2I src));
14953 effect(DEF dst, USE src);
14954
14955 ins_cost(125);
14956 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
14957 ins_encode %{
14958 __ movl($dst$$Register, Address(rsp, $src$$disp));
14959 %}
14960 ins_pipe(ialu_reg_mem);
14961 %}
14962
14963 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14964 match(Set dst (MoveI2F src));
14965 effect(DEF dst, USE src);
14966
14967 ins_cost(125);
14968 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
14969 ins_encode %{
14970 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14971 %}
14972 ins_pipe(pipe_slow);
14973 %}
14974
14975 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14976 match(Set dst (MoveD2L src));
14977 effect(DEF dst, USE src);
14978
14979 ins_cost(125);
14980 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
14981 ins_encode %{
14982 __ movq($dst$$Register, Address(rsp, $src$$disp));
14983 %}
14984 ins_pipe(ialu_reg_mem);
14985 %}
14986
14987 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14988 predicate(!UseXmmLoadAndClearUpper);
14989 match(Set dst (MoveL2D src));
14990 effect(DEF dst, USE src);
14991
14992 ins_cost(125);
14993 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
14994 ins_encode %{
14995 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14996 %}
14997 ins_pipe(pipe_slow);
14998 %}
14999
15000 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15001 predicate(UseXmmLoadAndClearUpper);
15002 match(Set dst (MoveL2D src));
15003 effect(DEF dst, USE src);
15004
15005 ins_cost(125);
15006 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15007 ins_encode %{
15008 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15009 %}
15010 ins_pipe(pipe_slow);
15011 %}
15012
15013
15014 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15015 match(Set dst (MoveF2I src));
15016 effect(DEF dst, USE src);
15017
15018 ins_cost(95); // XXX
15019 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15020 ins_encode %{
15021 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15022 %}
15023 ins_pipe(pipe_slow);
15024 %}
15025
15026 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15027 match(Set dst (MoveI2F src));
15028 effect(DEF dst, USE src);
15029
15030 ins_cost(100);
15031 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15032 ins_encode %{
15033 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15034 %}
15035 ins_pipe( ialu_mem_reg );
15036 %}
15037
15038 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15039 match(Set dst (MoveD2L src));
15040 effect(DEF dst, USE src);
15041
15042 ins_cost(95); // XXX
15043 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
15044 ins_encode %{
15045 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15046 %}
15047 ins_pipe(pipe_slow);
15048 %}
15049
15050 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15051 match(Set dst (MoveL2D src));
15052 effect(DEF dst, USE src);
15053
15054 ins_cost(100);
15055 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15056 ins_encode %{
15057 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15058 %}
15059 ins_pipe(ialu_mem_reg);
15060 %}
15061
15062 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15063 match(Set dst (MoveF2I src));
15064 effect(DEF dst, USE src);
15065 ins_cost(85);
15066 format %{ "movd $dst,$src\t# MoveF2I" %}
15067 ins_encode %{
15068 __ movdl($dst$$Register, $src$$XMMRegister);
15069 %}
15070 ins_pipe( pipe_slow );
15071 %}
15072
15073 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15074 match(Set dst (MoveD2L src));
15075 effect(DEF dst, USE src);
15076 ins_cost(85);
15077 format %{ "movd $dst,$src\t# MoveD2L" %}
15078 ins_encode %{
15079 __ movdq($dst$$Register, $src$$XMMRegister);
15080 %}
15081 ins_pipe( pipe_slow );
15082 %}
15083
15084 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15085 match(Set dst (MoveI2F src));
15086 effect(DEF dst, USE src);
15087 ins_cost(100);
15088 format %{ "movd $dst,$src\t# MoveI2F" %}
15089 ins_encode %{
15090 __ movdl($dst$$XMMRegister, $src$$Register);
15091 %}
15092 ins_pipe( pipe_slow );
15093 %}
15094
15095 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15096 match(Set dst (MoveL2D src));
15097 effect(DEF dst, USE src);
15098 ins_cost(100);
15099 format %{ "movd $dst,$src\t# MoveL2D" %}
15100 ins_encode %{
15101 __ movdq($dst$$XMMRegister, $src$$Register);
15102 %}
15103 ins_pipe( pipe_slow );
15104 %}
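
// The Move{F2I,I2F,D2L,L2D} families are raw bit copies between the GPR and
// XMM files -- the machine counterpart of Float.floatToRawIntBits /
// Double.doubleToRawLongBits and their inverses. Sketch (C++, ours):
//
//   #include <cstdint>
//   #include <cstring>
//   inline int64_t move_d2l(double d) {   // movdq dst, src
//     int64_t bits;
//     std::memcpy(&bits, &d, sizeof bits);
//     return bits;
//   }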
15105
15106
15107 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15109 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15110 Universe dummy, rFlagsReg cr)
15111 %{
15112 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15113 match(Set dummy (ClearArray (Binary cnt base) val));
15114 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15115
15116 format %{ $$template
15117 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15118 $$emit$$"jg LARGE\n\t"
15119 $$emit$$"dec rcx\n\t"
15120 $$emit$$"js DONE\t# Zero length\n\t"
15121 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15122 $$emit$$"dec rcx\n\t"
15123 $$emit$$"jge LOOP\n\t"
15124 $$emit$$"jmp DONE\n\t"
15125 $$emit$$"# LARGE:\n\t"
15126 if (UseFastStosb) {
15127 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15128 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15129 } else if (UseXMMForObjInit) {
15130 $$emit$$"movdq $tmp, $val\n\t"
15131 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15132 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15133 $$emit$$"jmpq L_zero_64_bytes\n\t"
15134 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15135 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15136 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15137 $$emit$$"add 0x40,rax\n\t"
15138 $$emit$$"# L_zero_64_bytes:\n\t"
15139 $$emit$$"sub 0x8,rcx\n\t"
15140 $$emit$$"jge L_loop\n\t"
15141 $$emit$$"add 0x4,rcx\n\t"
15142 $$emit$$"jl L_tail\n\t"
15143 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15144 $$emit$$"add 0x20,rax\n\t"
15145 $$emit$$"sub 0x4,rcx\n\t"
15146 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15147 $$emit$$"add 0x4,rcx\n\t"
15148 $$emit$$"jle L_end\n\t"
15149 $$emit$$"dec rcx\n\t"
15150 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15151 $$emit$$"vmovq xmm0,(rax)\n\t"
15152 $$emit$$"add 0x8,rax\n\t"
15153 $$emit$$"dec rcx\n\t"
15154 $$emit$$"jge L_sloop\n\t"
15155 $$emit$$"# L_end:\n\t"
15156 } else {
15157 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15158 }
15159 $$emit$$"# DONE"
15160 %}
15161 ins_encode %{
15162 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15163 $tmp$$XMMRegister, false, false);
15164 %}
15165 ins_pipe(pipe_slow);
15166 %}
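
// Every ClearArray variant bottoms out in MacroAssembler::clear_mem; the
// trailing boolean arguments select large-block and word-copy-only behavior,
// per the predicates above. The simplest fallback path is morally a word
// fill (sketch in C++, ours, not the stub itself):
//
//   #include <cstdint>
//   #include <cstddef>
//   inline void clear_words(uint64_t* base, size_t cnt, uint64_t val) {
//     for (size_t i = 0; i < cnt; i++) base[i] = val;  // rep stosq
//   }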
15167
15168 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15169 Universe dummy, rFlagsReg cr)
15170 %{
15171 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15172 match(Set dummy (ClearArray (Binary cnt base) val));
15173 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15174
15175 format %{ $$template
15176 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15177 $$emit$$"jg LARGE\n\t"
15178 $$emit$$"dec rcx\n\t"
15179 $$emit$$"js DONE\t# Zero length\n\t"
15180 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15181 $$emit$$"dec rcx\n\t"
15182 $$emit$$"jge LOOP\n\t"
15183 $$emit$$"jmp DONE\n\t"
15184 $$emit$$"# LARGE:\n\t"
15185 if (UseXMMForObjInit) {
15186 $$emit$$"movdq $tmp, $val\n\t"
15187 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15188 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15189 $$emit$$"jmpq L_zero_64_bytes\n\t"
15190 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15191 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15192 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15193 $$emit$$"add 0x40,rax\n\t"
15194 $$emit$$"# L_zero_64_bytes:\n\t"
15195 $$emit$$"sub 0x8,rcx\n\t"
15196 $$emit$$"jge L_loop\n\t"
15197 $$emit$$"add 0x4,rcx\n\t"
15198 $$emit$$"jl L_tail\n\t"
15199 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15200 $$emit$$"add 0x20,rax\n\t"
15201 $$emit$$"sub 0x4,rcx\n\t"
15202 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15203 $$emit$$"add 0x4,rcx\n\t"
15204 $$emit$$"jle L_end\n\t"
15205 $$emit$$"dec rcx\n\t"
15206 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15207 $$emit$$"vmovq xmm0,(rax)\n\t"
15208 $$emit$$"add 0x8,rax\n\t"
15209 $$emit$$"dec rcx\n\t"
15210 $$emit$$"jge L_sloop\n\t"
15211 $$emit$$"# L_end:\n\t"
15212 } else {
15213 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15214 }
15215 $$emit$$"# DONE"
15216 %}
15217 ins_encode %{
15218 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15219 $tmp$$XMMRegister, false, true);
15220 %}
15221 ins_pipe(pipe_slow);
15222 %}
15223
15224 // Small non-constant length ClearArray for AVX512 targets.
15225 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15226 Universe dummy, rFlagsReg cr)
15227 %{
15228 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15229 match(Set dummy (ClearArray (Binary cnt base) val));
15230 ins_cost(125);
15231 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15232
15233 format %{ $$template
15234 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15235 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15236 $$emit$$"jg LARGE\n\t"
15237 $$emit$$"dec rcx\n\t"
15238 $$emit$$"js DONE\t# Zero length\n\t"
15239 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15240 $$emit$$"dec rcx\n\t"
15241 $$emit$$"jge LOOP\n\t"
15242 $$emit$$"jmp DONE\n\t"
15243 $$emit$$"# LARGE:\n\t"
15244 if (UseFastStosb) {
15245 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15246 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15247 } else if (UseXMMForObjInit) {
15248 $$emit$$"mov rdi,rax\n\t"
15249 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15250 $$emit$$"jmpq L_zero_64_bytes\n\t"
15251 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15252 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15253 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15254 $$emit$$"add 0x40,rax\n\t"
15255 $$emit$$"# L_zero_64_bytes:\n\t"
15256 $$emit$$"sub 0x8,rcx\n\t"
15257 $$emit$$"jge L_loop\n\t"
15258 $$emit$$"add 0x4,rcx\n\t"
15259 $$emit$$"jl L_tail\n\t"
15260 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15261 $$emit$$"add 0x20,rax\n\t"
15262 $$emit$$"sub 0x4,rcx\n\t"
15263 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15264 $$emit$$"add 0x4,rcx\n\t"
15265 $$emit$$"jle L_end\n\t"
15266 $$emit$$"dec rcx\n\t"
15267 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15268 $$emit$$"vmovq xmm0,(rax)\n\t"
15269 $$emit$$"add 0x8,rax\n\t"
15270 $$emit$$"dec rcx\n\t"
15271 $$emit$$"jge L_sloop\n\t"
15272 $$emit$$"# L_end:\n\t"
15273 } else {
15274 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15275 }
15276 $$emit$$"# DONE"
15277 %}
15278 ins_encode %{
15279 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15280 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15281 %}
15282 ins_pipe(pipe_slow);
15283 %}
15284
15285 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15286 Universe dummy, rFlagsReg cr)
15287 %{
15288 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15289 match(Set dummy (ClearArray (Binary cnt base) val));
15290 ins_cost(125);
15291 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15292
15293 format %{ $$template
15294 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15295 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15296 $$emit$$"jg LARGE\n\t"
15297 $$emit$$"dec rcx\n\t"
15298 $$emit$$"js DONE\t# Zero length\n\t"
15299 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15300 $$emit$$"dec rcx\n\t"
15301 $$emit$$"jge LOOP\n\t"
15302 $$emit$$"jmp DONE\n\t"
15303 $$emit$$"# LARGE:\n\t"
15304 if (UseFastStosb) {
15305 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15306 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15307 } else if (UseXMMForObjInit) {
15308 $$emit$$"mov rdi,rax\n\t"
15309 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15310 $$emit$$"jmpq L_zero_64_bytes\n\t"
15311 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15312 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15313 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15314 $$emit$$"add 0x40,rax\n\t"
15315 $$emit$$"# L_zero_64_bytes:\n\t"
15316 $$emit$$"sub 0x8,rcx\n\t"
15317 $$emit$$"jge L_loop\n\t"
15318 $$emit$$"add 0x4,rcx\n\t"
15319 $$emit$$"jl L_tail\n\t"
15320 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15321 $$emit$$"add 0x20,rax\n\t"
15322 $$emit$$"sub 0x4,rcx\n\t"
15323 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15324 $$emit$$"add 0x4,rcx\n\t"
15325 $$emit$$"jle L_end\n\t"
15326 $$emit$$"dec rcx\n\t"
15327 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15328 $$emit$$"vmovq xmm0,(rax)\n\t"
15329 $$emit$$"add 0x8,rax\n\t"
15330 $$emit$$"dec rcx\n\t"
15331 $$emit$$"jge L_sloop\n\t"
15332 $$emit$$"# L_end:\n\t"
15333 } else {
15334 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15335 }
15336 $$emit$$"# DONE"
15337 %}
15338 ins_encode %{
15339 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15340 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15341 %}
15342 ins_pipe(pipe_slow);
15343 %}
15344
15345 // Large non-constant length ClearArray for non-AVX512 targets.
15346 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15347 Universe dummy, rFlagsReg cr)
15348 %{
15349 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15350 match(Set dummy (ClearArray (Binary cnt base) val));
15351 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15352
15353 format %{ $$template
15354 if (UseFastStosb) {
15355 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15356 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15357 } else if (UseXMMForObjInit) {
15358 $$emit$$"movdq $tmp, $val\n\t"
15359 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15360 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15361 $$emit$$"jmpq L_zero_64_bytes\n\t"
15362 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15363 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15364 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15365 $$emit$$"add 0x40,rax\n\t"
15366 $$emit$$"# L_zero_64_bytes:\n\t"
15367 $$emit$$"sub 0x8,rcx\n\t"
15368 $$emit$$"jge L_loop\n\t"
15369 $$emit$$"add 0x4,rcx\n\t"
15370 $$emit$$"jl L_tail\n\t"
15371 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15372 $$emit$$"add 0x20,rax\n\t"
15373 $$emit$$"sub 0x4,rcx\n\t"
15374 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15375 $$emit$$"add 0x4,rcx\n\t"
15376 $$emit$$"jle L_end\n\t"
15377 $$emit$$"dec rcx\n\t"
15378 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15379 $$emit$$"vmovq xmm0,(rax)\n\t"
15380 $$emit$$"add 0x8,rax\n\t"
15381 $$emit$$"dec rcx\n\t"
15382 $$emit$$"jge L_sloop\n\t"
15383 $$emit$$"# L_end:\n\t"
15384 } else {
15385 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15386 }
15387 %}
15388 ins_encode %{
15389 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15390 $tmp$$XMMRegister, true, false);
15391 %}
15392 ins_pipe(pipe_slow);
15393 %}
15394
15395 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15396 Universe dummy, rFlagsReg cr)
15397 %{
15398 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15399 match(Set dummy (ClearArray (Binary cnt base) val));
15400 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15401
15402 format %{ $$template
15403 if (UseXMMForObjInit) {
15404 $$emit$$"movdq $tmp, $val\n\t"
15405 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15406 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15407 $$emit$$"jmpq L_zero_64_bytes\n\t"
15408 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15409 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15410 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15411 $$emit$$"add 0x40,rax\n\t"
15412 $$emit$$"# L_zero_64_bytes:\n\t"
15413 $$emit$$"sub 0x8,rcx\n\t"
15414 $$emit$$"jge L_loop\n\t"
15415 $$emit$$"add 0x4,rcx\n\t"
15416 $$emit$$"jl L_tail\n\t"
15417 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15418 $$emit$$"add 0x20,rax\n\t"
15419 $$emit$$"sub 0x4,rcx\n\t"
15420 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15421 $$emit$$"add 0x4,rcx\n\t"
15422 $$emit$$"jle L_end\n\t"
15423 $$emit$$"dec rcx\n\t"
15424 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15425 $$emit$$"vmovq xmm0,(rax)\n\t"
15426 $$emit$$"add 0x8,rax\n\t"
15427 $$emit$$"dec rcx\n\t"
15428 $$emit$$"jge L_sloop\n\t"
15429 $$emit$$"# L_end:\n\t"
15430 } else {
15431 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15432 }
15433 %}
15434 ins_encode %{
15435 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15436 $tmp$$XMMRegister, true, true);
15437 %}
15438 ins_pipe(pipe_slow);
15439 %}
15440
15441 // Large non-constant length ClearArray for AVX512 targets.
15442 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15443 Universe dummy, rFlagsReg cr)
15444 %{
15445 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15446 match(Set dummy (ClearArray (Binary cnt base) val));
15447 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15448
15449 format %{ $$template
15450 if (UseFastStosb) {
15451 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15452 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15453 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15454 } else if (UseXMMForObjInit) {
15455 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15456 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15457 $$emit$$"jmpq L_zero_64_bytes\n\t"
15458 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15459 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15460 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15461 $$emit$$"add 0x40,rax\n\t"
15462 $$emit$$"# L_zero_64_bytes:\n\t"
15463 $$emit$$"sub 0x8,rcx\n\t"
15464 $$emit$$"jge L_loop\n\t"
15465 $$emit$$"add 0x4,rcx\n\t"
15466 $$emit$$"jl L_tail\n\t"
15467 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15468 $$emit$$"add 0x20,rax\n\t"
15469 $$emit$$"sub 0x4,rcx\n\t"
15470 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15471 $$emit$$"add 0x4,rcx\n\t"
15472 $$emit$$"jle L_end\n\t"
15473 $$emit$$"dec rcx\n\t"
15474 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15475 $$emit$$"vmovq xmm0,(rax)\n\t"
15476 $$emit$$"add 0x8,rax\n\t"
15477 $$emit$$"dec rcx\n\t"
15478 $$emit$$"jge L_sloop\n\t"
15479 $$emit$$"# L_end:\n\t"
15480 } else {
15481 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15482 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15483 }
15484 %}
15485 ins_encode %{
15486 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15487 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15488 %}
15489 ins_pipe(pipe_slow);
15490 %}
15491
15492 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15493 Universe dummy, rFlagsReg cr)
15494 %{
15495 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15496 match(Set dummy (ClearArray (Binary cnt base) val));
15497 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15498
15499 format %{ $$template
15500 if (UseFastStosb) {
15501 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15502 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15503 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15504 } else if (UseXMMForObjInit) {
15505 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15506 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15507 $$emit$$"jmpq L_zero_64_bytes\n\t"
15508 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15509 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15510 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15511 $$emit$$"add 0x40,rax\n\t"
15512 $$emit$$"# L_zero_64_bytes:\n\t"
15513 $$emit$$"sub 0x8,rcx\n\t"
15514 $$emit$$"jge L_loop\n\t"
15515 $$emit$$"add 0x4,rcx\n\t"
15516 $$emit$$"jl L_tail\n\t"
15517 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15518 $$emit$$"add 0x20,rax\n\t"
15519 $$emit$$"sub 0x4,rcx\n\t"
15520 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15521 $$emit$$"add 0x4,rcx\n\t"
15522 $$emit$$"jle L_end\n\t"
15523 $$emit$$"dec rcx\n\t"
15524 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15525 $$emit$$"vmovq xmm0,(rax)\n\t"
15526 $$emit$$"add 0x8,rax\n\t"
15527 $$emit$$"dec rcx\n\t"
15528 $$emit$$"jge L_sloop\n\t"
15529 $$emit$$"# L_end:\n\t"
15530 } else {
15531 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15532 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15533 }
15534 %}
15535 ins_encode %{
15536 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15537 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15538 %}
15539 ins_pipe(pipe_slow);
15540 %}
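
// Note: the word-copy variant above differs from rep_stos_large_evex only
// in the word_copy_only() predicate and in the second boolean handed to
// MacroAssembler::clear_mem, which then restricts itself to word-sized
// (8-byte) stores rather than byte-granular ones.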
15541
15542 // Small constant length ClearArray for AVX512 targets.
15543 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15544 %{
15545 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15546 ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15547 match(Set dummy (ClearArray (Binary cnt base) val));
15548 ins_cost(100);
15549 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt\n\t" %}
15551 ins_encode %{
15552 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15553 %}
15554 ins_pipe(pipe_slow);
15555 %}
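
// With the element count known at compile time, clear_mem can emit a short
// straight-line sequence of (masked) stores instead of a loop, which is
// what the low ins_cost(100) and the kReg tail-mask temp reflect.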
15556
15557 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15558 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15559 %{
15560 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15561 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15562 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15563
15564 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15565 ins_encode %{
15566 __ string_compare($str1$$Register, $str2$$Register,
15567 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15568 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15569 %}
15570 ins_pipe( pipe_slow );
15571 %}
15572
15573 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15574 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15575 %{
15576 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15577 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15578 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15579
15580 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15581 ins_encode %{
15582 __ string_compare($str1$$Register, $str2$$Register,
15583 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15584 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15585 %}
15586 ins_pipe( pipe_slow );
15587 %}
15588
15589 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15590 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15591 %{
15592 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15593 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15594 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15595
15596 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15597 ins_encode %{
15598 __ string_compare($str1$$Register, $str2$$Register,
15599 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15600 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15601 %}
15602 ins_pipe( pipe_slow );
15603 %}
15604
15605 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15606 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15607 %{
15608 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15609 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15610 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15611
15612 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15613 ins_encode %{
15614 __ string_compare($str1$$Register, $str2$$Register,
15615 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15616 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15617 %}
15618 ins_pipe( pipe_slow );
15619 %}
15620
15621 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15622 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15623 %{
15624 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15625 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15626 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15627
15628 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15629 ins_encode %{
15630 __ string_compare($str1$$Register, $str2$$Register,
15631 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15632 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15633 %}
15634 ins_pipe( pipe_slow );
15635 %}
15636
15637 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15638 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15639 %{
15640 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15641 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15642 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15643
15644 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15645 ins_encode %{
15646 __ string_compare($str1$$Register, $str2$$Register,
15647 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15648 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15649 %}
15650 ins_pipe( pipe_slow );
15651 %}
15652
15653 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15654 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15655 %{
15656 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15657 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15658 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15659
15660 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15661 ins_encode %{
15662 __ string_compare($str2$$Register, $str1$$Register,
15663 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15664 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15665 %}
15666 ins_pipe( pipe_slow );
15667 %}
15668
15669 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15670 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15671 %{
15672 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15673 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15674 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15675
15676 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15677 ins_encode %{
15678 __ string_compare($str2$$Register, $str1$$Register,
15679 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15680 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15681 %}
15682 ins_pipe( pipe_slow );
15683 %}
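
// In the StrComp encodings above, L stands for a Latin-1 byte[] operand and
// U for a UTF-16 char[] operand. Note that the UL variants swap str1 and
// str2 (and their counts) in the call to string_compare, so the
// macro-assembler routine only ever has to handle the LU argument order.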
15684
15685 // fast search of substring with known size.
15686 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15687 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15688 %{
15689 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15690 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15691 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15692
15693 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15694 ins_encode %{
15695 int icnt2 = (int)$int_cnt2$$constant;
15696 if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
15699 __ string_indexofC8($str1$$Register, $str2$$Register,
15700 $cnt1$$Register, $cnt2$$Register,
15701 icnt2, $result$$Register,
15702 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15703 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15705 __ string_indexof($str1$$Register, $str2$$Register,
15706 $cnt1$$Register, $cnt2$$Register,
15707 icnt2, $result$$Register,
15708 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15709 }
15710 %}
15711 ins_pipe( pipe_slow );
15712 %}
15713
15714 // fast search of substring with known size.
15715 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15716 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15717 %{
15718 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15719 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15720 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15721
15722 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15723 ins_encode %{
15724 int icnt2 = (int)$int_cnt2$$constant;
15725 if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15728 __ string_indexofC8($str1$$Register, $str2$$Register,
15729 $cnt1$$Register, $cnt2$$Register,
15730 icnt2, $result$$Register,
15731 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15732 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15734 __ string_indexof($str1$$Register, $str2$$Register,
15735 $cnt1$$Register, $cnt2$$Register,
15736 icnt2, $result$$Register,
15737 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15738 }
15739 %}
15740 ins_pipe( pipe_slow );
15741 %}
15742
15743 // fast search of substring with known size.
15744 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15745 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15746 %{
15747 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15748 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15749 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15750
15751 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15752 ins_encode %{
15753 int icnt2 = (int)$int_cnt2$$constant;
15754 if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15757 __ string_indexofC8($str1$$Register, $str2$$Register,
15758 $cnt1$$Register, $cnt2$$Register,
15759 icnt2, $result$$Register,
15760 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15761 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15763 __ string_indexof($str1$$Register, $str2$$Register,
15764 $cnt1$$Register, $cnt2$$Register,
15765 icnt2, $result$$Register,
15766 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15767 }
15768 %}
15769 ins_pipe( pipe_slow );
15770 %}
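
// The three constant-count variants above use string_indexofC8 only when
// the needle fills at least one 16-byte XMM chunk (16 Latin-1 bytes or
// 8 UTF-16 chars); shorter needles take the general string_indexof path,
// which loads them via the stack when they might cross a page boundary.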
15771
15772 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15773 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15774 %{
15775 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15776 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15777 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15778
15779 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15780 ins_encode %{
15781 __ string_indexof($str1$$Register, $str2$$Register,
15782 $cnt1$$Register, $cnt2$$Register,
15783 (-1), $result$$Register,
15784 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15785 %}
15786 ins_pipe( pipe_slow );
15787 %}
15788
15789 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15790 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15791 %{
15792 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15793 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15794 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15795
15796 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15797 ins_encode %{
15798 __ string_indexof($str1$$Register, $str2$$Register,
15799 $cnt1$$Register, $cnt2$$Register,
15800 (-1), $result$$Register,
15801 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15802 %}
15803 ins_pipe( pipe_slow );
15804 %}
15805
15806 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15807 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15808 %{
15809 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15810 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15811 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15812
15813 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15814 ins_encode %{
15815 __ string_indexof($str1$$Register, $str2$$Register,
15816 $cnt1$$Register, $cnt2$$Register,
15817 (-1), $result$$Register,
15818 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15819 %}
15820 ins_pipe( pipe_slow );
15821 %}
15822
15823 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15824 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15825 %{
15826 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15827 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15828 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15829 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15830 ins_encode %{
15831 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15832 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15833 %}
15834 ins_pipe( pipe_slow );
15835 %}
15836
15837 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15838 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15839 %{
15840 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15841 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15842 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf byte[] $str1,$cnt1,$ch -> $result // KILL all" %}
15844 ins_encode %{
15845 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15846 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15847 %}
15848 ins_pipe( pipe_slow );
15849 %}
15850
15851 // fast string equals
15852 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15853 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15854 %{
15855 predicate(!VM_Version::supports_avx512vlbw());
15856 match(Set result (StrEquals (Binary str1 str2) cnt));
15857 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15858
15859 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15860 ins_encode %{
15861 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15862 $cnt$$Register, $result$$Register, $tmp3$$Register,
15863 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15864 %}
15865 ins_pipe( pipe_slow );
15866 %}
15867
15868 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15869 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15870 %{
15871 predicate(VM_Version::supports_avx512vlbw());
15872 match(Set result (StrEquals (Binary str1 str2) cnt));
15873 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15874
15875 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15876 ins_encode %{
15877 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15878 $cnt$$Register, $result$$Register, $tmp3$$Register,
15879 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15880 %}
15881 ins_pipe( pipe_slow );
15882 %}
15883
15884 // fast array equals
15885 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15886 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15887 %{
15888 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15889 match(Set result (AryEq ary1 ary2));
15890 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15891
15892 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15893 ins_encode %{
15894 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15895 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15896 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15897 %}
15898 ins_pipe( pipe_slow );
15899 %}
15900
15901 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15902 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15903 %{
15904 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15905 match(Set result (AryEq ary1 ary2));
15906 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15907
15908 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15909 ins_encode %{
15910 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15911 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15912 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15913 %}
15914 ins_pipe( pipe_slow );
15915 %}
15916
15917 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15918 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15919 %{
15920 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15921 match(Set result (AryEq ary1 ary2));
15922 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15923
15924 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15925 ins_encode %{
15926 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15927 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15928 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15929 %}
15930 ins_pipe( pipe_slow );
15931 %}
15932
15933 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15934 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15935 %{
15936 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15937 match(Set result (AryEq ary1 ary2));
15938 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15939
15940 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15941 ins_encode %{
15942 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15943 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15944 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15945 %}
15946 ins_pipe( pipe_slow );
15947 %}
15948
15949 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15950 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15951 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15952 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15953 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15954 %{
15955 predicate(UseAVX >= 2);
15956 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15957 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15958 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15959 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15960 USE basic_type, KILL cr);
15961
15962 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15963 ins_encode %{
15964 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15965 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15966 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15967 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15968 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15969 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15970 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15971 %}
15972 ins_pipe( pipe_slow );
15973 %}
15974
15975 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15977 %{
15978 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15979 match(Set result (CountPositives ary1 len));
15980 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15981
15982 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15983 ins_encode %{
15984 __ count_positives($ary1$$Register, $len$$Register,
15985 $result$$Register, $tmp3$$Register,
15986 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15987 %}
15988 ins_pipe( pipe_slow );
15989 %}
15990
15991 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15993 %{
15994 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15995 match(Set result (CountPositives ary1 len));
15996 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15997
15998 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15999 ins_encode %{
16000 __ count_positives($ary1$$Register, $len$$Register,
16001 $result$$Register, $tmp3$$Register,
16002 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16003 %}
16004 ins_pipe( pipe_slow );
16005 %}
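
// count_positives returns the number of leading bytes whose sign bit is
// clear; a result equal to $len therefore serves as an "all bytes
// positive" (i.e. pure ASCII) check.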
16006
16007 // fast char[] to byte[] compression
16008 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16009 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16010 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16011 match(Set result (StrCompressedCopy src (Binary dst len)));
16012 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16013 USE_KILL len, KILL tmp5, KILL cr);
16014
16015 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16016 ins_encode %{
16017 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16018 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16019 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16020 knoreg, knoreg);
16021 %}
16022 ins_pipe( pipe_slow );
16023 %}
16024
16025 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16026 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16027 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16028 match(Set result (StrCompressedCopy src (Binary dst len)));
16029 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16030 USE_KILL len, KILL tmp5, KILL cr);
16031
16032 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16033 ins_encode %{
16034 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16035 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16036 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16037 $ktmp1$$KRegister, $ktmp2$$KRegister);
16038 %}
16039 ins_pipe( pipe_slow );
16040 %}

// fast byte[] to char[] inflation
16042 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16043 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16044 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16045 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16046 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16047
16048 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16049 ins_encode %{
16050 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16051 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16052 %}
16053 ins_pipe( pipe_slow );
16054 %}
16055
16056 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16057 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16058 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16059 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16060 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16061
16062 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16063 ins_encode %{
16064 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16065 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16066 %}
16067 ins_pipe( pipe_slow );
16068 %}
16069
16070 // encode char[] to byte[] in ISO_8859_1
16071 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16072 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16073 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16074 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16075 match(Set result (EncodeISOArray src (Binary dst len)));
16076 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16077
16078 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16079 ins_encode %{
16080 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16081 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16082 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16083 %}
16084 ins_pipe( pipe_slow );
16085 %}
16086
16087 // encode char[] to byte[] in ASCII
16088 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16089 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16090 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16091 predicate(((EncodeISOArrayNode*)n)->is_ascii());
16092 match(Set result (EncodeISOArray src (Binary dst len)));
16093 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16094
16095 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16096 ins_encode %{
16097 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16098 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16099 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16100 %}
16101 ins_pipe( pipe_slow );
16102 %}
16103
16104 //----------Overflow Math Instructions-----------------------------------------
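
// These instructions only set the condition codes; the branch on overflow
// is matched separately. Roughly, the intrinsic expansion of
// Math.addExact(int, int) produces
//
//   (If (Bool (OverflowAddI a b) overflow) ...)
//
// which lowers to one of the addl forms below followed by a jo to the
// slow (deoptimize/throw) path.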
16105
16106 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16107 %{
16108 match(Set cr (OverflowAddI op1 op2));
16109 effect(DEF cr, USE_KILL op1, USE op2);
16110
16111 format %{ "addl $op1, $op2\t# overflow check int" %}
16112
16113 ins_encode %{
16114 __ addl($op1$$Register, $op2$$Register);
16115 %}
16116 ins_pipe(ialu_reg_reg);
16117 %}
16118
16119 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16120 %{
16121 match(Set cr (OverflowAddI op1 op2));
16122 effect(DEF cr, USE_KILL op1, USE op2);
16123
16124 format %{ "addl $op1, $op2\t# overflow check int" %}
16125
16126 ins_encode %{
16127 __ addl($op1$$Register, $op2$$constant);
16128 %}
16129 ins_pipe(ialu_reg_reg);
16130 %}
16131
16132 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16133 %{
16134 match(Set cr (OverflowAddL op1 op2));
16135 effect(DEF cr, USE_KILL op1, USE op2);
16136
16137 format %{ "addq $op1, $op2\t# overflow check long" %}
16138 ins_encode %{
16139 __ addq($op1$$Register, $op2$$Register);
16140 %}
16141 ins_pipe(ialu_reg_reg);
16142 %}
16143
16144 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16145 %{
16146 match(Set cr (OverflowAddL op1 op2));
16147 effect(DEF cr, USE_KILL op1, USE op2);
16148
16149 format %{ "addq $op1, $op2\t# overflow check long" %}
16150 ins_encode %{
16151 __ addq($op1$$Register, $op2$$constant);
16152 %}
16153 ins_pipe(ialu_reg_reg);
16154 %}
16155
16156 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16157 %{
16158 match(Set cr (OverflowSubI op1 op2));
16159
16160 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16161 ins_encode %{
16162 __ cmpl($op1$$Register, $op2$$Register);
16163 %}
16164 ins_pipe(ialu_reg_reg);
16165 %}
16166
16167 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16168 %{
16169 match(Set cr (OverflowSubI op1 op2));
16170
16171 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16172 ins_encode %{
16173 __ cmpl($op1$$Register, $op2$$constant);
16174 %}
16175 ins_pipe(ialu_reg_reg);
16176 %}
16177
16178 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16179 %{
16180 match(Set cr (OverflowSubL op1 op2));
16181
16182 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16183 ins_encode %{
16184 __ cmpq($op1$$Register, $op2$$Register);
16185 %}
16186 ins_pipe(ialu_reg_reg);
16187 %}
16188
16189 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16190 %{
16191 match(Set cr (OverflowSubL op1 op2));
16192
16193 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16194 ins_encode %{
16195 __ cmpq($op1$$Register, $op2$$constant);
16196 %}
16197 ins_pipe(ialu_reg_reg);
16198 %}
16199
16200 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16201 %{
16202 match(Set cr (OverflowSubI zero op2));
16203 effect(DEF cr, USE_KILL op2);
16204
16205 format %{ "negl $op2\t# overflow check int" %}
16206 ins_encode %{
16207 __ negl($op2$$Register);
16208 %}
16209 ins_pipe(ialu_reg_reg);
16210 %}
16211
16212 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16213 %{
16214 match(Set cr (OverflowSubL zero op2));
16215 effect(DEF cr, USE_KILL op2);
16216
16217 format %{ "negq $op2\t# overflow check long" %}
16218 ins_encode %{
16219 __ negq($op2$$Register);
16220 %}
16221 ins_pipe(ialu_reg_reg);
16222 %}
16223
16224 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16225 %{
16226 match(Set cr (OverflowMulI op1 op2));
16227 effect(DEF cr, USE_KILL op1, USE op2);
16228
16229 format %{ "imull $op1, $op2\t# overflow check int" %}
16230 ins_encode %{
16231 __ imull($op1$$Register, $op2$$Register);
16232 %}
16233 ins_pipe(ialu_reg_reg_alu0);
16234 %}
16235
16236 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16237 %{
16238 match(Set cr (OverflowMulI op1 op2));
16239 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16240
16241 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16242 ins_encode %{
16243 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16244 %}
16245 ins_pipe(ialu_reg_reg_alu0);
16246 %}
16247
16248 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16249 %{
16250 match(Set cr (OverflowMulL op1 op2));
16251 effect(DEF cr, USE_KILL op1, USE op2);
16252
16253 format %{ "imulq $op1, $op2\t# overflow check long" %}
16254 ins_encode %{
16255 __ imulq($op1$$Register, $op2$$Register);
16256 %}
16257 ins_pipe(ialu_reg_reg_alu0);
16258 %}
16259
16260 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16261 %{
16262 match(Set cr (OverflowMulL op1 op2));
16263 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16264
16265 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16266 ins_encode %{
16267 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16268 %}
16269 ins_pipe(ialu_reg_reg_alu0);
16270 %}
16271
16272
16273 //----------Control Flow Instructions------------------------------------------
16274 // Signed compare Instructions
16275
16276 // XXX more variants!!
16277 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16278 %{
16279 match(Set cr (CmpI op1 op2));
16280 effect(DEF cr, USE op1, USE op2);
16281
16282 format %{ "cmpl $op1, $op2" %}
16283 ins_encode %{
16284 __ cmpl($op1$$Register, $op2$$Register);
16285 %}
16286 ins_pipe(ialu_cr_reg_reg);
16287 %}
16288
16289 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16290 %{
16291 match(Set cr (CmpI op1 op2));
16292
16293 format %{ "cmpl $op1, $op2" %}
16294 ins_encode %{
16295 __ cmpl($op1$$Register, $op2$$constant);
16296 %}
16297 ins_pipe(ialu_cr_reg_imm);
16298 %}
16299
16300 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16301 %{
16302 match(Set cr (CmpI op1 (LoadI op2)));
16303
16304 ins_cost(500); // XXX
16305 format %{ "cmpl $op1, $op2" %}
16306 ins_encode %{
16307 __ cmpl($op1$$Register, $op2$$Address);
16308 %}
16309 ins_pipe(ialu_cr_reg_mem);
16310 %}
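
// The test* forms below fold a compare against zero (possibly through an
// AndI mask) into testl, which sets the same Z and S flags as a cmpl
// against an immediate zero but with a shorter encoding.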
16311
16312 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16313 %{
16314 match(Set cr (CmpI src zero));
16315
16316 format %{ "testl $src, $src" %}
16317 ins_encode %{
16318 __ testl($src$$Register, $src$$Register);
16319 %}
16320 ins_pipe(ialu_cr_reg_imm);
16321 %}
16322
16323 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16324 %{
16325 match(Set cr (CmpI (AndI src con) zero));
16326
16327 format %{ "testl $src, $con" %}
16328 ins_encode %{
16329 __ testl($src$$Register, $con$$constant);
16330 %}
16331 ins_pipe(ialu_cr_reg_imm);
16332 %}
16333
16334 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16335 %{
16336 match(Set cr (CmpI (AndI src1 src2) zero));
16337
16338 format %{ "testl $src1, $src2" %}
16339 ins_encode %{
16340 __ testl($src1$$Register, $src2$$Register);
16341 %}
16342 ins_pipe(ialu_cr_reg_imm);
16343 %}
16344
16345 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16346 %{
16347 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16348
16349 format %{ "testl $src, $mem" %}
16350 ins_encode %{
16351 __ testl($src$$Register, $mem$$Address);
16352 %}
16353 ins_pipe(ialu_cr_reg_mem);
16354 %}
16355
16356 // Unsigned compare Instructions; really, same as signed except they
16357 // produce an rFlagsRegU instead of rFlagsReg.
16358 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16359 %{
16360 match(Set cr (CmpU op1 op2));
16361
16362 format %{ "cmpl $op1, $op2\t# unsigned" %}
16363 ins_encode %{
16364 __ cmpl($op1$$Register, $op2$$Register);
16365 %}
16366 ins_pipe(ialu_cr_reg_reg);
16367 %}
16368
16369 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16370 %{
16371 match(Set cr (CmpU op1 op2));
16372
16373 format %{ "cmpl $op1, $op2\t# unsigned" %}
16374 ins_encode %{
16375 __ cmpl($op1$$Register, $op2$$constant);
16376 %}
16377 ins_pipe(ialu_cr_reg_imm);
16378 %}
16379
16380 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16381 %{
16382 match(Set cr (CmpU op1 (LoadI op2)));
16383
16384 ins_cost(500); // XXX
16385 format %{ "cmpl $op1, $op2\t# unsigned" %}
16386 ins_encode %{
16387 __ cmpl($op1$$Register, $op2$$Address);
16388 %}
16389 ins_pipe(ialu_cr_reg_mem);
16390 %}
16391
16392 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16393 %{
16394 match(Set cr (CmpU src zero));
16395
16396 format %{ "testl $src, $src\t# unsigned" %}
16397 ins_encode %{
16398 __ testl($src$$Register, $src$$Register);
16399 %}
16400 ins_pipe(ialu_cr_reg_imm);
16401 %}
16402
16403 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16404 %{
16405 match(Set cr (CmpP op1 op2));
16406
16407 format %{ "cmpq $op1, $op2\t# ptr" %}
16408 ins_encode %{
16409 __ cmpq($op1$$Register, $op2$$Register);
16410 %}
16411 ins_pipe(ialu_cr_reg_reg);
16412 %}
16413
16414 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16415 %{
16416 match(Set cr (CmpP op1 (LoadP op2)));
16417 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16418
16419 ins_cost(500); // XXX
16420 format %{ "cmpq $op1, $op2\t# ptr" %}
16421 ins_encode %{
16422 __ cmpq($op1$$Register, $op2$$Address);
16423 %}
16424 ins_pipe(ialu_cr_reg_mem);
16425 %}
16426
16427 // XXX this is generalized by compP_rReg_mem???
16428 // Compare raw pointer (used in out-of-heap check).
16429 // Only works because non-oop pointers must be raw pointers
16430 // and raw pointers have no anti-dependencies.
16431 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16432 %{
16433 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16434 n->in(2)->as_Load()->barrier_data() == 0);
16435 match(Set cr (CmpP op1 (LoadP op2)));
16436
16437 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16438 ins_encode %{
16439 __ cmpq($op1$$Register, $op2$$Address);
16440 %}
16441 ins_pipe(ialu_cr_reg_mem);
16442 %}
16443
// This will generate a signed flags result. This should be OK since
// any compare to zero should be eq/neq.
16446 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16447 %{
16448 match(Set cr (CmpP src zero));
16449
16450 format %{ "testq $src, $src\t# ptr" %}
16451 ins_encode %{
16452 __ testq($src$$Register, $src$$Register);
16453 %}
16454 ins_pipe(ialu_cr_reg_imm);
16455 %}
16456
// This will generate a signed flags result. This should be OK since
// any compare to zero should be eq/neq.
16459 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16460 %{
16461 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16462 n->in(1)->as_Load()->barrier_data() == 0);
16463 match(Set cr (CmpP (LoadP op) zero));
16464
16465 ins_cost(500); // XXX
16466 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16467 ins_encode %{
16468 __ testq($op$$Address, 0xFFFFFFFF);
16469 %}
16470 ins_pipe(ialu_cr_reg_imm);
16471 %}
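
// When compressed oops are in use with a null base, r12 (the heap-base
// register) is known to hold zero, so the variant below implements the
// null test as a plain cmpq against r12 instead of a compare with an
// immediate.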
16472
16473 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16474 %{
16475 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16476 n->in(1)->as_Load()->barrier_data() == 0);
16477 match(Set cr (CmpP (LoadP mem) zero));
16478
16479 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16480 ins_encode %{
16481 __ cmpq(r12, $mem$$Address);
16482 %}
16483 ins_pipe(ialu_cr_reg_mem);
16484 %}
16485
16486 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16487 %{
16488 match(Set cr (CmpN op1 op2));
16489
16490 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16491 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16492 ins_pipe(ialu_cr_reg_reg);
16493 %}
16494
16495 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16496 %{
16497 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16498 match(Set cr (CmpN src (LoadN mem)));
16499
16500 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16501 ins_encode %{
16502 __ cmpl($src$$Register, $mem$$Address);
16503 %}
16504 ins_pipe(ialu_cr_reg_mem);
16505 %}
16506
16507 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16508 match(Set cr (CmpN op1 op2));
16509
16510 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16511 ins_encode %{
16512 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16513 %}
16514 ins_pipe(ialu_cr_reg_imm);
16515 %}
16516
16517 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16518 %{
16519 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16520 match(Set cr (CmpN src (LoadN mem)));
16521
16522 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16523 ins_encode %{
16524 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16525 %}
16526 ins_pipe(ialu_cr_reg_mem);
16527 %}
16528
16529 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16530 match(Set cr (CmpN op1 op2));
16531
16532 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16533 ins_encode %{
16534 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16535 %}
16536 ins_pipe(ialu_cr_reg_imm);
16537 %}
16538
16539 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16540 %{
16541 predicate(!UseCompactObjectHeaders);
16542 match(Set cr (CmpN src (LoadNKlass mem)));
16543
16544 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16545 ins_encode %{
16546 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16547 %}
16548 ins_pipe(ialu_cr_reg_mem);
16549 %}
16550
16551 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16552 match(Set cr (CmpN src zero));
16553
16554 format %{ "testl $src, $src\t# compressed ptr" %}
16555 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16556 ins_pipe(ialu_cr_reg_imm);
16557 %}
16558
16559 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16560 %{
16561 predicate(CompressedOops::base() != nullptr &&
16562 n->in(1)->as_Load()->barrier_data() == 0);
16563 match(Set cr (CmpN (LoadN mem) zero));
16564
16565 ins_cost(500); // XXX
16566 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16567 ins_encode %{
16568 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16569 %}
16570 ins_pipe(ialu_cr_reg_mem);
16571 %}
16572
16573 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16574 %{
16575 predicate(CompressedOops::base() == nullptr &&
16576 n->in(1)->as_Load()->barrier_data() == 0);
16577 match(Set cr (CmpN (LoadN mem) zero));
16578
16579 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16580 ins_encode %{
16581 __ cmpl(r12, $mem$$Address);
16582 %}
16583 ins_pipe(ialu_cr_reg_mem);
16584 %}
16585
16586 // Yanked all unsigned pointer compare operations.
16587 // Pointer compares are done with CmpP which is already unsigned.
16588
16589 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16590 %{
16591 match(Set cr (CmpL op1 op2));
16592
16593 format %{ "cmpq $op1, $op2" %}
16594 ins_encode %{
16595 __ cmpq($op1$$Register, $op2$$Register);
16596 %}
16597 ins_pipe(ialu_cr_reg_reg);
16598 %}
16599
16600 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16601 %{
16602 match(Set cr (CmpL op1 op2));
16603
16604 format %{ "cmpq $op1, $op2" %}
16605 ins_encode %{
16606 __ cmpq($op1$$Register, $op2$$constant);
16607 %}
16608 ins_pipe(ialu_cr_reg_imm);
16609 %}
16610
16611 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16612 %{
16613 match(Set cr (CmpL op1 (LoadL op2)));
16614
16615 format %{ "cmpq $op1, $op2" %}
16616 ins_encode %{
16617 __ cmpq($op1$$Register, $op2$$Address);
16618 %}
16619 ins_pipe(ialu_cr_reg_mem);
16620 %}
16621
16622 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16623 %{
16624 match(Set cr (CmpL src zero));
16625
16626 format %{ "testq $src, $src" %}
16627 ins_encode %{
16628 __ testq($src$$Register, $src$$Register);
16629 %}
16630 ins_pipe(ialu_cr_reg_imm);
16631 %}
16632
16633 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16634 %{
16635 match(Set cr (CmpL (AndL src con) zero));
16636
16637 format %{ "testq $src, $con\t# long" %}
16638 ins_encode %{
16639 __ testq($src$$Register, $con$$constant);
16640 %}
16641 ins_pipe(ialu_cr_reg_imm);
16642 %}
16643
16644 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16645 %{
16646 match(Set cr (CmpL (AndL src1 src2) zero));
16647
16648 format %{ "testq $src1, $src2\t# long" %}
16649 ins_encode %{
16650 __ testq($src1$$Register, $src2$$Register);
16651 %}
16652 ins_pipe(ialu_cr_reg_imm);
16653 %}
16654
16655 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16656 %{
16657 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16658
16659 format %{ "testq $src, $mem" %}
16660 ins_encode %{
16661 __ testq($src$$Register, $mem$$Address);
16662 %}
16663 ins_pipe(ialu_cr_reg_mem);
16664 %}
16665
16666 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16667 %{
16668 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16669
16670 format %{ "testq $src, $mem" %}
16671 ins_encode %{
16672 __ testq($src$$Register, $mem$$Address);
16673 %}
16674 ins_pipe(ialu_cr_reg_mem);
16675 %}
16676
16677 // Manifest a CmpU result in an integer register. Very painful.
16678 // This is the test to avoid.
16679 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16680 %{
16681 match(Set dst (CmpU3 src1 src2));
16682 effect(KILL flags);
16683
16684 ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16690 ins_encode %{
16691 Label done;
16692 __ cmpl($src1$$Register, $src2$$Register);
16693 __ movl($dst$$Register, -1);
16694 __ jccb(Assembler::below, done);
16695 __ setcc(Assembler::notZero, $dst$$Register);
16696 __ bind(done);
16697 %}
16698 ins_pipe(pipe_slow);
16699 %}
16700
16701 // Manifest a CmpL result in an integer register. Very painful.
16702 // This is the test to avoid.
16703 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16704 %{
16705 match(Set dst (CmpL3 src1 src2));
16706 effect(KILL flags);
16707
16708 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16714 ins_encode %{
16715 Label done;
16716 __ cmpq($src1$$Register, $src2$$Register);
16717 __ movl($dst$$Register, -1);
16718 __ jccb(Assembler::less, done);
16719 __ setcc(Assembler::notZero, $dst$$Register);
16720 __ bind(done);
16721 %}
16722 ins_pipe(pipe_slow);
16723 %}
16724
16725 // Manifest a CmpUL result in an integer register. Very painful.
16726 // This is the test to avoid.
16727 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16728 %{
16729 match(Set dst (CmpUL3 src1 src2));
16730 effect(KILL flags);
16731
16732 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16738 ins_encode %{
16739 Label done;
16740 __ cmpq($src1$$Register, $src2$$Register);
16741 __ movl($dst$$Register, -1);
16742 __ jccb(Assembler::below, done);
16743 __ setcc(Assembler::notZero, $dst$$Register);
16744 __ bind(done);
16745 %}
16746 ins_pipe(pipe_slow);
16747 %}
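
// All three expansions above compute
//   dst = (src1 < src2) ? -1 : ((src1 == src2) ? 0 : 1)
// dst is preloaded with -1, the jcc exits early when the less/below case
// already holds, and setcc(notZero) then yields 0 for equal operands and
// 1 otherwise.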
16748
16749 // Unsigned long compare Instructions; really, same as signed long except they
16750 // produce an rFlagsRegU instead of rFlagsReg.
16751 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16752 %{
16753 match(Set cr (CmpUL op1 op2));
16754
16755 format %{ "cmpq $op1, $op2\t# unsigned" %}
16756 ins_encode %{
16757 __ cmpq($op1$$Register, $op2$$Register);
16758 %}
16759 ins_pipe(ialu_cr_reg_reg);
16760 %}
16761
16762 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16763 %{
16764 match(Set cr (CmpUL op1 op2));
16765
16766 format %{ "cmpq $op1, $op2\t# unsigned" %}
16767 ins_encode %{
16768 __ cmpq($op1$$Register, $op2$$constant);
16769 %}
16770 ins_pipe(ialu_cr_reg_imm);
16771 %}
16772
16773 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16774 %{
16775 match(Set cr (CmpUL op1 (LoadL op2)));
16776
16777 format %{ "cmpq $op1, $op2\t# unsigned" %}
16778 ins_encode %{
16779 __ cmpq($op1$$Register, $op2$$Address);
16780 %}
16781 ins_pipe(ialu_cr_reg_mem);
16782 %}
16783
16784 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16785 %{
16786 match(Set cr (CmpUL src zero));
16787
16788 format %{ "testq $src, $src\t# unsigned" %}
16789 ins_encode %{
16790 __ testq($src$$Register, $src$$Register);
16791 %}
16792 ins_pipe(ialu_cr_reg_imm);
16793 %}
16794
16795 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16796 %{
16797 match(Set cr (CmpI (LoadB mem) imm));
16798
16799 ins_cost(125);
16800 format %{ "cmpb $mem, $imm" %}
16801 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16802 ins_pipe(ialu_cr_reg_mem);
16803 %}
16804
16805 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16806 %{
16807 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16808
16809 ins_cost(125);
16810 format %{ "testb $mem, $imm\t# ubyte" %}
16811 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16812 ins_pipe(ialu_cr_reg_mem);
16813 %}
16814
16815 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16816 %{
16817 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16818
16819 ins_cost(125);
16820 format %{ "testb $mem, $imm\t# byte" %}
16821 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16822 ins_pipe(ialu_cr_reg_mem);
16823 %}
16824
16825 //----------Max and Min--------------------------------------------------------
16826 // Min Instructions
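
// x86 has no scalar integer min/max instruction, so MinI/MaxI are expanded
// into a compare feeding a conditional move. The _ndd variants use the APX
// new-data-destination form of cmov, which writes a separate destination
// and so leaves src1 intact.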
16827
16828 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16829 %{
16830 predicate(!UseAPX);
16831 effect(USE_DEF dst, USE src, USE cr);
16832
16833 format %{ "cmovlgt $dst, $src\t# min" %}
16834 ins_encode %{
16835 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16836 %}
16837 ins_pipe(pipe_cmov_reg);
16838 %}
16839
16840 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16841 %{
16842 predicate(UseAPX);
16843 effect(DEF dst, USE src1, USE src2, USE cr);
16844
16845 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16846 ins_encode %{
16847 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16848 %}
16849 ins_pipe(pipe_cmov_reg);
16850 %}
16851
16852 instruct minI_rReg(rRegI dst, rRegI src)
16853 %{
16854 predicate(!UseAPX);
16855 match(Set dst (MinI dst src));
16856
16857 ins_cost(200);
16858 expand %{
16859 rFlagsReg cr;
16860 compI_rReg(cr, dst, src);
16861 cmovI_reg_g(dst, src, cr);
16862 %}
16863 %}
16864
16865 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16866 %{
16867 predicate(UseAPX);
16868 match(Set dst (MinI src1 src2));
16869 effect(DEF dst, USE src1, USE src2);
16870
16871 ins_cost(200);
16872 expand %{
16873 rFlagsReg cr;
16874 compI_rReg(cr, src1, src2);
16875 cmovI_reg_g_ndd(dst, src1, src2, cr);
16876 %}
16877 %}
16878
16879 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16880 %{
16881 predicate(!UseAPX);
16882 effect(USE_DEF dst, USE src, USE cr);
16883
16884 format %{ "cmovllt $dst, $src\t# max" %}
16885 ins_encode %{
16886 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16887 %}
16888 ins_pipe(pipe_cmov_reg);
16889 %}
16890
16891 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16892 %{
16893 predicate(UseAPX);
16894 effect(DEF dst, USE src1, USE src2, USE cr);
16895
16896 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16897 ins_encode %{
16898 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16899 %}
16900 ins_pipe(pipe_cmov_reg);
16901 %}
16902
16903 instruct maxI_rReg(rRegI dst, rRegI src)
16904 %{
16905 predicate(!UseAPX);
16906 match(Set dst (MaxI dst src));
16907
16908 ins_cost(200);
16909 expand %{
16910 rFlagsReg cr;
16911 compI_rReg(cr, dst, src);
16912 cmovI_reg_l(dst, src, cr);
16913 %}
16914 %}
16915
16916 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16917 %{
16918 predicate(UseAPX);
16919 match(Set dst (MaxI src1 src2));
16920 effect(DEF dst, USE src1, USE src2);
16921
16922 ins_cost(200);
16923 expand %{
16924 rFlagsReg cr;
16925 compI_rReg(cr, src1, src2);
16926 cmovI_reg_l_ndd(dst, src1, src2, cr);
16927 %}
16928 %}
16929
16930 // ============================================================================
16931 // Branch Instructions
16932
16933 // Jump Direct - Label defines a relative address from JMP+1
16934 instruct jmpDir(label labl)
16935 %{
16936 match(Goto);
16937 effect(USE labl);
16938
16939 ins_cost(300);
16940 format %{ "jmp $labl" %}
16941 size(5);
16942 ins_encode %{
16943 Label* L = $labl$$label;
16944 __ jmp(*L, false); // Always long jump
16945 %}
16946 ins_pipe(pipe_jmp);
16947 %}
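
// The size(5)/size(6) annotations correspond to the long rel32 encodings
// (jmp: 0xE9 + rel32; jcc: 0x0F 0x8x + rel32). Branches that turn out to
// be short are replaced by the rel8 short-offset variants declared near
// the end of this file.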
16948
16949 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16950 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16951 %{
16952 match(If cop cr);
16953 effect(USE labl);
16954
16955 ins_cost(300);
16956 format %{ "j$cop $labl" %}
16957 size(6);
16958 ins_encode %{
16959 Label* L = $labl$$label;
16960 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16961 %}
16962 ins_pipe(pipe_jcc);
16963 %}
16964
16965 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16966 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16967 %{
16968 match(CountedLoopEnd cop cr);
16969 effect(USE labl);
16970
16971 ins_cost(300);
16972 format %{ "j$cop $labl\t# loop end" %}
16973 size(6);
16974 ins_encode %{
16975 Label* L = $labl$$label;
16976 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16977 %}
16978 ins_pipe(pipe_jcc);
16979 %}
16980
16981 // Jump Direct Conditional - using unsigned comparison
16982 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16983 match(If cop cmp);
16984 effect(USE labl);
16985
16986 ins_cost(300);
16987 format %{ "j$cop,u $labl" %}
16988 size(6);
16989 ins_encode %{
16990 Label* L = $labl$$label;
16991 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16992 %}
16993 ins_pipe(pipe_jcc);
16994 %}
16995
16996 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16997 match(If cop cmp);
16998 effect(USE labl);
16999
17000 ins_cost(200);
17001 format %{ "j$cop,u $labl" %}
17002 size(6);
17003 ins_encode %{
17004 Label* L = $labl$$label;
17005 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17006 %}
17007 ins_pipe(pipe_jcc);
17008 %}
17009
17010 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17011 match(If cop cmp);
17012 effect(USE labl);
17013
17014 ins_cost(200);
17015 format %{ $$template
17016 if ($cop$$cmpcode == Assembler::notEqual) {
17017 $$emit$$"jp,u $labl\n\t"
17018 $$emit$$"j$cop,u $labl"
17019 } else {
17020 $$emit$$"jp,u done\n\t"
17021 $$emit$$"j$cop,u $labl\n\t"
17022 $$emit$$"done:"
17023 }
17024 %}
17025 ins_encode %{
17026 Label* l = $labl$$label;
17027 if ($cop$$cmpcode == Assembler::notEqual) {
17028 __ jcc(Assembler::parity, *l, false);
17029 __ jcc(Assembler::notEqual, *l, false);
17030 } else if ($cop$$cmpcode == Assembler::equal) {
17031 Label done;
17032 __ jccb(Assembler::parity, done);
17033 __ jcc(Assembler::equal, *l, false);
17034 __ bind(done);
17035 } else {
17036 ShouldNotReachHere();
17037 }
17038 %}
17039 ins_pipe(pipe_jcc);
17040 %}
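
// For flags produced by an unordered floating-point compare, the parity
// flag marks the unordered case: a not-equal branch must also be taken
// when PF is set, while an equal branch must fall through, hence the
// asymmetric jp handling in the two arms above.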
17041
17042 // ============================================================================
17043 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
17044 // superklass array for an instance of the superklass. Set a hidden
17045 // internal cache on a hit (cache is checked with exposed code in
17046 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
17047 // encoding ALSO sets flags.
17048
17049 instruct partialSubtypeCheck(rdi_RegP result,
17050 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17051 rFlagsReg cr)
17052 %{
17053 match(Set result (PartialSubtypeCheck sub super));
17054 predicate(!UseSecondarySupersTable);
17055 effect(KILL rcx, KILL cr);
17056
17057 ins_cost(1100); // slightly larger than the next version
17058 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17059 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17060 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17061 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17062 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
17063 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17064 "xorq $result, $result\t\t Hit: rdi zero\n\t"
17065 "miss:\t" %}
17066
17067 ins_encode %{
17068 Label miss;
17069 // NB: Callers may assume that, when $result is a valid register,
17070 // check_klass_subtype_slow_path_linear sets it to a nonzero
17071 // value.
17072 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17073 $rcx$$Register, $result$$Register,
17074 nullptr, &miss,
17075 /*set_cond_codes:*/ true);
17076 __ xorptr($result$$Register, $result$$Register);
17077 __ bind(miss);
17078 %}
17079
17080 ins_pipe(pipe_slow);
17081 %}
17082
17083 // ============================================================================
// Two versions of the hashtable-based partialSubtypeCheck, both used
// when we need to search for a superclass in the secondary supers
// array. The first is used when we don't know _a priori_ the class
// being searched for. The second, far more common, is used when we do
// know: this covers instanceof, checkcast, and any case where C2 can
// determine the superclass by constant propagation.
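//
// A sketch of the constant-super fast path (hash_slot() is real and used
// below; the bitmap accessor name here is assumed for illustration):
//
//   u1 slot = super->hash_slot();            // precomputed at class load
//   if (((sub->secondary_supers_bitmap() >> slot) & 1) == 0)
//     return miss;                           // definite miss, no probe
//   // otherwise probe the hashed secondary-supers array starting at slot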
17090
17091 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17092 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17093 rFlagsReg cr)
17094 %{
17095 match(Set result (PartialSubtypeCheck sub super));
17096 predicate(UseSecondarySupersTable);
17097 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17098
17099 ins_cost(1000);
17100 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17101
17102 ins_encode %{
17103 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17104 $temp3$$Register, $temp4$$Register, $result$$Register);
17105 %}
17106
17107 ins_pipe(pipe_slow);
17108 %}
17109
17110 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17111 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17112 rFlagsReg cr)
17113 %{
17114 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17115 predicate(UseSecondarySupersTable);
17116 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17117
17118 ins_cost(700); // smaller than the next version
17119 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17120
17121 ins_encode %{
17122 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17123 if (InlineSecondarySupersTest) {
17124 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17125 $temp3$$Register, $temp4$$Register, $result$$Register,
17126 super_klass_slot);
17127 } else {
17128 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17129 }
17130 %}
17131
17132 ins_pipe(pipe_slow);
17133 %}
17134
17135 // ============================================================================
17136 // Branch Instructions -- short offset versions
17137 //
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler determines whether a
// short branch can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of this file.
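//
// For example, the long form of "jne label" encodes as 0F 85 cd (six bytes
// with a rel32 displacement), while the short jccb form encodes as 75 cb
// (two bytes, rel8); hence size(6) on the long variants above and size(2)
// on the short ones below, which can only reach -128..+127 bytes.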
17146
17147 // Jump Direct - Label defines a relative address from JMP+1
17148 instruct jmpDir_short(label labl) %{
17149 match(Goto);
17150 effect(USE labl);
17151
17152 ins_cost(300);
17153 format %{ "jmp,s $labl" %}
17154 size(2);
17155 ins_encode %{
17156 Label* L = $labl$$label;
17157 __ jmpb(*L);
17158 %}
17159 ins_pipe(pipe_jmp);
17160 ins_short_branch(1);
17161 %}
17162
17163 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17164 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17165 match(If cop cr);
17166 effect(USE labl);
17167
17168 ins_cost(300);
17169 format %{ "j$cop,s $labl" %}
17170 size(2);
17171 ins_encode %{
17172 Label* L = $labl$$label;
17173 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17174 %}
17175 ins_pipe(pipe_jcc);
17176 ins_short_branch(1);
17177 %}
17178
17179 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17180 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17181 match(CountedLoopEnd cop cr);
17182 effect(USE labl);
17183
17184 ins_cost(300);
17185 format %{ "j$cop,s $labl\t# loop end" %}
17186 size(2);
17187 ins_encode %{
17188 Label* L = $labl$$label;
17189 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17190 %}
17191 ins_pipe(pipe_jcc);
17192 ins_short_branch(1);
17193 %}
17194
17195 // Jump Direct Conditional - using unsigned comparison
17196 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17197 match(If cop cmp);
17198 effect(USE labl);
17199
17200 ins_cost(300);
17201 format %{ "j$cop,us $labl" %}
17202 size(2);
17203 ins_encode %{
17204 Label* L = $labl$$label;
17205 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17206 %}
17207 ins_pipe(pipe_jcc);
17208 ins_short_branch(1);
17209 %}
17210
17211 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17212 match(If cop cmp);
17213 effect(USE labl);
17214
17215 ins_cost(300);
17216 format %{ "j$cop,us $labl" %}
17217 size(2);
17218 ins_encode %{
17219 Label* L = $labl$$label;
17220 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17221 %}
17222 ins_pipe(pipe_jcc);
17223 ins_short_branch(1);
17224 %}
17225
17226 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17227 match(If cop cmp);
17228 effect(USE labl);
17229
17230 ins_cost(300);
17231 format %{ $$template
17232 if ($cop$$cmpcode == Assembler::notEqual) {
17233 $$emit$$"jp,u,s $labl\n\t"
17234 $$emit$$"j$cop,u,s $labl"
17235 } else {
17236 $$emit$$"jp,u,s done\n\t"
17237 $$emit$$"j$cop,u,s $labl\n\t"
17238 $$emit$$"done:"
17239 }
17240 %}
17241 size(4);
17242 ins_encode %{
17243 Label* l = $labl$$label;
17244 if ($cop$$cmpcode == Assembler::notEqual) {
17245 __ jccb(Assembler::parity, *l);
17246 __ jccb(Assembler::notEqual, *l);
17247 } else if ($cop$$cmpcode == Assembler::equal) {
17248 Label done;
17249 __ jccb(Assembler::parity, done);
17250 __ jccb(Assembler::equal, *l);
17251 __ bind(done);
17252 } else {
17253 ShouldNotReachHere();
17254 }
17255 %}
17256 ins_pipe(pipe_jcc);
17257 ins_short_branch(1);
17258 %}
17259
17260 // ============================================================================
17261 // inlined locking and unlocking
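//
// Conceptually, fast_lock tries to install a lock via a compare-and-swap
// on the object's mark word and reports success through ZF (a sketch
// assuming the lightweight-locking case; the real logic is in
// MacroAssembler::fast_lock):
//
//   markWord m = obj->mark();
//   bool ok = m.is_unlocked() && obj->cas_set_mark(locked_mark, m) == m;
//   // ZF = ok; the matched FastLock node consumes the rFlagsReg result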
17262
17263 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17264 match(Set cr (FastLock object box));
17265 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17266 ins_cost(300);
17267 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17268 ins_encode %{
17269 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17270 %}
17271 ins_pipe(pipe_slow);
17272 %}
17273
17274 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17275 match(Set cr (FastUnlock object rax_reg));
17276 effect(TEMP tmp, USE_KILL rax_reg);
17277 ins_cost(300);
17278 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17279 ins_encode %{
17280 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17281 %}
17282 ins_pipe(pipe_slow);
17283 %}
17284
17285
17286 // ============================================================================
17287 // Safepoint Instructions
17288 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17289 %{
17290 match(SafePoint poll);
17291 effect(KILL cr, USE poll);
17292
17293 format %{ "testl rax, [$poll]\t"
17294 "# Safepoint: poll for GC" %}
17295 ins_cost(125);
17296 ins_encode %{
17297 __ relocate(relocInfo::poll_type);
17298 address pre_pc = __ pc();
17299 __ testl(rax, Address($poll$$Register, 0));
17300 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17301 %}
17302 ins_pipe(ialu_reg_mem);
17303 %}
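// The $poll register holds the polling-page address loaded from the
// current thread (hence the _tls suffix). When a safepoint is requested
// the VM arms that page, the testl above faults, and the signal handler
// maps the faulting pc back to this site through the poll relocation
// recorded above (which is what the is_safepoint_poll() assert checks).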
17304
17305 instruct mask_all_evexL(kReg dst, rRegL src) %{
17306 match(Set dst (MaskAll src));
17307 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17308 ins_encode %{
17309 int mask_len = Matcher::vector_length(this);
17310 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17311 %}
17312 ins_pipe( pipe_slow );
17313 %}
17314
17315 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17316 predicate(Matcher::vector_length(n) > 32);
17317 match(Set dst (MaskAll src));
17318 effect(TEMP tmp);
17319 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17320 ins_encode %{
17321 int mask_len = Matcher::vector_length(this);
17322 __ movslq($tmp$$Register, $src$$Register);
17323 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17324 %}
17325 ins_pipe( pipe_slow );
17326 %}
17327
17328 // ============================================================================
17329 // Procedure Call/Return Instructions
17330 // Call Java Static Instruction
17331 // Note: If this code changes, the corresponding ret_addr_offset() and
17332 // compute_padding() functions will have to be adjusted.
17333 instruct CallStaticJavaDirect(method meth) %{
17334 match(CallStaticJava);
17335 effect(USE meth);
17336
17337 ins_cost(300);
17338 format %{ "call,static " %}
17339 opcode(0xE8); /* E8 cd */
17340 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17341 ins_pipe(pipe_slow);
17342 ins_alignment(4);
17343 %}
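// Why ins_alignment(4): the four-byte displacement of the 0xE8 call must
// be naturally aligned so the VM can rewrite the call target atomically;
// compute_padding() pads with nops until that holds (a summary of the
// constraint; see ret_addr_offset() and compute_padding() in the machine
// specific code section).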
17344
17345 // Call Java Dynamic Instruction
17346 // Note: If this code changes, the corresponding ret_addr_offset() and
17347 // compute_padding() functions will have to be adjusted.
17348 instruct CallDynamicJavaDirect(method meth)
17349 %{
17350 match(CallDynamicJava);
17351 effect(USE meth);
17352
17353 ins_cost(300);
17354 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17355 "call,dynamic " %}
17356 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17357 ins_pipe(pipe_slow);
17358 ins_alignment(4);
17359 %}
17360
17361 // Call Runtime Instruction
17362 instruct CallRuntimeDirect(method meth)
17363 %{
17364 match(CallRuntime);
17365 effect(USE meth);
17366
17367 ins_cost(300);
17368 format %{ "call,runtime " %}
17369 ins_encode(clear_avx, Java_To_Runtime(meth));
17370 ins_pipe(pipe_slow);
17371 %}
17372
17373 // Call runtime without safepoint
17374 instruct CallLeafDirect(method meth)
17375 %{
17376 match(CallLeaf);
17377 effect(USE meth);
17378
17379 ins_cost(300);
17380 format %{ "call_leaf,runtime " %}
17381 ins_encode(clear_avx, Java_To_Runtime(meth));
17382 ins_pipe(pipe_slow);
17383 %}
17384
17385 // Call runtime without safepoint and with vector arguments
17386 instruct CallLeafDirectVector(method meth)
17387 %{
17388 match(CallLeafVector);
17389 effect(USE meth);
17390
17391 ins_cost(300);
17392 format %{ "call_leaf,vector " %}
17393 ins_encode(Java_To_Runtime(meth));
17394 ins_pipe(pipe_slow);
17395 %}
17396
17397 // Call runtime without safepoint
// The entry point is null; the target register holds the address to call.
17399 instruct CallLeafNoFPInDirect(rRegP target)
17400 %{
17401 predicate(n->as_Call()->entry_point() == nullptr);
17402 match(CallLeafNoFP target);
17403
17404 ins_cost(300);
17405 format %{ "call_leaf_nofp,runtime indirect " %}
17406 ins_encode %{
17407 __ call($target$$Register);
17408 %}
17409
17410 ins_pipe(pipe_slow);
17411 %}
17412
17413 // Call runtime without safepoint
17414 instruct CallLeafNoFPDirect(method meth)
17415 %{
17416 predicate(n->as_Call()->entry_point() != nullptr);
17417 match(CallLeafNoFP);
17418 effect(USE meth);
17419
17420 ins_cost(300);
17421 format %{ "call_leaf_nofp,runtime " %}
17422 ins_encode(clear_avx, Java_To_Runtime(meth));
17423 ins_pipe(pipe_slow);
17424 %}
17425
17426 // Return Instruction
17427 // Remove the return address & jump to it.
// Note: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
17430 instruct Ret()
17431 %{
17432 match(Return);
17433
17434 format %{ "ret" %}
17435 ins_encode %{
17436 __ ret(0);
17437 %}
17438 ins_pipe(pipe_jmp);
17439 %}
17440
17441 // Tail Call; Jump from runtime stub to Java code.
17442 // Also known as an 'interprocedural jump'.
17443 // Target of jump will eventually return to caller.
17444 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, which has reset rbp to the caller state.
17447 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17448 %{
17449 match(TailCall jump_target method_ptr);
17450
17451 ins_cost(300);
17452 format %{ "jmp $jump_target\t# rbx holds method" %}
17453 ins_encode %{
17454 __ jmp($jump_target$$Register);
17455 %}
17456 ins_pipe(pipe_jmp);
17457 %}
17458
17459 // Tail Jump; remove the return address; jump to target.
17460 // TailCall above leaves the return address around.
17461 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17462 %{
17463 match(TailJump jump_target ex_oop);
17464
17465 ins_cost(300);
17466 format %{ "popq rdx\t# pop return address\n\t"
17467 "jmp $jump_target" %}
17468 ins_encode %{
17469 __ popq(as_Register(RDX_enc));
17470 __ jmp($jump_target$$Register);
17471 %}
17472 ins_pipe(pipe_jmp);
17473 %}
17474
17475 // Forward exception.
17476 instruct ForwardExceptionjmp()
17477 %{
17478 match(ForwardException);
17479
17480 format %{ "jmp forward_exception_stub" %}
17481 ins_encode %{
17482 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17483 %}
17484 ins_pipe(pipe_jmp);
17485 %}
17486
// Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler and is set up
// just prior to jumping to this handler. No code emitted.
17490 instruct CreateException(rax_RegP ex_oop)
17491 %{
17492 match(Set ex_oop (CreateEx));
17493
17494 size(0);
17495 // use the following format syntax
17496 format %{ "# exception oop is in rax; no code emitted" %}
17497 ins_encode();
17498 ins_pipe(empty);
17499 %}
17500
17501 // Rethrow exception:
17502 // The exception oop will come in the first argument position.
17503 // Then JUMP (not call) to the rethrow stub code.
17504 instruct RethrowException()
17505 %{
17506 match(Rethrow);
17507
17508 // use the following format syntax
17509 format %{ "jmp rethrow_stub" %}
17510 ins_encode %{
17511 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17512 %}
17513 ins_pipe(pipe_jmp);
17514 %}
17515
17516 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17520 instruct tlsLoadP(r15_RegP dst) %{
17521 match(Set dst (ThreadLocal));
17522 effect(DEF dst);
17523
17524 size(0);
17525 format %{ "# TLS is in R15" %}
17526 ins_encode( /*empty encoding*/ );
17527 ins_pipe(ialu_reg_reg);
17528 %}
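// (On x86_64 HotSpot permanently reserves r15 as the current JavaThread
// pointer, which is why ThreadLocal costs no code here: thread-local VM
// state is simply addressed off r15, as in the fast_lock rule above.)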
17529
17530 instruct addF_reg(regF dst, regF src) %{
17531 predicate(UseAVX == 0);
17532 match(Set dst (AddF dst src));
17533
17534 format %{ "addss $dst, $src" %}
17535 ins_cost(150);
17536 ins_encode %{
17537 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17538 %}
17539 ins_pipe(pipe_slow);
17540 %}
17541
17542 instruct addF_mem(regF dst, memory src) %{
17543 predicate(UseAVX == 0);
17544 match(Set dst (AddF dst (LoadF src)));
17545
17546 format %{ "addss $dst, $src" %}
17547 ins_cost(150);
17548 ins_encode %{
17549 __ addss($dst$$XMMRegister, $src$$Address);
17550 %}
17551 ins_pipe(pipe_slow);
17552 %}
17553
17554 instruct addF_imm(regF dst, immF con) %{
17555 predicate(UseAVX == 0);
17556 match(Set dst (AddF dst con));
17557 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17558 ins_cost(150);
17559 ins_encode %{
17560 __ addss($dst$$XMMRegister, $constantaddress($con));
17561 %}
17562 ins_pipe(pipe_slow);
17563 %}
17564
17565 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17566 predicate(UseAVX > 0);
17567 match(Set dst (AddF src1 src2));
17568
17569 format %{ "vaddss $dst, $src1, $src2" %}
17570 ins_cost(150);
17571 ins_encode %{
17572 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17573 %}
17574 ins_pipe(pipe_slow);
17575 %}
17576
17577 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17578 predicate(UseAVX > 0);
17579 match(Set dst (AddF src1 (LoadF src2)));
17580
17581 format %{ "vaddss $dst, $src1, $src2" %}
17582 ins_cost(150);
17583 ins_encode %{
17584 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17585 %}
17586 ins_pipe(pipe_slow);
17587 %}
17588
17589 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17590 predicate(UseAVX > 0);
17591 match(Set dst (AddF src con));
17592
17593 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17594 ins_cost(150);
17595 ins_encode %{
17596 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17597 %}
17598 ins_pipe(pipe_slow);
17599 %}
17600
17601 instruct addD_reg(regD dst, regD src) %{
17602 predicate(UseAVX == 0);
17603 match(Set dst (AddD dst src));
17604
17605 format %{ "addsd $dst, $src" %}
17606 ins_cost(150);
17607 ins_encode %{
17608 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17609 %}
17610 ins_pipe(pipe_slow);
17611 %}
17612
17613 instruct addD_mem(regD dst, memory src) %{
17614 predicate(UseAVX == 0);
17615 match(Set dst (AddD dst (LoadD src)));
17616
17617 format %{ "addsd $dst, $src" %}
17618 ins_cost(150);
17619 ins_encode %{
17620 __ addsd($dst$$XMMRegister, $src$$Address);
17621 %}
17622 ins_pipe(pipe_slow);
17623 %}
17624
17625 instruct addD_imm(regD dst, immD con) %{
17626 predicate(UseAVX == 0);
17627 match(Set dst (AddD dst con));
17628 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17629 ins_cost(150);
17630 ins_encode %{
17631 __ addsd($dst$$XMMRegister, $constantaddress($con));
17632 %}
17633 ins_pipe(pipe_slow);
17634 %}
17635
17636 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17637 predicate(UseAVX > 0);
17638 match(Set dst (AddD src1 src2));
17639
17640 format %{ "vaddsd $dst, $src1, $src2" %}
17641 ins_cost(150);
17642 ins_encode %{
17643 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17644 %}
17645 ins_pipe(pipe_slow);
17646 %}
17647
17648 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17649 predicate(UseAVX > 0);
17650 match(Set dst (AddD src1 (LoadD src2)));
17651
17652 format %{ "vaddsd $dst, $src1, $src2" %}
17653 ins_cost(150);
17654 ins_encode %{
17655 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17656 %}
17657 ins_pipe(pipe_slow);
17658 %}
17659
17660 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17661 predicate(UseAVX > 0);
17662 match(Set dst (AddD src con));
17663
17664 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17665 ins_cost(150);
17666 ins_encode %{
17667 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17668 %}
17669 ins_pipe(pipe_slow);
17670 %}
17671
17672 instruct subF_reg(regF dst, regF src) %{
17673 predicate(UseAVX == 0);
17674 match(Set dst (SubF dst src));
17675
17676 format %{ "subss $dst, $src" %}
17677 ins_cost(150);
17678 ins_encode %{
17679 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17680 %}
17681 ins_pipe(pipe_slow);
17682 %}
17683
17684 instruct subF_mem(regF dst, memory src) %{
17685 predicate(UseAVX == 0);
17686 match(Set dst (SubF dst (LoadF src)));
17687
17688 format %{ "subss $dst, $src" %}
17689 ins_cost(150);
17690 ins_encode %{
17691 __ subss($dst$$XMMRegister, $src$$Address);
17692 %}
17693 ins_pipe(pipe_slow);
17694 %}
17695
17696 instruct subF_imm(regF dst, immF con) %{
17697 predicate(UseAVX == 0);
17698 match(Set dst (SubF dst con));
17699 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17700 ins_cost(150);
17701 ins_encode %{
17702 __ subss($dst$$XMMRegister, $constantaddress($con));
17703 %}
17704 ins_pipe(pipe_slow);
17705 %}
17706
17707 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17708 predicate(UseAVX > 0);
17709 match(Set dst (SubF src1 src2));
17710
17711 format %{ "vsubss $dst, $src1, $src2" %}
17712 ins_cost(150);
17713 ins_encode %{
17714 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17715 %}
17716 ins_pipe(pipe_slow);
17717 %}
17718
17719 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17720 predicate(UseAVX > 0);
17721 match(Set dst (SubF src1 (LoadF src2)));
17722
17723 format %{ "vsubss $dst, $src1, $src2" %}
17724 ins_cost(150);
17725 ins_encode %{
17726 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17727 %}
17728 ins_pipe(pipe_slow);
17729 %}
17730
17731 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17732 predicate(UseAVX > 0);
17733 match(Set dst (SubF src con));
17734
17735 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17736 ins_cost(150);
17737 ins_encode %{
17738 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17739 %}
17740 ins_pipe(pipe_slow);
17741 %}
17742
17743 instruct subD_reg(regD dst, regD src) %{
17744 predicate(UseAVX == 0);
17745 match(Set dst (SubD dst src));
17746
17747 format %{ "subsd $dst, $src" %}
17748 ins_cost(150);
17749 ins_encode %{
17750 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17751 %}
17752 ins_pipe(pipe_slow);
17753 %}
17754
17755 instruct subD_mem(regD dst, memory src) %{
17756 predicate(UseAVX == 0);
17757 match(Set dst (SubD dst (LoadD src)));
17758
17759 format %{ "subsd $dst, $src" %}
17760 ins_cost(150);
17761 ins_encode %{
17762 __ subsd($dst$$XMMRegister, $src$$Address);
17763 %}
17764 ins_pipe(pipe_slow);
17765 %}
17766
17767 instruct subD_imm(regD dst, immD con) %{
17768 predicate(UseAVX == 0);
17769 match(Set dst (SubD dst con));
17770 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17771 ins_cost(150);
17772 ins_encode %{
17773 __ subsd($dst$$XMMRegister, $constantaddress($con));
17774 %}
17775 ins_pipe(pipe_slow);
17776 %}
17777
17778 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17779 predicate(UseAVX > 0);
17780 match(Set dst (SubD src1 src2));
17781
17782 format %{ "vsubsd $dst, $src1, $src2" %}
17783 ins_cost(150);
17784 ins_encode %{
17785 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17786 %}
17787 ins_pipe(pipe_slow);
17788 %}
17789
17790 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17791 predicate(UseAVX > 0);
17792 match(Set dst (SubD src1 (LoadD src2)));
17793
17794 format %{ "vsubsd $dst, $src1, $src2" %}
17795 ins_cost(150);
17796 ins_encode %{
17797 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17798 %}
17799 ins_pipe(pipe_slow);
17800 %}
17801
17802 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17803 predicate(UseAVX > 0);
17804 match(Set dst (SubD src con));
17805
17806 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17807 ins_cost(150);
17808 ins_encode %{
17809 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17810 %}
17811 ins_pipe(pipe_slow);
17812 %}
17813
17814 instruct mulF_reg(regF dst, regF src) %{
17815 predicate(UseAVX == 0);
17816 match(Set dst (MulF dst src));
17817
17818 format %{ "mulss $dst, $src" %}
17819 ins_cost(150);
17820 ins_encode %{
17821 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17822 %}
17823 ins_pipe(pipe_slow);
17824 %}
17825
17826 instruct mulF_mem(regF dst, memory src) %{
17827 predicate(UseAVX == 0);
17828 match(Set dst (MulF dst (LoadF src)));
17829
17830 format %{ "mulss $dst, $src" %}
17831 ins_cost(150);
17832 ins_encode %{
17833 __ mulss($dst$$XMMRegister, $src$$Address);
17834 %}
17835 ins_pipe(pipe_slow);
17836 %}
17837
17838 instruct mulF_imm(regF dst, immF con) %{
17839 predicate(UseAVX == 0);
17840 match(Set dst (MulF dst con));
17841 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17842 ins_cost(150);
17843 ins_encode %{
17844 __ mulss($dst$$XMMRegister, $constantaddress($con));
17845 %}
17846 ins_pipe(pipe_slow);
17847 %}
17848
17849 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17850 predicate(UseAVX > 0);
17851 match(Set dst (MulF src1 src2));
17852
17853 format %{ "vmulss $dst, $src1, $src2" %}
17854 ins_cost(150);
17855 ins_encode %{
17856 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17857 %}
17858 ins_pipe(pipe_slow);
17859 %}
17860
17861 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17862 predicate(UseAVX > 0);
17863 match(Set dst (MulF src1 (LoadF src2)));
17864
17865 format %{ "vmulss $dst, $src1, $src2" %}
17866 ins_cost(150);
17867 ins_encode %{
17868 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17869 %}
17870 ins_pipe(pipe_slow);
17871 %}
17872
17873 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17874 predicate(UseAVX > 0);
17875 match(Set dst (MulF src con));
17876
17877 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17878 ins_cost(150);
17879 ins_encode %{
17880 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17881 %}
17882 ins_pipe(pipe_slow);
17883 %}
17884
17885 instruct mulD_reg(regD dst, regD src) %{
17886 predicate(UseAVX == 0);
17887 match(Set dst (MulD dst src));
17888
17889 format %{ "mulsd $dst, $src" %}
17890 ins_cost(150);
17891 ins_encode %{
17892 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17893 %}
17894 ins_pipe(pipe_slow);
17895 %}
17896
17897 instruct mulD_mem(regD dst, memory src) %{
17898 predicate(UseAVX == 0);
17899 match(Set dst (MulD dst (LoadD src)));
17900
17901 format %{ "mulsd $dst, $src" %}
17902 ins_cost(150);
17903 ins_encode %{
17904 __ mulsd($dst$$XMMRegister, $src$$Address);
17905 %}
17906 ins_pipe(pipe_slow);
17907 %}
17908
17909 instruct mulD_imm(regD dst, immD con) %{
17910 predicate(UseAVX == 0);
17911 match(Set dst (MulD dst con));
17912 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17913 ins_cost(150);
17914 ins_encode %{
17915 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17916 %}
17917 ins_pipe(pipe_slow);
17918 %}
17919
17920 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17921 predicate(UseAVX > 0);
17922 match(Set dst (MulD src1 src2));
17923
17924 format %{ "vmulsd $dst, $src1, $src2" %}
17925 ins_cost(150);
17926 ins_encode %{
17927 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17928 %}
17929 ins_pipe(pipe_slow);
17930 %}
17931
17932 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17933 predicate(UseAVX > 0);
17934 match(Set dst (MulD src1 (LoadD src2)));
17935
17936 format %{ "vmulsd $dst, $src1, $src2" %}
17937 ins_cost(150);
17938 ins_encode %{
17939 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17940 %}
17941 ins_pipe(pipe_slow);
17942 %}
17943
17944 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17945 predicate(UseAVX > 0);
17946 match(Set dst (MulD src con));
17947
17948 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17949 ins_cost(150);
17950 ins_encode %{
17951 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17952 %}
17953 ins_pipe(pipe_slow);
17954 %}
17955
17956 instruct divF_reg(regF dst, regF src) %{
17957 predicate(UseAVX == 0);
17958 match(Set dst (DivF dst src));
17959
17960 format %{ "divss $dst, $src" %}
17961 ins_cost(150);
17962 ins_encode %{
17963 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17964 %}
17965 ins_pipe(pipe_slow);
17966 %}
17967
17968 instruct divF_mem(regF dst, memory src) %{
17969 predicate(UseAVX == 0);
17970 match(Set dst (DivF dst (LoadF src)));
17971
17972 format %{ "divss $dst, $src" %}
17973 ins_cost(150);
17974 ins_encode %{
17975 __ divss($dst$$XMMRegister, $src$$Address);
17976 %}
17977 ins_pipe(pipe_slow);
17978 %}
17979
17980 instruct divF_imm(regF dst, immF con) %{
17981 predicate(UseAVX == 0);
17982 match(Set dst (DivF dst con));
17983 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17984 ins_cost(150);
17985 ins_encode %{
17986 __ divss($dst$$XMMRegister, $constantaddress($con));
17987 %}
17988 ins_pipe(pipe_slow);
17989 %}
17990
17991 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17992 predicate(UseAVX > 0);
17993 match(Set dst (DivF src1 src2));
17994
17995 format %{ "vdivss $dst, $src1, $src2" %}
17996 ins_cost(150);
17997 ins_encode %{
17998 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17999 %}
18000 ins_pipe(pipe_slow);
18001 %}
18002
18003 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18004 predicate(UseAVX > 0);
18005 match(Set dst (DivF src1 (LoadF src2)));
18006
18007 format %{ "vdivss $dst, $src1, $src2" %}
18008 ins_cost(150);
18009 ins_encode %{
18010 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18011 %}
18012 ins_pipe(pipe_slow);
18013 %}
18014
18015 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18016 predicate(UseAVX > 0);
18017 match(Set dst (DivF src con));
18018
18019 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18020 ins_cost(150);
18021 ins_encode %{
18022 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18023 %}
18024 ins_pipe(pipe_slow);
18025 %}
18026
18027 instruct divD_reg(regD dst, regD src) %{
18028 predicate(UseAVX == 0);
18029 match(Set dst (DivD dst src));
18030
18031 format %{ "divsd $dst, $src" %}
18032 ins_cost(150);
18033 ins_encode %{
18034 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18035 %}
18036 ins_pipe(pipe_slow);
18037 %}
18038
18039 instruct divD_mem(regD dst, memory src) %{
18040 predicate(UseAVX == 0);
18041 match(Set dst (DivD dst (LoadD src)));
18042
18043 format %{ "divsd $dst, $src" %}
18044 ins_cost(150);
18045 ins_encode %{
18046 __ divsd($dst$$XMMRegister, $src$$Address);
18047 %}
18048 ins_pipe(pipe_slow);
18049 %}
18050
18051 instruct divD_imm(regD dst, immD con) %{
18052 predicate(UseAVX == 0);
18053 match(Set dst (DivD dst con));
18054 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18055 ins_cost(150);
18056 ins_encode %{
18057 __ divsd($dst$$XMMRegister, $constantaddress($con));
18058 %}
18059 ins_pipe(pipe_slow);
18060 %}
18061
18062 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18063 predicate(UseAVX > 0);
18064 match(Set dst (DivD src1 src2));
18065
18066 format %{ "vdivsd $dst, $src1, $src2" %}
18067 ins_cost(150);
18068 ins_encode %{
18069 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18070 %}
18071 ins_pipe(pipe_slow);
18072 %}
18073
18074 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18075 predicate(UseAVX > 0);
18076 match(Set dst (DivD src1 (LoadD src2)));
18077
18078 format %{ "vdivsd $dst, $src1, $src2" %}
18079 ins_cost(150);
18080 ins_encode %{
18081 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18082 %}
18083 ins_pipe(pipe_slow);
18084 %}
18085
18086 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18087 predicate(UseAVX > 0);
18088 match(Set dst (DivD src con));
18089
18090 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18091 ins_cost(150);
18092 ins_encode %{
18093 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18094 %}
18095 ins_pipe(pipe_slow);
18096 %}
18097
18098 instruct absF_reg(regF dst) %{
18099 predicate(UseAVX == 0);
18100 match(Set dst (AbsF dst));
18101 ins_cost(150);
18102 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18103 ins_encode %{
18104 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18105 %}
18106 ins_pipe(pipe_slow);
18107 %}
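// e.g. 0xC0490FDB (-pi as a float) & 0x7FFFFFFF = 0x40490FDB (+pi);
// the negF/negD rules below instead flip the sign bit by xoring with
// 0x80000000 (or 0x8000000000000000 for doubles).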
18108
18109 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18110 predicate(UseAVX > 0);
18111 match(Set dst (AbsF src));
18112 ins_cost(150);
18113 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18114 ins_encode %{
18115 int vlen_enc = Assembler::AVX_128bit;
18116 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18117 ExternalAddress(float_signmask()), vlen_enc);
18118 %}
18119 ins_pipe(pipe_slow);
18120 %}
18121
18122 instruct absD_reg(regD dst) %{
18123 predicate(UseAVX == 0);
18124 match(Set dst (AbsD dst));
18125 ins_cost(150);
18126 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18127 "# abs double by sign masking" %}
18128 ins_encode %{
18129 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18130 %}
18131 ins_pipe(pipe_slow);
18132 %}
18133
18134 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18135 predicate(UseAVX > 0);
18136 match(Set dst (AbsD src));
18137 ins_cost(150);
18138 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18139 "# abs double by sign masking" %}
18140 ins_encode %{
18141 int vlen_enc = Assembler::AVX_128bit;
18142 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18143 ExternalAddress(double_signmask()), vlen_enc);
18144 %}
18145 ins_pipe(pipe_slow);
18146 %}
18147
18148 instruct negF_reg(regF dst) %{
18149 predicate(UseAVX == 0);
18150 match(Set dst (NegF dst));
18151 ins_cost(150);
18152 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18153 ins_encode %{
18154 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18155 %}
18156 ins_pipe(pipe_slow);
18157 %}
18158
18159 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18160 predicate(UseAVX > 0);
18161 match(Set dst (NegF src));
18162 ins_cost(150);
18163 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18164 ins_encode %{
18165 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18166 ExternalAddress(float_signflip()));
18167 %}
18168 ins_pipe(pipe_slow);
18169 %}
18170
18171 instruct negD_reg(regD dst) %{
18172 predicate(UseAVX == 0);
18173 match(Set dst (NegD dst));
18174 ins_cost(150);
18175 format %{ "xorpd $dst, [0x8000000000000000]\t"
18176 "# neg double by sign flipping" %}
18177 ins_encode %{
18178 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18179 %}
18180 ins_pipe(pipe_slow);
18181 %}
18182
18183 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18184 predicate(UseAVX > 0);
18185 match(Set dst (NegD src));
18186 ins_cost(150);
18187 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18188 "# neg double by sign flipping" %}
18189 ins_encode %{
18190 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18191 ExternalAddress(double_signflip()));
18192 %}
18193 ins_pipe(pipe_slow);
18194 %}
18195
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance; therefore only the rule where the input is pre-loaded
// into the dst register is defined below.
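// (sqrtss writes only the low 32 bits of its destination, so a cold dst
// would add a false dependency on whatever the register last held;
// requiring dst == src sidesteps that stall. The same reasoning applies
// to sqrtsd below.)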
18198 instruct sqrtF_reg(regF dst) %{
18199 match(Set dst (SqrtF dst));
18200 format %{ "sqrtss $dst, $dst" %}
18201 ins_encode %{
18202 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18203 %}
18204 ins_pipe(pipe_slow);
18205 %}
18206
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance; therefore only the rule where the input is pre-loaded
// into the dst register is defined below.
18209 instruct sqrtD_reg(regD dst) %{
18210 match(Set dst (SqrtD dst));
18211 format %{ "sqrtsd $dst, $dst" %}
18212 ins_encode %{
18213 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18214 %}
18215 ins_pipe(pipe_slow);
18216 %}
18217
18218 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18219 effect(TEMP tmp);
18220 match(Set dst (ConvF2HF src));
18221 ins_cost(125);
18222 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18223 ins_encode %{
18224 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18225 %}
18226 ins_pipe( pipe_slow );
18227 %}
18228
18229 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18230 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18231 effect(TEMP ktmp, TEMP rtmp);
18232 match(Set mem (StoreC mem (ConvF2HF src)));
18233 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18234 ins_encode %{
18235 __ movl($rtmp$$Register, 0x1);
18236 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18237 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18238 %}
18239 ins_pipe( pipe_slow );
18240 %}
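// (The mask trick above: loading 0x1 into $ktmp makes the masked
// evcvtps2ph store only the lowest 16-bit lane to $mem, i.e. a scalar
// float-to-half conversion written straight to memory.)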
18241
18242 instruct vconvF2HF(vec dst, vec src) %{
18243 match(Set dst (VectorCastF2HF src));
18244 format %{ "vector_conv_F2HF $dst $src" %}
18245 ins_encode %{
18246 int vlen_enc = vector_length_encoding(this, $src);
18247 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18248 %}
18249 ins_pipe( pipe_slow );
18250 %}
18251
18252 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18253 predicate(n->as_StoreVector()->memory_size() >= 16);
18254 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18255 format %{ "vcvtps2ph $mem,$src" %}
18256 ins_encode %{
18257 int vlen_enc = vector_length_encoding(this, $src);
18258 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18259 %}
18260 ins_pipe( pipe_slow );
18261 %}
18262
18263 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18264 match(Set dst (ConvHF2F src));
18265 format %{ "vcvtph2ps $dst,$src" %}
18266 ins_encode %{
18267 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18268 %}
18269 ins_pipe( pipe_slow );
18270 %}
18271
18272 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18273 match(Set dst (VectorCastHF2F (LoadVector mem)));
18274 format %{ "vcvtph2ps $dst,$mem" %}
18275 ins_encode %{
18276 int vlen_enc = vector_length_encoding(this);
18277 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18278 %}
18279 ins_pipe( pipe_slow );
18280 %}
18281
18282 instruct vconvHF2F(vec dst, vec src) %{
18283 match(Set dst (VectorCastHF2F src));
18284 ins_cost(125);
18285 format %{ "vector_conv_HF2F $dst,$src" %}
18286 ins_encode %{
18287 int vlen_enc = vector_length_encoding(this);
18288 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18289 %}
18290 ins_pipe( pipe_slow );
18291 %}
18292
18293 // ---------------------------------------- VectorReinterpret ------------------------------------
18294 instruct reinterpret_mask(kReg dst) %{
18295 predicate(n->bottom_type()->isa_vectmask() &&
18296 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18297 match(Set dst (VectorReinterpret dst));
18298 ins_cost(125);
18299 format %{ "vector_reinterpret $dst\t!" %}
18300 ins_encode %{
18301 // empty
18302 %}
18303 ins_pipe( pipe_slow );
18304 %}
18305
18306 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18307 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18308 n->bottom_type()->isa_vectmask() &&
18309 n->in(1)->bottom_type()->isa_vectmask() &&
18310 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same payload size
18312 match(Set dst (VectorReinterpret src));
18313 effect(TEMP xtmp);
18314 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18315 ins_encode %{
18316 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18317 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18319 int vlen_enc = vector_length_encoding(src_sz);
18320 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18321 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18322 %}
18323 ins_pipe( pipe_slow );
18324 %}
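// Example: an 8-bit k-mask over eight shorts (16 payload bytes) becomes a
// 16-bit k-mask over sixteen bytes. evpmovm2w expands each mask bit into
// an all-ones/all-zeros short lane, and evpmovb2m then re-derives one mask
// bit per byte lane, so every source bit yields two destination bits.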
18325
18326 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18327 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18328 n->bottom_type()->isa_vectmask() &&
18329 n->in(1)->bottom_type()->isa_vectmask() &&
18330 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18331 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same payload size
18333 match(Set dst (VectorReinterpret src));
18334 effect(TEMP xtmp);
18335 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18336 ins_encode %{
18337 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18338 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18340 int vlen_enc = vector_length_encoding(src_sz);
18341 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18342 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18343 %}
18344 ins_pipe( pipe_slow );
18345 %}
18346
18347 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18348 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18349 n->bottom_type()->isa_vectmask() &&
18350 n->in(1)->bottom_type()->isa_vectmask() &&
18351 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18352 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same payload size
18354 match(Set dst (VectorReinterpret src));
18355 effect(TEMP xtmp);
18356 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18357 ins_encode %{
18358 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18359 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18361 int vlen_enc = vector_length_encoding(src_sz);
18362 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18363 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18364 %}
18365 ins_pipe( pipe_slow );
18366 %}
18367
18368 instruct reinterpret(vec dst) %{
18369 predicate(!n->bottom_type()->isa_vectmask() &&
18370 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18371 match(Set dst (VectorReinterpret dst));
18372 ins_cost(125);
18373 format %{ "vector_reinterpret $dst\t!" %}
18374 ins_encode %{
18375 // empty
18376 %}
18377 ins_pipe( pipe_slow );
18378 %}
18379
18380 instruct reinterpret_expand(vec dst, vec src) %{
18381 predicate(UseAVX == 0 &&
18382 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18383 match(Set dst (VectorReinterpret src));
18384 ins_cost(125);
18385 effect(TEMP dst);
18386 format %{ "vector_reinterpret_expand $dst,$src" %}
18387 ins_encode %{
18388 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18389 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18390
18391 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18392 if (src_vlen_in_bytes == 4) {
18393 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18394 } else {
18395 assert(src_vlen_in_bytes == 8, "");
18396 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18397 }
18398 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18399 %}
18400 ins_pipe( pipe_slow );
18401 %}
18402
18403 instruct vreinterpret_expand4(legVec dst, vec src) %{
18404 predicate(UseAVX > 0 &&
18405 !n->bottom_type()->isa_vectmask() &&
18406 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18407 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18408 match(Set dst (VectorReinterpret src));
18409 ins_cost(125);
18410 format %{ "vector_reinterpret_expand $dst,$src" %}
18411 ins_encode %{
18412 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18413 %}
18414 ins_pipe( pipe_slow );
18415 %}
18416
18417
18418 instruct vreinterpret_expand(legVec dst, vec src) %{
18419 predicate(UseAVX > 0 &&
18420 !n->bottom_type()->isa_vectmask() &&
18421 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18422 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18423 match(Set dst (VectorReinterpret src));
18424 ins_cost(125);
18425 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18426 ins_encode %{
18427 switch (Matcher::vector_length_in_bytes(this, $src)) {
18428 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18429 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18430 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18431 default: ShouldNotReachHere();
18432 }
18433 %}
18434 ins_pipe( pipe_slow );
18435 %}
18436
18437 instruct reinterpret_shrink(vec dst, legVec src) %{
18438 predicate(!n->bottom_type()->isa_vectmask() &&
18439 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18440 match(Set dst (VectorReinterpret src));
18441 ins_cost(125);
18442 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18443 ins_encode %{
18444 switch (Matcher::vector_length_in_bytes(this)) {
18445 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18446 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18447 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18448 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18449 default: ShouldNotReachHere();
18450 }
18451 %}
18452 ins_pipe( pipe_slow );
18453 %}
18454
18455 // ----------------------------------------------------------------------------------------------------
18456
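// Note: RoundDoubleMode/RoundDoubleModeV (backing the Math.floor, ceil
// and rint intrinsics) pass the mode constant straight through as the
// rounding-control immediate of roundsd/vroundpd/vrndscalepd, so a single
// rule per operand shape covers every mode.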
18457 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18458 match(Set dst (RoundDoubleMode src rmode));
18459 format %{ "roundsd $dst,$src" %}
18460 ins_cost(150);
18461 ins_encode %{
18462 assert(UseSSE >= 4, "required");
18463 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18464 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18465 }
18466 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18467 %}
18468 ins_pipe(pipe_slow);
18469 %}
18470
18471 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18472 match(Set dst (RoundDoubleMode con rmode));
18473 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18474 ins_cost(150);
18475 ins_encode %{
18476 assert(UseSSE >= 4, "required");
18477 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18478 %}
18479 ins_pipe(pipe_slow);
18480 %}
18481
18482 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18483 predicate(Matcher::vector_length(n) < 8);
18484 match(Set dst (RoundDoubleModeV src rmode));
18485 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18486 ins_encode %{
18487 assert(UseAVX > 0, "required");
18488 int vlen_enc = vector_length_encoding(this);
18489 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18490 %}
18491 ins_pipe( pipe_slow );
18492 %}
18493
18494 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18495 predicate(Matcher::vector_length(n) == 8);
18496 match(Set dst (RoundDoubleModeV src rmode));
18497 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18498 ins_encode %{
18499 assert(UseAVX > 2, "required");
18500 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18501 %}
18502 ins_pipe( pipe_slow );
18503 %}
18504
18505 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18506 predicate(Matcher::vector_length(n) < 8);
18507 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18508 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18509 ins_encode %{
18510 assert(UseAVX > 0, "required");
18511 int vlen_enc = vector_length_encoding(this);
18512 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18513 %}
18514 ins_pipe( pipe_slow );
18515 %}
18516
18517 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18518 predicate(Matcher::vector_length(n) == 8);
18519 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18520 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18521 ins_encode %{
18522 assert(UseAVX > 2, "required");
18523 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18524 %}
18525 ins_pipe( pipe_slow );
18526 %}
18527
18528 instruct onspinwait() %{
18529 match(OnSpinWait);
18530 ins_cost(200);
18531
18532 format %{
18533 $$template
18534 $$emit$$"pause\t! membar_onspinwait"
18535 %}
18536 ins_encode %{
18537 __ pause();
18538 %}
18539 ins_pipe(pipe_slow);
18540 %}
18541
18542 // a * b + c
18543 instruct fmaD_reg(regD a, regD b, regD c) %{
18544 match(Set c (FmaD c (Binary a b)));
18545 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18546 ins_cost(150);
18547 ins_encode %{
18548 assert(UseFMA, "Needs FMA instructions support.");
18549 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18550 %}
18551 ins_pipe( pipe_slow );
18552 %}
18553
18554 // a * b + c
18555 instruct fmaF_reg(regF a, regF b, regF c) %{
18556 match(Set c (FmaF c (Binary a b)));
18557 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18558 ins_cost(150);
18559 ins_encode %{
18560 assert(UseFMA, "Needs FMA instructions support.");
18561 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18562 %}
18563 ins_pipe( pipe_slow );
18564 %}
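// Note: both rules emit a true fused multiply-add, i.e. a*b+c with a
// single rounding step, which is what Math.fma requires; that is why
// they assert UseFMA rather than falling back to a mul+add pair.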
18565
18566 // ====================VECTOR INSTRUCTIONS=====================================
18567
18568 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18569 instruct MoveVec2Leg(legVec dst, vec src) %{
18570 match(Set dst src);
18571 format %{ "" %}
18572 ins_encode %{
18573 ShouldNotReachHere();
18574 %}
18575 ins_pipe( fpu_reg_reg );
18576 %}
18577
18578 instruct MoveLeg2Vec(vec dst, legVec src) %{
18579 match(Set dst src);
18580 format %{ "" %}
18581 ins_encode %{
18582 ShouldNotReachHere();
18583 %}
18584 ins_pipe( fpu_reg_reg );
18585 %}
18586
18587 // ============================================================================
18588
// Load vector (generic operand pattern)
18590 instruct loadV(vec dst, memory mem) %{
18591 match(Set dst (LoadVector mem));
18592 ins_cost(125);
18593 format %{ "load_vector $dst,$mem" %}
18594 ins_encode %{
18595 BasicType bt = Matcher::vector_element_basic_type(this);
18596 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18597 %}
18598 ins_pipe( pipe_slow );
18599 %}
18600
// Store vector (generic operand pattern)
18602 instruct storeV(memory mem, vec src) %{
18603 match(Set mem (StoreVector mem src));
18604 ins_cost(145);
18605 format %{ "store_vector $mem,$src\n\t" %}
18606 ins_encode %{
18607 switch (Matcher::vector_length_in_bytes(this, $src)) {
18608 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18609 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18610 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18611 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18612 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18613 default: ShouldNotReachHere();
18614 }
18615 %}
18616 ins_pipe( pipe_slow );
18617 %}
18618
18619 // ---------------------------------------- Gather ------------------------------------
18620
18621 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
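//
// A gather loads one lane per index; conceptually, for an int gather:
//
//   for (int i = 0; i < vlen; i++)
//     if (mask[i]) dst[i] = base[idx[i]];
//
// The AVX2 form below takes the mask in a vector register (all-ones for
// an unmasked gather); the AVX-512 forms take it in a k-register, and the
// hardware clears mask bits as lanes complete, which is why the masked
// rules first copy $mask into a temporary.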
18622
18623 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18624 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18625 Matcher::vector_length_in_bytes(n) <= 32);
18626 match(Set dst (LoadVectorGather mem idx));
18627 effect(TEMP dst, TEMP tmp, TEMP mask);
18628 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18629 ins_encode %{
18630 int vlen_enc = vector_length_encoding(this);
18631 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18632 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18633 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18634 __ lea($tmp$$Register, $mem$$Address);
18635 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18636 %}
18637 ins_pipe( pipe_slow );
18638 %}
18639
18640
18641 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18642 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18643 !is_subword_type(Matcher::vector_element_basic_type(n)));
18644 match(Set dst (LoadVectorGather mem idx));
18645 effect(TEMP dst, TEMP tmp, TEMP ktmp);
18646 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18647 ins_encode %{
18648 int vlen_enc = vector_length_encoding(this);
18649 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18650 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18651 __ lea($tmp$$Register, $mem$$Address);
18652 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18653 %}
18654 ins_pipe( pipe_slow );
18655 %}
18656
18657 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18658 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18659 !is_subword_type(Matcher::vector_element_basic_type(n)));
18660 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18661 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18662 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18663 ins_encode %{
18664 assert(UseAVX > 2, "sanity");
18665 int vlen_enc = vector_length_encoding(this);
18666 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18667 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask
    // register used for predication, the mask operand is first moved to
    // a temporary.
18670 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18671 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18672 __ lea($tmp$$Register, $mem$$Address);
18673 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18674 %}
18675 ins_pipe( pipe_slow );
18676 %}
18677
18678 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18679 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18680 match(Set dst (LoadVectorGather mem idx_base));
18681 effect(TEMP tmp, TEMP rtmp);
18682 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18683 ins_encode %{
18684 int vlen_enc = vector_length_encoding(this);
18685 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18686 __ lea($tmp$$Register, $mem$$Address);
18687 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18688 %}
18689 ins_pipe( pipe_slow );
18690 %}
18691
18692 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18693 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18694 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18695 match(Set dst (LoadVectorGather mem idx_base));
18696 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18697 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18698 ins_encode %{
18699 int vlen_enc = vector_length_encoding(this);
18700 int vector_len = Matcher::vector_length(this);
18701 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18702 __ lea($tmp$$Register, $mem$$Address);
18703 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18704 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18705 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18706 %}
18707 ins_pipe( pipe_slow );
18708 %}
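
// The hardware gathers (vpgatherdd and friends) handle only dword and qword
// elements, so byte/short gathers are emulated with scalar loads: vgather8b
// assembles one 8-byte chunk of elements into a GPR, and vgather_subword
// repeats that 8-byte step across the vector, with $length tracking the
// elements still to be gathered.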
18709
18710 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18711 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18712 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18713 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18714 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18715 ins_encode %{
18716 int vlen_enc = vector_length_encoding(this);
18717 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18718 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18719 __ lea($tmp$$Register, $mem$$Address);
18720 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18721 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18722 %}
18723 ins_pipe( pipe_slow );
18724 %}
18725
18726 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18727 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18728 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18729 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18730 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18731 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18732 ins_encode %{
18733 int vlen_enc = vector_length_encoding(this);
18734 int vector_len = Matcher::vector_length(this);
18735 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18736 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18737 __ lea($tmp$$Register, $mem$$Address);
18738 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18739 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18740 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18741 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18742 %}
18743 ins_pipe( pipe_slow );
18744 %}
18745
18746 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18747 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18748 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18749 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18750 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18751 ins_encode %{
18752 int vlen_enc = vector_length_encoding(this);
18753 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18754 __ lea($tmp$$Register, $mem$$Address);
18755 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18756 if (elem_bt == T_SHORT) {
18757 __ movl($mask_idx$$Register, 0x55555555);
18758 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18759 }
18760 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18761 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18762 %}
18763 ins_pipe( pipe_slow );
18764 %}
18765
18766 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18767 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18768 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18769 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18770 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18771 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18772 ins_encode %{
18773 int vlen_enc = vector_length_encoding(this);
18774 int vector_len = Matcher::vector_length(this);
18775 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18776 __ lea($tmp$$Register, $mem$$Address);
18777 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18778 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18779 if (elem_bt == T_SHORT) {
18780 __ movl($mask_idx$$Register, 0x55555555);
18781 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18782 }
18783 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18784 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18785 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18786 %}
18787 ins_pipe( pipe_slow );
18788 %}
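
// vpmovmskb produces one mask bit per byte, so for T_SHORT every element
// contributes two identical bits; the pext with constant 0x55555555 above
// keeps every other bit, compressing the result to one mask bit per 16-bit
// element before the scalar gather loop consumes it.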
18789
18790 // ====================Scatter=======================================
18791
18792 // Scatter INT, LONG, FLOAT, DOUBLE
18793
18794 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18795 predicate(UseAVX > 2);
18796 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18797 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18799 ins_encode %{
18800 int vlen_enc = vector_length_encoding(this, $src);
18801 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18802
18803 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18804 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18805
18806 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18807 __ lea($tmp$$Register, $mem$$Address);
18808 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18809 %}
18810 ins_pipe( pipe_slow );
18811 %}
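
// evscatter is always predicated by an opmask, so the unmasked rule above
// simply materializes an all-ones mask from the vector_all_bits_set()
// constant table entry before scattering.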
18812
18813 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18814 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18815 effect(TEMP tmp, TEMP ktmp);
18816 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18817 ins_encode %{
18818 int vlen_enc = vector_length_encoding(this, $src);
18819 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18820 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18821 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is copied to a temporary instead.
18824 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18825 __ lea($tmp$$Register, $mem$$Address);
18826 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18827 %}
18828 ins_pipe( pipe_slow );
18829 %}
18830
18831 // ====================REPLICATE=======================================
18832
18833 // Replicate byte scalar to be vector
18834 instruct vReplB_reg(vec dst, rRegI src) %{
18835 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18836 match(Set dst (Replicate src));
18837 format %{ "replicateB $dst,$src" %}
18838 ins_encode %{
18839 uint vlen = Matcher::vector_length(this);
18840 if (UseAVX >= 2) {
18841 int vlen_enc = vector_length_encoding(this);
18842 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18843 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18844 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18845 } else {
18846 __ movdl($dst$$XMMRegister, $src$$Register);
18847 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18848 }
18849 } else {
18850 assert(UseAVX < 2, "");
18851 __ movdl($dst$$XMMRegister, $src$$Register);
18852 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18853 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18854 if (vlen >= 16) {
18855 assert(vlen == 16, "");
18856 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18857 }
18858 }
18859 %}
18860 ins_pipe( pipe_slow );
18861 %}
18862
18863 instruct ReplB_mem(vec dst, memory mem) %{
18864 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18865 match(Set dst (Replicate (LoadB mem)));
18866 format %{ "replicateB $dst,$mem" %}
18867 ins_encode %{
18868 int vlen_enc = vector_length_encoding(this);
18869 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18870 %}
18871 ins_pipe( pipe_slow );
18872 %}
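
// Replicate nodes arise when a loop-invariant scalar is broadcast, either by
// the auto-vectorizer or directly from the Vector API. For example, an
// illustrative Java fragment (not tied to any particular rule here):
//
//   ByteVector v = ByteVector.broadcast(ByteVector.SPECIES_256, b);
//
// reaches the matcher as (Replicate b) with T_BYTE elements and a 32-byte
// vector.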
18873
18874 // ====================ReplicateS=======================================
18875
18876 instruct vReplS_reg(vec dst, rRegI src) %{
18877 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18878 match(Set dst (Replicate src));
18879 format %{ "replicateS $dst,$src" %}
18880 ins_encode %{
18881 uint vlen = Matcher::vector_length(this);
18882 int vlen_enc = vector_length_encoding(this);
18883 if (UseAVX >= 2) {
18884 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18885 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18886 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18887 } else {
18888 __ movdl($dst$$XMMRegister, $src$$Register);
18889 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18890 }
18891 } else {
18892 assert(UseAVX < 2, "");
18893 __ movdl($dst$$XMMRegister, $src$$Register);
18894 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18895 if (vlen >= 8) {
18896 assert(vlen == 8, "");
18897 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18898 }
18899 }
18900 %}
18901 ins_pipe( pipe_slow );
18902 %}
18903
18904 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18905 match(Set dst (Replicate con));
18906 effect(TEMP rtmp);
18907 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18908 ins_encode %{
18909 int vlen_enc = vector_length_encoding(this);
18910 BasicType bt = Matcher::vector_element_basic_type(this);
18911 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18912 __ movl($rtmp$$Register, $con$$constant);
18913 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18914 %}
18915 ins_pipe( pipe_slow );
18916 %}
18917
18918 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18919 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18920 match(Set dst (Replicate src));
18921 effect(TEMP rtmp);
18922 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18923 ins_encode %{
18924 int vlen_enc = vector_length_encoding(this);
18925 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18926 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18927 %}
18928 ins_pipe( pipe_slow );
18929 %}
18930
18931 instruct ReplS_mem(vec dst, memory mem) %{
18932 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18933 match(Set dst (Replicate (LoadS mem)));
18934 format %{ "replicateS $dst,$mem" %}
18935 ins_encode %{
18936 int vlen_enc = vector_length_encoding(this);
18937 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18938 %}
18939 ins_pipe( pipe_slow );
18940 %}
18941
18942 // ====================ReplicateI=======================================
18943
18944 instruct ReplI_reg(vec dst, rRegI src) %{
18945 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18946 match(Set dst (Replicate src));
18947 format %{ "replicateI $dst,$src" %}
18948 ins_encode %{
18949 uint vlen = Matcher::vector_length(this);
18950 int vlen_enc = vector_length_encoding(this);
18951 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18952 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18953 } else if (VM_Version::supports_avx2()) {
18954 __ movdl($dst$$XMMRegister, $src$$Register);
18955 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18956 } else {
18957 __ movdl($dst$$XMMRegister, $src$$Register);
18958 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18959 }
18960 %}
18961 ins_pipe( pipe_slow );
18962 %}
18963
18964 instruct ReplI_mem(vec dst, memory mem) %{
18965 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18966 match(Set dst (Replicate (LoadI mem)));
18967 format %{ "replicateI $dst,$mem" %}
18968 ins_encode %{
18969 int vlen_enc = vector_length_encoding(this);
18970 if (VM_Version::supports_avx2()) {
18971 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18972 } else if (VM_Version::supports_avx()) {
18973 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18974 } else {
18975 __ movdl($dst$$XMMRegister, $mem$$Address);
18976 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18977 }
18978 %}
18979 ins_pipe( pipe_slow );
18980 %}
18981
18982 instruct ReplI_imm(vec dst, immI con) %{
18983 predicate(Matcher::is_non_long_integral_vector(n));
18984 match(Set dst (Replicate con));
18985 format %{ "replicateI $dst,$con" %}
18986 ins_encode %{
18987 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18988 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18989 type2aelembytes(Matcher::vector_element_basic_type(this))));
18990 BasicType bt = Matcher::vector_element_basic_type(this);
18991 int vlen = Matcher::vector_length_in_bytes(this);
18992 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18993 %}
18994 ins_pipe( pipe_slow );
18995 %}
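
// The divisor above sizes the constant-table entry to the narrowest slot the
// available broadcast can widen from: 4 bytes with AVX (vbroadcastss /
// vpbroadcastd), 8 bytes with SSE3 (movddup), and a full 16 bytes with plain
// SSE2. For example, a T_SHORT immediate without SSE3 is written into the
// table 16 / 2 = 8 times and then loaded as one 16-byte constant.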
18996
18997 // Replicate scalar zero to be vector
18998 instruct ReplI_zero(vec dst, immI_0 zero) %{
18999 predicate(Matcher::is_non_long_integral_vector(n));
19000 match(Set dst (Replicate zero));
19001 format %{ "replicateI $dst,$zero" %}
19002 ins_encode %{
19003 int vlen_enc = vector_length_encoding(this);
19004 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19005 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19006 } else {
19007 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19008 }
19009 %}
19010 ins_pipe( fpu_reg_reg );
19011 %}
19012
19013 instruct ReplI_M1(vec dst, immI_M1 con) %{
19014 predicate(Matcher::is_non_long_integral_vector(n));
19015 match(Set dst (Replicate con));
19016 format %{ "vallones $dst" %}
19017 ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vlen_enc);
19020 %}
19021 ins_pipe( pipe_slow );
19022 %}
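
// vallones materializes the all-ones pattern without touching memory,
// typically via the compare-register-with-itself idiom (pcmpeqd / vpcmpeqd,
// since x == x holds in every lane) or vpternlogd with immediate 0xFF on
// EVEX targets.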
19023
19024 // ====================ReplicateL=======================================
19025
19026 // Replicate long (8 byte) scalar to be vector
19027 instruct ReplL_reg(vec dst, rRegL src) %{
19028 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19029 match(Set dst (Replicate src));
19030 format %{ "replicateL $dst,$src" %}
19031 ins_encode %{
19032 int vlen = Matcher::vector_length(this);
19033 int vlen_enc = vector_length_encoding(this);
19034 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19035 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19036 } else if (VM_Version::supports_avx2()) {
19037 __ movdq($dst$$XMMRegister, $src$$Register);
19038 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19039 } else {
19040 __ movdq($dst$$XMMRegister, $src$$Register);
19041 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19042 }
19043 %}
19044 ins_pipe( pipe_slow );
19045 %}
19046
19047 instruct ReplL_mem(vec dst, memory mem) %{
19048 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19049 match(Set dst (Replicate (LoadL mem)));
19050 format %{ "replicateL $dst,$mem" %}
19051 ins_encode %{
19052 int vlen_enc = vector_length_encoding(this);
19053 if (VM_Version::supports_avx2()) {
19054 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19055 } else if (VM_Version::supports_sse3()) {
19056 __ movddup($dst$$XMMRegister, $mem$$Address);
19057 } else {
19058 __ movq($dst$$XMMRegister, $mem$$Address);
19059 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19060 }
19061 %}
19062 ins_pipe( pipe_slow );
19063 %}
19064
19065 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19066 instruct ReplL_imm(vec dst, immL con) %{
19067 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19068 match(Set dst (Replicate con));
19069 format %{ "replicateL $dst,$con" %}
19070 ins_encode %{
19071 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19072 int vlen = Matcher::vector_length_in_bytes(this);
19073 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19074 %}
19075 ins_pipe( pipe_slow );
19076 %}
19077
19078 instruct ReplL_zero(vec dst, immL0 zero) %{
19079 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19080 match(Set dst (Replicate zero));
19081 format %{ "replicateL $dst,$zero" %}
19082 ins_encode %{
19083 int vlen_enc = vector_length_encoding(this);
19084 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19085 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19086 } else {
19087 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19088 }
19089 %}
19090 ins_pipe( fpu_reg_reg );
19091 %}
19092
19093 instruct ReplL_M1(vec dst, immL_M1 con) %{
19094 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19095 match(Set dst (Replicate con));
19096 format %{ "vallones $dst" %}
19097 ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vlen_enc);
19100 %}
19101 ins_pipe( pipe_slow );
19102 %}
19103
19104 // ====================ReplicateF=======================================
19105
19106 instruct vReplF_reg(vec dst, vlRegF src) %{
19107 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19108 match(Set dst (Replicate src));
19109 format %{ "replicateF $dst,$src" %}
19110 ins_encode %{
19111 uint vlen = Matcher::vector_length(this);
19112 int vlen_enc = vector_length_encoding(this);
19113 if (vlen <= 4) {
19114 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19115 } else if (VM_Version::supports_avx2()) {
19116 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19117 } else {
19118 assert(vlen == 8, "sanity");
19119 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19120 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19121 }
19122 %}
19123 ins_pipe( pipe_slow );
19124 %}
19125
19126 instruct ReplF_reg(vec dst, vlRegF src) %{
19127 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19128 match(Set dst (Replicate src));
19129 format %{ "replicateF $dst,$src" %}
19130 ins_encode %{
19131 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19132 %}
19133 ins_pipe( pipe_slow );
19134 %}
19135
19136 instruct ReplF_mem(vec dst, memory mem) %{
19137 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19138 match(Set dst (Replicate (LoadF mem)));
19139 format %{ "replicateF $dst,$mem" %}
19140 ins_encode %{
19141 int vlen_enc = vector_length_encoding(this);
19142 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19143 %}
19144 ins_pipe( pipe_slow );
19145 %}
19146
19147 // Replicate float scalar immediate to be vector by loading from const table.
19148 instruct ReplF_imm(vec dst, immF con) %{
19149 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19150 match(Set dst (Replicate con));
19151 format %{ "replicateF $dst,$con" %}
19152 ins_encode %{
19153 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19154 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19155 int vlen = Matcher::vector_length_in_bytes(this);
19156 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19157 %}
19158 ins_pipe( pipe_slow );
19159 %}
19160
19161 instruct ReplF_zero(vec dst, immF0 zero) %{
19162 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19163 match(Set dst (Replicate zero));
19164 format %{ "replicateF $dst,$zero" %}
19165 ins_encode %{
19166 int vlen_enc = vector_length_encoding(this);
19167 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19168 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19169 } else {
19170 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19171 }
19172 %}
19173 ins_pipe( fpu_reg_reg );
19174 %}
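
// The vldq test selects an encodable zeroing idiom: the EVEX form of vxorps
// requires AVX512DQ (plus AVX512VL below 512 bits), so bare-AVX512F targets
// clear floating-point vectors with the integer vpxor instead.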
19175
19176 // ====================ReplicateD=======================================
19177
19178 // Replicate double (8 bytes) scalar to be vector
19179 instruct vReplD_reg(vec dst, vlRegD src) %{
19180 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19181 match(Set dst (Replicate src));
19182 format %{ "replicateD $dst,$src" %}
19183 ins_encode %{
19184 uint vlen = Matcher::vector_length(this);
19185 int vlen_enc = vector_length_encoding(this);
19186 if (vlen <= 2) {
19187 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19188 } else if (VM_Version::supports_avx2()) {
19189 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19190 } else {
19191 assert(vlen == 4, "sanity");
19192 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19193 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19194 }
19195 %}
19196 ins_pipe( pipe_slow );
19197 %}
19198
19199 instruct ReplD_reg(vec dst, vlRegD src) %{
19200 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19201 match(Set dst (Replicate src));
19202 format %{ "replicateD $dst,$src" %}
19203 ins_encode %{
19204 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19205 %}
19206 ins_pipe( pipe_slow );
19207 %}
19208
19209 instruct ReplD_mem(vec dst, memory mem) %{
19210 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19211 match(Set dst (Replicate (LoadD mem)));
19212 format %{ "replicateD $dst,$mem" %}
19213 ins_encode %{
19214 if (Matcher::vector_length(this) >= 4) {
19215 int vlen_enc = vector_length_encoding(this);
19216 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19217 } else {
19218 __ movddup($dst$$XMMRegister, $mem$$Address);
19219 }
19220 %}
19221 ins_pipe( pipe_slow );
19222 %}
19223
19224 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19225 instruct ReplD_imm(vec dst, immD con) %{
19226 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19227 match(Set dst (Replicate con));
19228 format %{ "replicateD $dst,$con" %}
19229 ins_encode %{
19230 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19231 int vlen = Matcher::vector_length_in_bytes(this);
19232 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19233 %}
19234 ins_pipe( pipe_slow );
19235 %}
19236
19237 instruct ReplD_zero(vec dst, immD0 zero) %{
19238 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19239 match(Set dst (Replicate zero));
19240 format %{ "replicateD $dst,$zero" %}
19241 ins_encode %{
19242 int vlen_enc = vector_length_encoding(this);
19243 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19244 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19245 } else {
19246 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19247 }
19248 %}
19249 ins_pipe( fpu_reg_reg );
19250 %}
19251
19252 // ====================VECTOR INSERT=======================================
19253
19254 instruct insert(vec dst, rRegI val, immU8 idx) %{
19255 predicate(Matcher::vector_length_in_bytes(n) < 32);
19256 match(Set dst (VectorInsert (Binary dst val) idx));
19257 format %{ "vector_insert $dst,$val,$idx" %}
19258 ins_encode %{
19259 assert(UseSSE >= 4, "required");
19260 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19261
19262 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19263
19264 assert(is_integral_type(elem_bt), "");
19265 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19266
19267 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19268 %}
19269 ins_pipe( pipe_slow );
19270 %}
19271
19272 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19273 predicate(Matcher::vector_length_in_bytes(n) == 32);
19274 match(Set dst (VectorInsert (Binary src val) idx));
19275 effect(TEMP vtmp);
19276 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19277 ins_encode %{
19279 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19280 int elem_per_lane = 16/type2aelembytes(elem_bt);
19281 int log2epr = log2(elem_per_lane);
19282
19283 assert(is_integral_type(elem_bt), "sanity");
19284 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19285
19286 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19287 uint y_idx = ($idx$$constant >> log2epr) & 1;
19288 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19289 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19290 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19291 %}
19292 ins_pipe( pipe_slow );
19293 %}
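
// The index split above reduces a 256-bit insert to a legacy 128-bit one:
// x_idx is the element's position within its 16-byte lane and y_idx selects
// the lane. Worked example for byte element 21 of a 32-byte vector:
// elem_per_lane = 16, log2epr = 4, so x_idx = 21 & 0xF = 5 and
// y_idx = 21 >> 4 = 1, i.e. extract lane 1, insert at byte 5, reinsert lane 1.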
19294
19295 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19296 predicate(Matcher::vector_length_in_bytes(n) == 64);
19297 match(Set dst (VectorInsert (Binary src val) idx));
19298 effect(TEMP vtmp);
19299 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19300 ins_encode %{
19301 assert(UseAVX > 2, "sanity");
19302
19303 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19304 int elem_per_lane = 16/type2aelembytes(elem_bt);
19305 int log2epr = log2(elem_per_lane);
19306
19307 assert(is_integral_type(elem_bt), "");
19308 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19309
19310 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19311 uint y_idx = ($idx$$constant >> log2epr) & 3;
19312 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19313 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19314 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19315 %}
19316 ins_pipe( pipe_slow );
19317 %}
19318
19319 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19320 predicate(Matcher::vector_length(n) == 2);
19321 match(Set dst (VectorInsert (Binary dst val) idx));
19322 format %{ "vector_insert $dst,$val,$idx" %}
19323 ins_encode %{
19324 assert(UseSSE >= 4, "required");
19325 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19326 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19327
19328 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19329 %}
19330 ins_pipe( pipe_slow );
19331 %}
19332
19333 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19334 predicate(Matcher::vector_length(n) == 4);
19335 match(Set dst (VectorInsert (Binary src val) idx));
19336 effect(TEMP vtmp);
19337 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19338 ins_encode %{
19339 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19340 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19341
19342 uint x_idx = $idx$$constant & right_n_bits(1);
19343 uint y_idx = ($idx$$constant >> 1) & 1;
19345 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19346 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19347 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19348 %}
19349 ins_pipe( pipe_slow );
19350 %}
19351
19352 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19353 predicate(Matcher::vector_length(n) == 8);
19354 match(Set dst (VectorInsert (Binary src val) idx));
19355 effect(TEMP vtmp);
19356 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19357 ins_encode %{
19358 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19359 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19360
19361 uint x_idx = $idx$$constant & right_n_bits(1);
19362 uint y_idx = ($idx$$constant >> 1) & 3;
19363 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19364 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19365 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19366 %}
19367 ins_pipe( pipe_slow );
19368 %}
19369
19370 instruct insertF(vec dst, regF val, immU8 idx) %{
19371 predicate(Matcher::vector_length(n) < 8);
19372 match(Set dst (VectorInsert (Binary dst val) idx));
19373 format %{ "vector_insert $dst,$val,$idx" %}
19374 ins_encode %{
19375 assert(UseSSE >= 4, "sanity");
19376
19377 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19378 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19379
19380 uint x_idx = $idx$$constant & right_n_bits(2);
19381 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19382 %}
19383 ins_pipe( pipe_slow );
19384 %}
19385
19386 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19387 predicate(Matcher::vector_length(n) >= 8);
19388 match(Set dst (VectorInsert (Binary src val) idx));
19389 effect(TEMP vtmp);
19390 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19391 ins_encode %{
19392 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19393 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19394
19395 int vlen = Matcher::vector_length(this);
19396 uint x_idx = $idx$$constant & right_n_bits(2);
19397 if (vlen == 8) {
19398 uint y_idx = ($idx$$constant >> 2) & 1;
19400 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19401 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19402 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19403 } else {
19404 assert(vlen == 16, "sanity");
19405 uint y_idx = ($idx$$constant >> 2) & 3;
19406 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19407 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19408 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19409 }
19410 %}
19411 ins_pipe( pipe_slow );
19412 %}
19413
19414 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19415 predicate(Matcher::vector_length(n) == 2);
19416 match(Set dst (VectorInsert (Binary dst val) idx));
19417 effect(TEMP tmp);
19418 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19419 ins_encode %{
19420 assert(UseSSE >= 4, "sanity");
19421 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19422 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19423
19424 __ movq($tmp$$Register, $val$$XMMRegister);
19425 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19426 %}
19427 ins_pipe( pipe_slow );
19428 %}
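
// pinsrq only accepts a general-purpose register or memory source, so the
// double inserts bounce the value through a GPR via movq; the float rules
// can use insertps directly on XMM operands.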
19429
19430 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19431 predicate(Matcher::vector_length(n) == 4);
19432 match(Set dst (VectorInsert (Binary src val) idx));
19433 effect(TEMP vtmp, TEMP tmp);
19434 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19435 ins_encode %{
19436 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19437 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19438
19439 uint x_idx = $idx$$constant & right_n_bits(1);
19440 uint y_idx = ($idx$$constant >> 1) & 1;
19442 __ movq($tmp$$Register, $val$$XMMRegister);
19443 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19444 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19445 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19446 %}
19447 ins_pipe( pipe_slow );
19448 %}
19449
19450 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19451 predicate(Matcher::vector_length(n) == 8);
19452 match(Set dst (VectorInsert (Binary src val) idx));
19453 effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19455 ins_encode %{
19456 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19457 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19458
19459 uint x_idx = $idx$$constant & right_n_bits(1);
19460 uint y_idx = ($idx$$constant >> 1) & 3;
19461 __ movq($tmp$$Register, $val$$XMMRegister);
19462 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19463 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19464 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19465 %}
19466 ins_pipe( pipe_slow );
19467 %}
19468
19469 // ====================REDUCTION ARITHMETIC=======================================
19470
19471 // =======================Int Reduction==========================================
19472
19473 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19474 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19475 match(Set dst (AddReductionVI src1 src2));
19476 match(Set dst (MulReductionVI src1 src2));
19477 match(Set dst (AndReductionV src1 src2));
19478 match(Set dst ( OrReductionV src1 src2));
19479 match(Set dst (XorReductionV src1 src2));
19480 match(Set dst (MinReductionV src1 src2));
19481 match(Set dst (MaxReductionV src1 src2));
19482 effect(TEMP vtmp1, TEMP vtmp2);
19483 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19484 ins_encode %{
19485 int opcode = this->ideal_Opcode();
19486 int vlen = Matcher::vector_length(this, $src2);
19487 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19488 %}
19489 ins_pipe( pipe_slow );
19490 %}
19491
19492 // =======================Long Reduction==========================================
19493
19494 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19495 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19496 match(Set dst (AddReductionVL src1 src2));
19497 match(Set dst (MulReductionVL src1 src2));
19498 match(Set dst (AndReductionV src1 src2));
19499 match(Set dst ( OrReductionV src1 src2));
19500 match(Set dst (XorReductionV src1 src2));
19501 match(Set dst (MinReductionV src1 src2));
19502 match(Set dst (MaxReductionV src1 src2));
19503 effect(TEMP vtmp1, TEMP vtmp2);
19504 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19505 ins_encode %{
19506 int opcode = this->ideal_Opcode();
19507 int vlen = Matcher::vector_length(this, $src2);
19508 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19509 %}
19510 ins_pipe( pipe_slow );
19511 %}
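
// Without AVX512DQ some of the folding steps (notably 64-bit multiply, which
// has no vpmullq) fall back to sequences lacking EVEX encodings, so the rule
// above is restricted to legVec (xmm0-15); the DQ variant below may draw on
// the full EVEX register file.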
19512
19513 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19514 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19515 match(Set dst (AddReductionVL src1 src2));
19516 match(Set dst (MulReductionVL src1 src2));
19517 match(Set dst (AndReductionV src1 src2));
19518 match(Set dst ( OrReductionV src1 src2));
19519 match(Set dst (XorReductionV src1 src2));
19520 match(Set dst (MinReductionV src1 src2));
19521 match(Set dst (MaxReductionV src1 src2));
19522 effect(TEMP vtmp1, TEMP vtmp2);
19523 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19524 ins_encode %{
19525 int opcode = this->ideal_Opcode();
19526 int vlen = Matcher::vector_length(this, $src2);
19527 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19528 %}
19529 ins_pipe( pipe_slow );
19530 %}
19531
19532 // =======================Float Reduction==========================================
19533
19534 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19535 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19536 match(Set dst (AddReductionVF dst src));
19537 match(Set dst (MulReductionVF dst src));
19538 effect(TEMP dst, TEMP vtmp);
19539 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19540 ins_encode %{
19541 int opcode = this->ideal_Opcode();
19542 int vlen = Matcher::vector_length(this, $src);
19543 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19544 %}
19545 ins_pipe( pipe_slow );
19546 %}
19547
19548 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19549 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19550 match(Set dst (AddReductionVF dst src));
19551 match(Set dst (MulReductionVF dst src));
19552 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19553 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19554 ins_encode %{
19555 int opcode = this->ideal_Opcode();
19556 int vlen = Matcher::vector_length(this, $src);
19557 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19558 %}
19559 ins_pipe( pipe_slow );
19560 %}
19561
19562 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19563 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19564 match(Set dst (AddReductionVF dst src));
19565 match(Set dst (MulReductionVF dst src));
19566 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19567 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19568 ins_encode %{
19569 int opcode = this->ideal_Opcode();
19570 int vlen = Matcher::vector_length(this, $src);
19571 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19572 %}
19573 ins_pipe( pipe_slow );
19574 %}
19575
19576
19577 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19578 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19579 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19580 // src1 contains reduction identity
19581 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19582 match(Set dst (AddReductionVF src1 src2));
19583 match(Set dst (MulReductionVF src1 src2));
19584 effect(TEMP dst);
19585 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19586 ins_encode %{
19587 int opcode = this->ideal_Opcode();
19588 int vlen = Matcher::vector_length(this, $src2);
19589 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19590 %}
19591 ins_pipe( pipe_slow );
19592 %}
19593
19594 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19595 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19596 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19597 // src1 contains reduction identity
19598 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19599 match(Set dst (AddReductionVF src1 src2));
19600 match(Set dst (MulReductionVF src1 src2));
19601 effect(TEMP dst, TEMP vtmp);
19602 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19603 ins_encode %{
19604 int opcode = this->ideal_Opcode();
19605 int vlen = Matcher::vector_length(this, $src2);
19606 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19607 %}
19608 ins_pipe( pipe_slow );
19609 %}
19610
19611 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19612 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19613 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19614 // src1 contains reduction identity
19615 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19616 match(Set dst (AddReductionVF src1 src2));
19617 match(Set dst (MulReductionVF src1 src2));
19618 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19619 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19620 ins_encode %{
19621 int opcode = this->ideal_Opcode();
19622 int vlen = Matcher::vector_length(this, $src2);
19623 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19624 %}
19625 ins_pipe( pipe_slow );
19626 %}
19627
19628 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19629 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19630 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19631 // src1 contains reduction identity
19632 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19633 match(Set dst (AddReductionVF src1 src2));
19634 match(Set dst (MulReductionVF src1 src2));
19635 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19636 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19637 ins_encode %{
19638 int opcode = this->ideal_Opcode();
19639 int vlen = Matcher::vector_length(this, $src2);
19640 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19641 %}
19642 ins_pipe( pipe_slow );
19643 %}
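
// The requires_strict_order() split exists because floating-point add/mul
// are not associative: the strictly ordered rules above must accumulate
// sequentially, ((((dst + a0) + a1) + a2) + a3), to match Java's scalar
// semantics, while the unordered rules may fold pairwise, e.g.
// (a0 + a2) + (a1 + a3) combined with the identity in src1, which is what a
// lane-halving reduction implements.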
19644
19645 // =======================Double Reduction==========================================
19646
19647 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19648 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19649 match(Set dst (AddReductionVD dst src));
19650 match(Set dst (MulReductionVD dst src));
19651 effect(TEMP dst, TEMP vtmp);
19652 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19653 ins_encode %{
19654 int opcode = this->ideal_Opcode();
19655 int vlen = Matcher::vector_length(this, $src);
19656 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19657 %}
19658 ins_pipe( pipe_slow );
19659 %}
19660
19661 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19662 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19663 match(Set dst (AddReductionVD dst src));
19664 match(Set dst (MulReductionVD dst src));
19665 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19666 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19667 ins_encode %{
19668 int opcode = this->ideal_Opcode();
19669 int vlen = Matcher::vector_length(this, $src);
19670 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19671 %}
19672 ins_pipe( pipe_slow );
19673 %}
19674
19675 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19676 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19677 match(Set dst (AddReductionVD dst src));
19678 match(Set dst (MulReductionVD dst src));
19679 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19680 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19681 ins_encode %{
19682 int opcode = this->ideal_Opcode();
19683 int vlen = Matcher::vector_length(this, $src);
19684 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19685 %}
19686 ins_pipe( pipe_slow );
19687 %}
19688
19689 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19690 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19691 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19692 // src1 contains reduction identity
19693 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19694 match(Set dst (AddReductionVD src1 src2));
19695 match(Set dst (MulReductionVD src1 src2));
19696 effect(TEMP dst);
19697 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19698 ins_encode %{
19699 int opcode = this->ideal_Opcode();
19700 int vlen = Matcher::vector_length(this, $src2);
19701 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19702 %}
19703 ins_pipe( pipe_slow );
19704 %}
19705
19706 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19707 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19708 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19709 // src1 contains reduction identity
19710 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19711 match(Set dst (AddReductionVD src1 src2));
19712 match(Set dst (MulReductionVD src1 src2));
19713 effect(TEMP dst, TEMP vtmp);
19714 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19715 ins_encode %{
19716 int opcode = this->ideal_Opcode();
19717 int vlen = Matcher::vector_length(this, $src2);
19718 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19719 %}
19720 ins_pipe( pipe_slow );
19721 %}
19722
19723 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19724 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19725 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19726 // src1 contains reduction identity
19727 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19728 match(Set dst (AddReductionVD src1 src2));
19729 match(Set dst (MulReductionVD src1 src2));
19730 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19731 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19732 ins_encode %{
19733 int opcode = this->ideal_Opcode();
19734 int vlen = Matcher::vector_length(this, $src2);
19735 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19736 %}
19737 ins_pipe( pipe_slow );
19738 %}
19739
19740 // =======================Byte Reduction==========================================
19741
19742 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19743 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19744 match(Set dst (AddReductionVI src1 src2));
19745 match(Set dst (AndReductionV src1 src2));
19746 match(Set dst ( OrReductionV src1 src2));
19747 match(Set dst (XorReductionV src1 src2));
19748 match(Set dst (MinReductionV src1 src2));
19749 match(Set dst (MaxReductionV src1 src2));
19750 effect(TEMP vtmp1, TEMP vtmp2);
19751 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19752 ins_encode %{
19753 int opcode = this->ideal_Opcode();
19754 int vlen = Matcher::vector_length(this, $src2);
19755 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19756 %}
19757 ins_pipe( pipe_slow );
19758 %}
19759
19760 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19761 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19762 match(Set dst (AddReductionVI src1 src2));
19763 match(Set dst (AndReductionV src1 src2));
19764 match(Set dst ( OrReductionV src1 src2));
19765 match(Set dst (XorReductionV src1 src2));
19766 match(Set dst (MinReductionV src1 src2));
19767 match(Set dst (MaxReductionV src1 src2));
19768 effect(TEMP vtmp1, TEMP vtmp2);
19769 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19770 ins_encode %{
19771 int opcode = this->ideal_Opcode();
19772 int vlen = Matcher::vector_length(this, $src2);
19773 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19774 %}
19775 ins_pipe( pipe_slow );
19776 %}
19777
19778 // =======================Short Reduction==========================================
19779
19780 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19781 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19782 match(Set dst (AddReductionVI src1 src2));
19783 match(Set dst (MulReductionVI src1 src2));
19784 match(Set dst (AndReductionV src1 src2));
19785 match(Set dst ( OrReductionV src1 src2));
19786 match(Set dst (XorReductionV src1 src2));
19787 match(Set dst (MinReductionV src1 src2));
19788 match(Set dst (MaxReductionV src1 src2));
19789 effect(TEMP vtmp1, TEMP vtmp2);
19790 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19791 ins_encode %{
19792 int opcode = this->ideal_Opcode();
19793 int vlen = Matcher::vector_length(this, $src2);
19794 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19795 %}
19796 ins_pipe( pipe_slow );
19797 %}
19798
19799 // =======================Mul Reduction==========================================
19800
19801 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19802 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19803 Matcher::vector_length(n->in(2)) <= 32); // src2
19804 match(Set dst (MulReductionVI src1 src2));
19805 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19806 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19807 ins_encode %{
19808 int opcode = this->ideal_Opcode();
19809 int vlen = Matcher::vector_length(this, $src2);
19810 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19811 %}
19812 ins_pipe( pipe_slow );
19813 %}
19814
19815 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19816 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19817 Matcher::vector_length(n->in(2)) == 64); // src2
19818 match(Set dst (MulReductionVI src1 src2));
19819 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19820 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19821 ins_encode %{
19822 int opcode = this->ideal_Opcode();
19823 int vlen = Matcher::vector_length(this, $src2);
19824 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19825 %}
19826 ins_pipe( pipe_slow );
19827 %}
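
// x86 has no byte multiply instruction, so mulreduceB has to widen the byte
// lanes (e.g. via pmovsxbw) and multiply them as shorts before folding the
// partial products down to the final scalar result.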
19828
19829 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
19831 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19832 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19833 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19834 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19835 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19836 Matcher::vector_length(n->in(2)) == 2);
19837 match(Set dst (MinReductionV src1 src2));
19838 match(Set dst (MaxReductionV src1 src2));
19839 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
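
// Note on the reductions above (and the double-precision variants below): the
// immF/immD seed (src1) is only matched when it is the identity element of
// the operation (+Inf for min, -Inf for max), so the generated code can ignore
// it entirely. A scalar sketch of the semantics being vectorized (illustrative
// C++ only; java_min is a hypothetical stand-in for Java's Math.min rules):
//
//   float reduce_min(const float* v, int n) {
//     float acc = +INFINITY;           // the identity seed required above
//     for (int i = 0; i < n; i++) {
//       acc = java_min(acc, v[i]);     // NaN propagates, -0.0f < +0.0f
//     }
//     return acc;
//   }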

//--------------------Min/Max Double Reduction --------------------
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
                          xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packedD" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Byte vector mul
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ psrlw($dst$$XMMRegister, 8);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    // Odd-index elements
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ psrlw($dst$$XMMRegister, 8);
    __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psrlw($xtmp$$XMMRegister, 8);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    // Even-index elements
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psllw($xtmp$$XMMRegister, 8);
    __ psrlw($xtmp$$XMMRegister, 8);
    // Combine
    __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Odd-index elements
    __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
    __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
    // Even-index elements
    __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    // Combine
    __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
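
// x86 has no packed byte multiply, so MulVB above is synthesized from 16-bit
// multiplies: 8-byte vectors are widened to words, multiplied, masked and
// packed back; wider vectors handle odd-indexed bytes (isolated with a word
// shift, multiplied, shifted back into place) and even-indexed bytes
// (multiplied in place, masked to their low byte) separately and OR the two
// halves together. A scalar sketch of one 16-bit lane (illustrative only):
//
//   uint16_t mul_byte_pair(uint16_t a, uint16_t b) {
//     uint16_t odd  = (uint16_t)(((a >> 8) * (b >> 8)) << 8); // product of high bytes
//     uint16_t even = (uint16_t)((uint16_t)(a * b) & 0x00FF); // low byte of low-byte product
//     return odd | even;
//   }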

// Shorts/Chars vector mul
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector mul
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are of concern
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are of concern
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
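
// Lacking a packed 64-bit multiply below AVX-512DQ, MulVL is decomposed per
// lane as (aH:aL)*(bH:bL) mod 2^64 == ((aH*bL + aL*bH) << 32) + aL*bL, which
// is the shuffle/pmulld/pmuludq sequence above. Scalar sketch (illustrative):
//
//   uint64_t mul64(uint64_t a, uint64_t b) {
//     uint32_t aL = (uint32_t)a, aH = (uint32_t)(a >> 32);
//     uint32_t bL = (uint32_t)b, bH = (uint32_t)(b >> 32);
//     uint64_t cross = (uint64_t)(uint32_t)(aH * bL + aL * bH) << 32;
//     return cross + (uint64_t)aL * bL;   // full 32x32->64 lo-lo product
//   }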

instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
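
// The two rules above are cheap special cases: when C2 can prove that both
// MulVL inputs are zero-extended (has_uint_inputs) or sign-extended
// (has_int_inputs) 32-bit values, one vpmuludq/vpmuldq computes the full
// 32x32->64 product of the low dword of each qword lane, e.g.
//
//   (uint64_t)(uint32_t)a * (uint32_t)b   // one instruction per lane
//
// replacing the multi-instruction decomposition (hence cost 100 versus 500).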

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ MinMax ---------------------------------------

// Byte, Short, Int vector Min/Max
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax $dst,$src\t! " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax $dst,$src1,$src2\t! " %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long vector Min/Max
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Float/Double vector Min/Max
instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP $dst, $a, $b" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Float/Double vector Min/Max
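// Java's Math.min/max semantics differ from raw vminps/vmaxps: NaN must win
// regardless of operand order, and -0.0 is strictly less than +0.0, whereas
// the legacy x86 instructions simply return the second operand for unordered
// or signed-zero inputs. The blend-heavy rules below repair that. Scalar
// sketch of the required semantics (illustrative C++ only):
//
//   float java_min(float a, float b) {
//     if (a != a) return a;                  // NaN propagates
//     if (a == 0.0f && b == 0.0f)
//       return std::signbit(a) ? a : b;      // -0.0f wins a min
//     return a < b ? a : b;                  // also returns b when b is NaN
//   }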
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            UseAVX > 0);
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vminmax_fp(opcode, elem_bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Unsigned vector Min/Max ----------------------

instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
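
// Without AVX512VL there is no unsigned 64-bit packed min/max instruction, so
// vpuminmaxq presumably biases both operands by the sign bit and falls back to
// a signed compare plus blend (an assumption about the macro-assembler helper;
// the bias trick itself is standard). Scalar sketch (illustrative):
//
//   uint64_t umin64(uint64_t a, uint64_t b) {
//     const uint64_t bias = 1ULL << 63;
//     // signed order of the biased values equals unsigned order of the originals
//     return (int64_t)(a ^ bias) < (int64_t)(b ^ bias) ? a : b;
//   }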

instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Signum/CopySign ---------------------------

instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
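
// Math.signum semantics implemented by signum_fp above: zero (of either sign)
// and NaN pass through unchanged; everything else collapses to +/-1.0.
// Scalar sketch (illustrative C++ only):
//
//   double signum(double d) {
//     if (d != d || d == 0.0) return d;   // NaN and +/-0.0 are returned as-is
//     return d > 0.0 ? 1.0 : -1.0;
//   }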

instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as the ternary-logic immediate for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (the sign operand)
// Wherever xmm2 is 1, we want to pick from A (the magnitude, held in dst)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result going from high bit to low bit is binary 11100100 = 0xE4
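//
// A quick check: with C = 0x7FFFFFFF, vpternlog(dst, 0xE4, src, C) keeps
// dst's magnitude bits (where C is 1) and takes the sign bit from src
// (where C is 0), which is exactly copysign. Scalar sketch of the same bit
// operation (illustrative only):
//
//   uint32_t copysign_bits(uint32_t mag, uint32_t sgn) {
//     return (mag & 0x7FFFFFFF) | (sgn & 0x80000000);
//   }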
// ---------------------------------------

instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

//----------------------------- CompressBits/ExpandBits ------------------------

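// CompressBits maps to BMI2 pext (gather the src bits selected by mask down to
// the low end) and ExpandBits to pdep (scatter the low bits of src out to the
// mask positions). Scalar sketch of pext semantics (illustrative C++ only):
//
//   uint32_t pext32(uint32_t src, uint32_t mask) {
//     uint32_t res = 0;
//     for (int i = 0, j = 0; i < 32; i++)
//       if (mask & (1u << i))
//         res |= ((src >> i) & 1u) << j++;   // pdep is the inverse scatter
//     return res;
//   }
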
21252 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21253 predicate(n->bottom_type()->isa_int());
21254 match(Set dst (CompressBits src mask));
21255 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21256 ins_encode %{
21257 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21258 %}
21259 ins_pipe( pipe_slow );
21260 %}
21261
21262 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21263 predicate(n->bottom_type()->isa_int());
21264 match(Set dst (ExpandBits src mask));
21265 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21266 ins_encode %{
21267 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21268 %}
21269 ins_pipe( pipe_slow );
21270 %}
21271
21272 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21273 predicate(n->bottom_type()->isa_int());
21274 match(Set dst (CompressBits src (LoadI mask)));
21275 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21276 ins_encode %{
21277 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21278 %}
21279 ins_pipe( pipe_slow );
21280 %}
21281
21282 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21283 predicate(n->bottom_type()->isa_int());
21284 match(Set dst (ExpandBits src (LoadI mask)));
21285 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21286 ins_encode %{
21287 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21288 %}
21289 ins_pipe( pipe_slow );
21290 %}

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floating point vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Byte vector shift
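// x86 has no byte-granularity vector shift instructions, so the rules below
// widen the bytes to words, shift the words, and narrow the results back.
// Roughly, per element (with op the ideal shift opcode and s the count):
//   w = sign ? sign_extend16(b) : zero_extend16(b);  // vextendbw
//   w = op(w, s);                                    // vshiftw
//   b = (byte)(w & 0xFF);                            // pand + packuswb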
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
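// Note: in the rule above, the 256-bit vpackuswb packs within each 128-bit
// lane, so the packed bytes come out lane-interleaved; the trailing vpermq
// with immediate 0xD8 (qword order 0,2,1,3) restores the element order.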

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// A vector logical right shift of shorts produces an incorrect Java result
// for negative data, because Java code converts the short values to ints
// (with sign extension) before shifting. Char vectors are fine, since chars
// are unsigned values.
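// For example, with short s = -1 and a shift of 3, Java computes
// (short)(((int)s) >>> 3) == (short)0x1FFFFFFF == -1, while a 16-bit
// lane-wise logical shift would produce 0xFFFF >>> 3 == 0x1FFF == 8191.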
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector constant shift
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}

// -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vlen_enc = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
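// The sequence above emulates the 64-bit arithmetic right shift missing
// from SSE/AVX2 with the usual sign-extension identity: for
// m = 0x8000000000000000 >>> s,
//   (x >> s) == ((x >>> s) ^ m) - m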

instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
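// AVX-512 provides a native 64-bit arithmetic right shift (evpsraq), so no
// emulation is needed on EVEX-capable targets.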

// ------------------- Variable Shift -----------------------------
// Byte variable shift
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);

    // Shift upper half and get word result in vtmp1
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_256bit;
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Short variable shift
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
21847 if (!VM_Version::supports_avx512vl()) {
21848 vlen_enc = Assembler::AVX_512bit;
21849 }
21850 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21851 %}
21852 ins_pipe( pipe_slow );
21853 %}

// Integer variable shift
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable shift
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable arithmetic right shift
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- VectorCast --------------------------------------

instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        if (!VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastItoX(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastItoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    switch (dst_elem_bt) {
      case T_BYTE:
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_SHORT:
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_FLOAT:
        __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_DOUBLE:
        __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastLtoBS(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
            UseAVX <= 2);
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen = Matcher::vector_length_in_bytes(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
                                                      : ExternalAddress(vector_int_to_short_mask());
    if (vlen <= 16) {
      __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(vlen <= 32, "required");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
    if (to_elem_bt == T_BYTE) {
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastLtoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT ||
             Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this, $src);
    int vlen_enc = vector_length_encoding(this, $src);
    switch (to_elem_bt) {
      case T_BYTE:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_SHORT:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_INT:
        if (vlen == 8) {
          if ($dst$$XMMRegister != $src$$XMMRegister) {
            __ movflt($dst$$XMMRegister, $src$$XMMRegister);
          }
        } else if (vlen == 16) {
          __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
        } else if (vlen == 32) {
          if (UseAVX > 2) {
            if (!VM_Version::supports_avx512vl()) {
              vlen_enc = Assembler::AVX_512bit;
            }
            __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
          } else {
            __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
            __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
          }
        } else { // vlen == 64
          __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        }
        break;
      case T_FLOAT:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;

      default: assert(false, "%s", type2name(to_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastFtoD_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2d $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading
    // addresses wider than 32 bits for register-indirect addressing, since stub
    // constants live in the code cache and ReservedCodeCacheSize is currently
    // capped at 2G. Targets are free to raise that limit, but a code cache larger
    // than 2G looks unreasonable in practice, and with the given cap we save a
    // temporary register allocation, which in the limiting case can prevent
    // spilling in blocks with high register pressure.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_avx10(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
    __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_mem_avx10(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X (LoadVector src)));
  format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
    __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip())
                                                              : ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                           $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_avx10(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_mem_avx10(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X (LoadVector src)));
  format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
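// The unsigned casts above are widening zero-extensions (vpmovzx* based),
// in contrast to the sign-extending VectorCast rules.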

instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                              $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- VectorMaskCmp --------------------------------------

instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22592
22593 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22594 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22595 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22596 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22597 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22598 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22599 (n->in(2)->get_int() == BoolTest::eq ||
22600 n->in(2)->get_int() == BoolTest::lt ||
22601 n->in(2)->get_int() == BoolTest::gt)); // cond
22602 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22603 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22604 ins_encode %{
22605 int vlen_enc = vector_length_encoding(this, $src1);
22606 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22607 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22608 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22609 %}
22610 ins_pipe( pipe_slow );
22611 %}
22612
22613 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22614 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22615 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22616 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22617 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22618 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22619 (n->in(2)->get_int() == BoolTest::ne ||
22620 n->in(2)->get_int() == BoolTest::le ||
22621 n->in(2)->get_int() == BoolTest::ge)); // cond
22622 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22623 effect(TEMP dst, TEMP xtmp);
22624 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22625 ins_encode %{
22626 int vlen_enc = vector_length_encoding(this, $src1);
22627 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22628 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22629 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22630 %}
22631 ins_pipe( pipe_slow );
22632 %}
22633
22634 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22635 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22636 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22637 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22638 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22639 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22640 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22641 effect(TEMP dst, TEMP xtmp);
22642 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22643 ins_encode %{
22644 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22645 int vlen_enc = vector_length_encoding(this, $src1);
22646 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22647 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22648
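    // SSE/AVX (pre AVX-512) has no unsigned integer vector compares. XOR-ing
    // both operands with the per-element sign bit converts the unsigned
    // ordering into a signed one, so the signed compare below yields the
    // unsigned result.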
22649 if (vlen_enc == Assembler::AVX_128bit) {
22650 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22651 } else {
22652 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22653 }
22654 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22655 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22656 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22657 %}
22658 ins_pipe( pipe_slow );
22659 %}
22660
22661 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22662 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22663 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22664 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22665 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22666 effect(TEMP ktmp);
22667 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22668 ins_encode %{
22669 assert(UseAVX > 2, "required");
22670
22671 int vlen_enc = vector_length_encoding(this, $src1);
22672 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22673 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22674 KRegister mask = k0; // The comparison itself is not being masked.
22675 bool merge = false;
22676 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22677
22678 switch (src1_elem_bt) {
22679 case T_INT: {
22680 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22681 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22682 break;
22683 }
22684 case T_LONG: {
22685 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22686 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22687 break;
22688 }
22689 default: assert(false, "%s", type2name(src1_elem_bt));
22690 }
22691 %}
22692 ins_pipe( pipe_slow );
22693 %}
22694
22695
22696 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22697 predicate(n->bottom_type()->isa_vectmask() &&
22698 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22699 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22701 ins_encode %{
22702 assert(UseAVX > 2, "required");
22703 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22704
22705 int vlen_enc = vector_length_encoding(this, $src1);
22706 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22707 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22708 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22709
    // Compare src1 and src2 directly into the destination mask register.
22711 switch (src1_elem_bt) {
22712 case T_BYTE: {
22713 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22714 break;
22715 }
22716 case T_SHORT: {
22717 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22718 break;
22719 }
22720 case T_INT: {
22721 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22722 break;
22723 }
22724 case T_LONG: {
22725 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22726 break;
22727 }
22728 default: assert(false, "%s", type2name(src1_elem_bt));
22729 }
22730 %}
22731 ins_pipe( pipe_slow );
22732 %}
22733
22734 // Extract
22735
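// Extracts from vectors of at most 16 bytes can address the element directly;
// for wider vectors the 128-bit lane holding the element is first copied into
// a temporary (get_lane) and the element is then pulled out of that lane.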
22736 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22737 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22738 match(Set dst (ExtractI src idx));
22739 match(Set dst (ExtractS src idx));
22740 match(Set dst (ExtractB src idx));
22741 format %{ "extractI $dst,$src,$idx\t!" %}
22742 ins_encode %{
22743 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22744
22745 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22746 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22747 %}
22748 ins_pipe( pipe_slow );
22749 %}
22750
22751 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22752 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22753 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22754 match(Set dst (ExtractI src idx));
22755 match(Set dst (ExtractS src idx));
22756 match(Set dst (ExtractB src idx));
22757 effect(TEMP vtmp);
22758 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22759 ins_encode %{
22760 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22761
22762 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22763 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22764 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22765 %}
22766 ins_pipe( pipe_slow );
22767 %}
22768
22769 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22770 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22771 match(Set dst (ExtractL src idx));
22772 format %{ "extractL $dst,$src,$idx\t!" %}
22773 ins_encode %{
22774 assert(UseSSE >= 4, "required");
22775 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22776
22777 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22778 %}
22779 ins_pipe( pipe_slow );
22780 %}
22781
22782 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22783 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22784 Matcher::vector_length(n->in(1)) == 8); // src
22785 match(Set dst (ExtractL src idx));
22786 effect(TEMP vtmp);
22787 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22788 ins_encode %{
22789 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22790
22791 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22792 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22793 %}
22794 ins_pipe( pipe_slow );
22795 %}
22796
22797 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22798 predicate(Matcher::vector_length(n->in(1)) <= 4);
22799 match(Set dst (ExtractF src idx));
22800 effect(TEMP dst, TEMP vtmp);
22801 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22802 ins_encode %{
22803 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22804
22805 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22806 %}
22807 ins_pipe( pipe_slow );
22808 %}
22809
22810 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
            Matcher::vector_length(n->in(1)) == 16);  // src
22813 match(Set dst (ExtractF src idx));
22814 effect(TEMP vtmp);
22815 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22816 ins_encode %{
22817 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22818
22819 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22820 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22821 %}
22822 ins_pipe( pipe_slow );
22823 %}
22824
22825 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22826 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22827 match(Set dst (ExtractD src idx));
22828 format %{ "extractD $dst,$src,$idx\t!" %}
22829 ins_encode %{
22830 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22831
22832 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22833 %}
22834 ins_pipe( pipe_slow );
22835 %}
22836
22837 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22838 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22839 Matcher::vector_length(n->in(1)) == 8); // src
22840 match(Set dst (ExtractD src idx));
22841 effect(TEMP vtmp);
22842 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22843 ins_encode %{
22844 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22845
22846 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22847 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22848 %}
22849 ins_pipe( pipe_slow );
22850 %}
22851
22852 // --------------------------------- Vector Blend --------------------------------------
22853
22854 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22855 predicate(UseAVX == 0);
22856 match(Set dst (VectorBlend (Binary dst src) mask));
22857 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22858 effect(TEMP tmp);
22859 ins_encode %{
22860 assert(UseSSE >= 4, "required");
22861
22862 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22863 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22864 }
22865 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22866 %}
22867 ins_pipe( pipe_slow );
22868 %}
22869
22870 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22871 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22872 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22873 Matcher::vector_length_in_bytes(n) <= 32 &&
22874 is_integral_type(Matcher::vector_element_basic_type(n)));
22875 match(Set dst (VectorBlend (Binary src1 src2) mask));
22876 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22877 ins_encode %{
22878 int vlen_enc = vector_length_encoding(this);
22879 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22880 %}
22881 ins_pipe( pipe_slow );
22882 %}
22883
22884 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22885 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22886 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22887 Matcher::vector_length_in_bytes(n) <= 32 &&
22888 !is_integral_type(Matcher::vector_element_basic_type(n)));
22889 match(Set dst (VectorBlend (Binary src1 src2) mask));
22890 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22891 ins_encode %{
22892 int vlen_enc = vector_length_encoding(this);
22893 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22894 %}
22895 ins_pipe( pipe_slow );
22896 %}
22897
22898 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22899 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22900 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22901 Matcher::vector_length_in_bytes(n) <= 32);
22902 match(Set dst (VectorBlend (Binary src1 src2) mask));
22903 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22904 effect(TEMP vtmp, TEMP dst);
22905 ins_encode %{
22906 int vlen_enc = vector_length_encoding(this);
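    // Emulate the blend with plain logic ops: dst = (mask & src2) | (~mask & src1).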
22907 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22908 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22909 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22910 %}
22911 ins_pipe( pipe_slow );
22912 %}
22913
22914 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22915 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22916 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22917 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22919 effect(TEMP ktmp);
22920 ins_encode %{
22921 int vlen_enc = Assembler::AVX_512bit;
22922 BasicType elem_bt = Matcher::vector_element_basic_type(this);
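    // Turn the vector mask (all-ones per true lane) into a k-register by
    // comparing it against the all-bits-set constant, then blend with merging.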
22923 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22924 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22925 %}
22926 ins_pipe( pipe_slow );
22927 %}
22928
22929
22930 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22931 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22932 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22933 VM_Version::supports_avx512bw()));
22934 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22936 ins_encode %{
22937 int vlen_enc = vector_length_encoding(this);
22938 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22939 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22940 %}
22941 ins_pipe( pipe_slow );
22942 %}
22943
22944 // --------------------------------- ABS --------------------------------------
22945 // a = |a|
22946 instruct vabsB_reg(vec dst, vec src) %{
22947 match(Set dst (AbsVB src));
22948 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22949 ins_encode %{
22950 uint vlen = Matcher::vector_length(this);
22951 if (vlen <= 16) {
22952 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22953 } else {
22954 int vlen_enc = vector_length_encoding(this);
22955 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22956 }
22957 %}
22958 ins_pipe( pipe_slow );
22959 %}
22960
22961 instruct vabsS_reg(vec dst, vec src) %{
22962 match(Set dst (AbsVS src));
22963 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22964 ins_encode %{
22965 uint vlen = Matcher::vector_length(this);
22966 if (vlen <= 8) {
22967 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22968 } else {
22969 int vlen_enc = vector_length_encoding(this);
22970 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22971 }
22972 %}
22973 ins_pipe( pipe_slow );
22974 %}
22975
22976 instruct vabsI_reg(vec dst, vec src) %{
22977 match(Set dst (AbsVI src));
22978 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22979 ins_encode %{
22980 uint vlen = Matcher::vector_length(this);
22981 if (vlen <= 4) {
22982 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22983 } else {
22984 int vlen_enc = vector_length_encoding(this);
22985 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22986 }
22987 %}
22988 ins_pipe( pipe_slow );
22989 %}
22990
22991 instruct vabsL_reg(vec dst, vec src) %{
22992 match(Set dst (AbsVL src));
22993 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22994 ins_encode %{
22995 assert(UseAVX > 2, "required");
22996 int vlen_enc = vector_length_encoding(this);
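    // evpabsq is AVX-512 only; without AVX512VL the 128/256-bit encodings are
    // unavailable, so fall back to the full 512-bit form.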
22997 if (!VM_Version::supports_avx512vl()) {
22998 vlen_enc = Assembler::AVX_512bit;
22999 }
23000 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23001 %}
23002 ins_pipe( pipe_slow );
23003 %}
23004
23005 // --------------------------------- ABSNEG --------------------------------------
23006
23007 instruct vabsnegF(vec dst, vec src) %{
23008 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23009 match(Set dst (AbsVF src));
23010 match(Set dst (NegVF src));
23011 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23012 ins_cost(150);
23013 ins_encode %{
23014 int opcode = this->ideal_Opcode();
23015 int vlen = Matcher::vector_length(this);
23016 if (vlen == 2) {
23017 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23018 } else {
23019 assert(vlen == 8 || vlen == 16, "required");
23020 int vlen_enc = vector_length_encoding(this);
23021 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23022 }
23023 %}
23024 ins_pipe( pipe_slow );
23025 %}
23026
23027 instruct vabsneg4F(vec dst) %{
23028 predicate(Matcher::vector_length(n) == 4);
23029 match(Set dst (AbsVF dst));
23030 match(Set dst (NegVF dst));
23031 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23032 ins_cost(150);
23033 ins_encode %{
23034 int opcode = this->ideal_Opcode();
23035 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23036 %}
23037 ins_pipe( pipe_slow );
23038 %}
23039
23040 instruct vabsnegD(vec dst, vec src) %{
23041 match(Set dst (AbsVD src));
23042 match(Set dst (NegVD src));
23043 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23044 ins_encode %{
23045 int opcode = this->ideal_Opcode();
23046 uint vlen = Matcher::vector_length(this);
23047 if (vlen == 2) {
23048 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23049 } else {
23050 int vlen_enc = vector_length_encoding(this);
23051 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23052 }
23053 %}
23054 ins_pipe( pipe_slow );
23055 %}
23056
23057 //------------------------------------- VectorTest --------------------------------------------
23058
23059 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23060 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23061 match(Set cr (VectorTest src1 src2));
23062 effect(TEMP vtmp);
23063 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
23064 ins_encode %{
23065 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23066 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23067 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23068 %}
23069 ins_pipe( pipe_slow );
23070 %}
23071
23072 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23073 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23074 match(Set cr (VectorTest src1 src2));
23075 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23076 ins_encode %{
23077 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23078 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23079 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23080 %}
23081 ins_pipe( pipe_slow );
23082 %}
23083
23084 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23085 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23086 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23087 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23088 match(Set cr (VectorTest src1 src2));
23089 effect(TEMP tmp);
23090 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23091 ins_encode %{
23092 uint masklen = Matcher::vector_length(this, $src1);
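    // Move the mask bits into a GPR, keep only the masklen low bits, and
    // compare against the all-true pattern; the flags then encode "all true".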
23093 __ kmovwl($tmp$$Register, $src1$$KRegister);
23094 __ andl($tmp$$Register, (1 << masklen) - 1);
23095 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23096 %}
23097 ins_pipe( pipe_slow );
23098 %}
23099
23100 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23101 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23102 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23103 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23104 match(Set cr (VectorTest src1 src2));
23105 effect(TEMP tmp);
23106 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23107 ins_encode %{
23108 uint masklen = Matcher::vector_length(this, $src1);
23109 __ kmovwl($tmp$$Register, $src1$$KRegister);
23110 __ andl($tmp$$Register, (1 << masklen) - 1);
23111 %}
23112 ins_pipe( pipe_slow );
23113 %}
23114
23115 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23116 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23117 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23118 match(Set cr (VectorTest src1 src2));
23119 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23120 ins_encode %{
23121 uint masklen = Matcher::vector_length(this, $src1);
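    // kortest sets ZF if the OR of its operands is all zeros and CF if it is
    // all ones, covering both the anytrue and alltrue predicates.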
23122 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23123 %}
23124 ins_pipe( pipe_slow );
23125 %}
23126
23127 //------------------------------------- LoadMask --------------------------------------------
23128
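// VectorLoadMask widens a boolean vector (one 0/1 byte per element) into a
// mask: a vector with all bits set in true lanes, or a kReg on AVX-512
// targets where the node carries a vectmask type.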
23129 instruct loadMask(legVec dst, legVec src) %{
23130 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23131 match(Set dst (VectorLoadMask src));
23132 effect(TEMP dst);
23133 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23134 ins_encode %{
23135 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23136 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23137 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23138 %}
23139 ins_pipe( pipe_slow );
23140 %}
23141
23142 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23143 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23144 match(Set dst (VectorLoadMask src));
23145 effect(TEMP xtmp);
23146 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23147 ins_encode %{
23148 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23149 true, Assembler::AVX_512bit);
23150 %}
23151 ins_pipe( pipe_slow );
23152 %}
23153
23154 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23155 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23156 match(Set dst (VectorLoadMask src));
23157 effect(TEMP xtmp);
23158 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23159 ins_encode %{
23160 int vlen_enc = vector_length_encoding(in(1));
23161 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23162 false, vlen_enc);
23163 %}
23164 ins_pipe( pipe_slow );
23165 %}
23166
23167 //------------------------------------- StoreMask --------------------------------------------
23168
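// VectorStoreMask narrows a mask with size-byte lanes back down to one byte
// per element; the trailing pabsb/vpabsb normalizes all-ones lanes to 0x01.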
23169 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23170 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23171 match(Set dst (VectorStoreMask src size));
23172 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23173 ins_encode %{
23174 int vlen = Matcher::vector_length(this);
23175 if (vlen <= 16 && UseAVX <= 2) {
23176 assert(UseSSE >= 3, "required");
23177 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23178 } else {
23179 assert(UseAVX > 0, "required");
23180 int src_vlen_enc = vector_length_encoding(this, $src);
23181 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23182 }
23183 %}
23184 ins_pipe( pipe_slow );
23185 %}
23186
23187 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23188 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23189 match(Set dst (VectorStoreMask src size));
23190 effect(TEMP_DEF dst, TEMP xtmp);
23191 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23192 ins_encode %{
23193 int vlen_enc = Assembler::AVX_128bit;
23194 int vlen = Matcher::vector_length(this);
23195 if (vlen <= 8) {
23196 assert(UseSSE >= 3, "required");
23197 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23198 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23199 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23200 } else {
23201 assert(UseAVX > 0, "required");
23202 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23203 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23204 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23205 }
23206 %}
23207 ins_pipe( pipe_slow );
23208 %}
23209
23210 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23211 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23212 match(Set dst (VectorStoreMask src size));
23213 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23214 effect(TEMP_DEF dst, TEMP xtmp);
23215 ins_encode %{
23216 int vlen_enc = Assembler::AVX_128bit;
23217 int vlen = Matcher::vector_length(this);
23218 if (vlen <= 4) {
23219 assert(UseSSE >= 3, "required");
23220 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23221 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23222 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23223 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23224 } else {
23225 assert(UseAVX > 0, "required");
23226 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23227 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23228 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23229 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23230 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23231 }
23232 %}
23233 ins_pipe( pipe_slow );
23234 %}
23235
23236 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23237 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23238 match(Set dst (VectorStoreMask src size));
23239 effect(TEMP_DEF dst, TEMP xtmp);
23240 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23241 ins_encode %{
23242 assert(UseSSE >= 3, "required");
23243 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
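    // pshufd with selector 0x8 picks dwords 0 and 2, the low halves of the
    // two 64-bit mask lanes, into the bottom 64 bits of dst.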
23244 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23245 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23246 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23247 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23248 %}
23249 ins_pipe( pipe_slow );
23250 %}
23251
23252 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23253 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23254 match(Set dst (VectorStoreMask src size));
23255 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23256 effect(TEMP_DEF dst, TEMP vtmp);
23257 ins_encode %{
23258 int vlen_enc = Assembler::AVX_128bit;
23259 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23260 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23261 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23262 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23263 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23264 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23265 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23266 %}
23267 ins_pipe( pipe_slow );
23268 %}
23269
23270 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23271 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23272 match(Set dst (VectorStoreMask src size));
23273 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23274 ins_encode %{
23275 int src_vlen_enc = vector_length_encoding(this, $src);
23276 int dst_vlen_enc = vector_length_encoding(this);
23277 if (!VM_Version::supports_avx512vl()) {
23278 src_vlen_enc = Assembler::AVX_512bit;
23279 }
23280 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23281 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23282 %}
23283 ins_pipe( pipe_slow );
23284 %}
23285
23286 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23287 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23288 match(Set dst (VectorStoreMask src size));
23289 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23290 ins_encode %{
23291 int src_vlen_enc = vector_length_encoding(this, $src);
23292 int dst_vlen_enc = vector_length_encoding(this);
23293 if (!VM_Version::supports_avx512vl()) {
23294 src_vlen_enc = Assembler::AVX_512bit;
23295 }
23296 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23297 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23298 %}
23299 ins_pipe( pipe_slow );
23300 %}
23301
23302 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23303 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23304 match(Set dst (VectorStoreMask mask size));
23305 effect(TEMP_DEF dst);
23306 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23307 ins_encode %{
23308 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23309 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23310 false, Assembler::AVX_512bit, noreg);
23311 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23312 %}
23313 ins_pipe( pipe_slow );
23314 %}
23315
23316 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23317 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23318 match(Set dst (VectorStoreMask mask size));
23319 effect(TEMP_DEF dst);
23320 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23321 ins_encode %{
23322 int dst_vlen_enc = vector_length_encoding(this);
23323 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23324 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23325 %}
23326 ins_pipe( pipe_slow );
23327 %}
23328
23329 instruct vmaskcast_evex(kReg dst) %{
23330 match(Set dst (VectorMaskCast dst));
23331 ins_cost(0);
23332 format %{ "vector_mask_cast $dst" %}
23333 ins_encode %{
23334 // empty
23335 %}
23336 ins_pipe(empty);
23337 %}
23338
23339 instruct vmaskcast(vec dst) %{
23340 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23341 match(Set dst (VectorMaskCast dst));
23342 ins_cost(0);
23343 format %{ "vector_mask_cast $dst" %}
23344 ins_encode %{
23345 // empty
23346 %}
23347 ins_pipe(empty);
23348 %}
23349
23350 instruct vmaskcast_avx(vec dst, vec src) %{
23351 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23352 match(Set dst (VectorMaskCast src));
23353 format %{ "vector_mask_cast $dst, $src" %}
23354 ins_encode %{
23355 int vlen = Matcher::vector_length(this);
23356 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23357 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23358 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23359 %}
23360 ins_pipe(pipe_slow);
23361 %}
23362
23363 //-------------------------------- Load Iota Indices ----------------------------------
23364
23365 instruct loadIotaIndices(vec dst, immI_0 src) %{
23366 match(Set dst (VectorLoadConst src));
23367 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23368 ins_encode %{
23369 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23370 BasicType bt = Matcher::vector_element_basic_type(this);
23371 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23372 %}
23373 ins_pipe( pipe_slow );
23374 %}
23375
23376 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23377 match(Set dst (PopulateIndex src1 src2));
23378 effect(TEMP dst, TEMP vtmp);
23379 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23380 ins_encode %{
23381 assert($src2$$constant == 1, "required");
23382 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23383 int vlen_enc = vector_length_encoding(this);
23384 BasicType elem_bt = Matcher::vector_element_basic_type(this);
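    // dst[i] = src1 + i: broadcast the scalar start value, then add the
    // iota constant (0, 1, 2, ...).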
23385 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23386 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23387 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23388 %}
23389 ins_pipe( pipe_slow );
23390 %}
23391
23392 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23393 match(Set dst (PopulateIndex src1 src2));
23394 effect(TEMP dst, TEMP vtmp);
23395 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23396 ins_encode %{
23397 assert($src2$$constant == 1, "required");
23398 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23399 int vlen_enc = vector_length_encoding(this);
23400 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23401 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23402 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23403 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23404 %}
23405 ins_pipe( pipe_slow );
23406 %}
23407
23408 //-------------------------------- Rearrange ----------------------------------
23409
23410 // LoadShuffle/Rearrange for Byte
23411 instruct rearrangeB(vec dst, vec shuffle) %{
23412 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23413 Matcher::vector_length(n) < 32);
23414 match(Set dst (VectorRearrange dst shuffle));
23415 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23416 ins_encode %{
23417 assert(UseSSE >= 4, "required");
23418 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23419 %}
23420 ins_pipe( pipe_slow );
23421 %}
23422
23423 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23424 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23425 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23426 match(Set dst (VectorRearrange src shuffle));
23427 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23428 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23429 ins_encode %{
23430 assert(UseAVX >= 2, "required");
23431 // Swap src into vtmp1
23432 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23433 // Shuffle swapped src to get entries from other 128 bit lane
23434 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23435 // Shuffle original src to get entries from self 128 bit lane
23436 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23437 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23438 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23439 // Perform the blend
23440 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23441 %}
23442 ins_pipe( pipe_slow );
23443 %}
23444
23445
23446 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23447 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23448 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23449 match(Set dst (VectorRearrange src shuffle));
23450 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23452 ins_encode %{
23453 int vlen_enc = vector_length_encoding(this);
23454 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23455 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23456 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23457 %}
23458 ins_pipe( pipe_slow );
23459 %}
23460
23461 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23462 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23463 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23464 match(Set dst (VectorRearrange src shuffle));
23465 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23466 ins_encode %{
23467 int vlen_enc = vector_length_encoding(this);
23468 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23469 %}
23470 ins_pipe( pipe_slow );
23471 %}
23472
23473 // LoadShuffle/Rearrange for Short
23474
23475 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23476 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23477 !VM_Version::supports_avx512bw());
23478 match(Set dst (VectorLoadShuffle src));
23479 effect(TEMP dst, TEMP vtmp);
23480 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23481 ins_encode %{
23482 // Create a byte shuffle mask from short shuffle mask
23483 // only byte shuffle instruction available on these platforms
23484 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23485 if (UseAVX == 0) {
23486 assert(vlen_in_bytes <= 16, "required");
23487 // Multiply each shuffle by two to get byte index
23488 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23489 __ psllw($vtmp$$XMMRegister, 1);
23490
23491 // Duplicate to create 2 copies of byte index
23492 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23493 __ psllw($dst$$XMMRegister, 8);
23494 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23495
23496 // Add one to get alternate byte index
23497 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23498 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23499 } else {
23500 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23501 int vlen_enc = vector_length_encoding(this);
23502 // Multiply each shuffle by two to get byte index
23503 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23504
23505 // Duplicate to create 2 copies of byte index
23506 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23507 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23508
23509 // Add one to get alternate byte index
23510 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23511 }
23512 %}
23513 ins_pipe( pipe_slow );
23514 %}
23515
23516 instruct rearrangeS(vec dst, vec shuffle) %{
23517 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23518 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23519 match(Set dst (VectorRearrange dst shuffle));
23520 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23521 ins_encode %{
23522 assert(UseSSE >= 4, "required");
23523 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23524 %}
23525 ins_pipe( pipe_slow );
23526 %}
23527
23528 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23529 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23530 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23531 match(Set dst (VectorRearrange src shuffle));
23532 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23533 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23534 ins_encode %{
23535 assert(UseAVX >= 2, "required");
23536 // Swap src into vtmp1
23537 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23538 // Shuffle swapped src to get entries from other 128 bit lane
23539 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23540 // Shuffle original src to get entries from self 128 bit lane
23541 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23542 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23543 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23544 // Perform the blend
23545 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23546 %}
23547 ins_pipe( pipe_slow );
23548 %}
23549
23550 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23551 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23552 VM_Version::supports_avx512bw());
23553 match(Set dst (VectorRearrange src shuffle));
23554 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23555 ins_encode %{
23556 int vlen_enc = vector_length_encoding(this);
23557 if (!VM_Version::supports_avx512vl()) {
23558 vlen_enc = Assembler::AVX_512bit;
23559 }
23560 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23561 %}
23562 ins_pipe( pipe_slow );
23563 %}
23564
23565 // LoadShuffle/Rearrange for Integer and Float
23566
23567 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23568 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23569 Matcher::vector_length(n) == 4 && UseAVX == 0);
23570 match(Set dst (VectorLoadShuffle src));
23571 effect(TEMP dst, TEMP vtmp);
23572 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23573 ins_encode %{
23574 assert(UseSSE >= 4, "required");
23575
23576 // Create a byte shuffle mask from int shuffle mask
23577 // only byte shuffle instruction available on these platforms
23578
23579 // Duplicate and multiply each shuffle by 4
23580 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23581 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23582 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23583 __ psllw($vtmp$$XMMRegister, 2);
23584
23585 // Duplicate again to create 4 copies of byte index
23586 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23587 __ psllw($dst$$XMMRegister, 8);
23588 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23589
23590 // Add 3,2,1,0 to get alternate byte index
23591 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23592 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23593 %}
23594 ins_pipe( pipe_slow );
23595 %}
23596
23597 instruct rearrangeI(vec dst, vec shuffle) %{
23598 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23599 UseAVX == 0);
23600 match(Set dst (VectorRearrange dst shuffle));
23601 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23602 ins_encode %{
23603 assert(UseSSE >= 4, "required");
23604 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23605 %}
23606 ins_pipe( pipe_slow );
23607 %}
23608
23609 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23610 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23611 UseAVX > 0);
23612 match(Set dst (VectorRearrange src shuffle));
23613 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23614 ins_encode %{
23615 int vlen_enc = vector_length_encoding(this);
23616 BasicType bt = Matcher::vector_element_basic_type(this);
23617 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23618 %}
23619 ins_pipe( pipe_slow );
23620 %}
23621
23622 // LoadShuffle/Rearrange for Long and Double
23623
23624 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23625 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23626 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23627 match(Set dst (VectorLoadShuffle src));
23628 effect(TEMP dst, TEMP vtmp);
23629 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23630 ins_encode %{
23631 assert(UseAVX >= 2, "required");
23632
23633 int vlen_enc = vector_length_encoding(this);
23634 // Create a double word shuffle mask from long shuffle mask
23635 // only double word shuffle instruction available on these platforms
23636
23637 // Multiply each shuffle by two to get double word index
23638 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23639
23640 // Duplicate each double word shuffle
23641 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23642 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23643
23644 // Add one to get alternate double word index
23645 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23646 %}
23647 ins_pipe( pipe_slow );
23648 %}
23649
23650 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23651 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23652 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23653 match(Set dst (VectorRearrange src shuffle));
23654 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23655 ins_encode %{
23656 assert(UseAVX >= 2, "required");
23657
23658 int vlen_enc = vector_length_encoding(this);
23659 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23660 %}
23661 ins_pipe( pipe_slow );
23662 %}
23663
23664 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23665 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23666 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23667 match(Set dst (VectorRearrange src shuffle));
23668 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23669 ins_encode %{
23670 assert(UseAVX > 2, "required");
23671
23672 int vlen_enc = vector_length_encoding(this);
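    // vpermq has no 128-bit form, so widen the encoding to 256 bits; the
    // unused upper lanes are don't-care.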
23673 if (vlen_enc == Assembler::AVX_128bit) {
23674 vlen_enc = Assembler::AVX_256bit;
23675 }
23676 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23677 %}
23678 ins_pipe( pipe_slow );
23679 %}
23680
23681 // --------------------------------- FMA --------------------------------------
23682 // a * b + c
23683
23684 instruct vfmaF_reg(vec a, vec b, vec c) %{
23685 match(Set c (FmaVF c (Binary a b)));
23686 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23687 ins_cost(150);
23688 ins_encode %{
23689 assert(UseFMA, "not enabled");
23690 int vlen_enc = vector_length_encoding(this);
23691 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23692 %}
23693 ins_pipe( pipe_slow );
23694 %}
23695
23696 instruct vfmaF_mem(vec a, memory b, vec c) %{
23697 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23698 match(Set c (FmaVF c (Binary a (LoadVector b))));
23699 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23700 ins_cost(150);
23701 ins_encode %{
23702 assert(UseFMA, "not enabled");
23703 int vlen_enc = vector_length_encoding(this);
23704 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23705 %}
23706 ins_pipe( pipe_slow );
23707 %}
23708
23709 instruct vfmaD_reg(vec a, vec b, vec c) %{
23710 match(Set c (FmaVD c (Binary a b)));
23711 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23712 ins_cost(150);
23713 ins_encode %{
23714 assert(UseFMA, "not enabled");
23715 int vlen_enc = vector_length_encoding(this);
23716 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23717 %}
23718 ins_pipe( pipe_slow );
23719 %}
23720
23721 instruct vfmaD_mem(vec a, memory b, vec c) %{
23722 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23723 match(Set c (FmaVD c (Binary a (LoadVector b))));
23724 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23725 ins_cost(150);
23726 ins_encode %{
23727 assert(UseFMA, "not enabled");
23728 int vlen_enc = vector_length_encoding(this);
23729 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23730 %}
23731 ins_pipe( pipe_slow );
23732 %}
23733
23734 // --------------------------------- Vector Multiply Add --------------------------------------
23735
23736 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23737 predicate(UseAVX == 0);
23738 match(Set dst (MulAddVS2VI dst src1));
23739 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23740 ins_encode %{
23741 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23742 %}
23743 ins_pipe( pipe_slow );
23744 %}
23745
23746 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23747 predicate(UseAVX > 0);
23748 match(Set dst (MulAddVS2VI src1 src2));
23749 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23750 ins_encode %{
23751 int vlen_enc = vector_length_encoding(this);
23752 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23753 %}
23754 ins_pipe( pipe_slow );
23755 %}
23756
23757 // --------------------------------- Vector Multiply Add Add ----------------------------------
23758
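// With AVX512_VNNI the pmaddwd + paddd pair fuses into a single evpdpwssd;
// the low ins_cost makes this form win over the two-instruction sequence.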
23759 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23760 predicate(VM_Version::supports_avx512_vnni());
23761 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23762 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23763 ins_encode %{
23764 assert(UseAVX > 2, "required");
23765 int vlen_enc = vector_length_encoding(this);
23766 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23767 %}
23768 ins_pipe( pipe_slow );
23769 ins_cost(10);
23770 %}
23771
23772 // --------------------------------- PopCount --------------------------------------
23773
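// The evex forms are used when the target has a native vector popcount for
// the element type (see is_vector_popcount_predicate); otherwise the AVX
// fallback computes the count with temporary registers.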
23774 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23775 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23776 match(Set dst (PopCountVI src));
23777 match(Set dst (PopCountVL src));
23778 format %{ "vector_popcount_integral $dst, $src" %}
23779 ins_encode %{
23780 int opcode = this->ideal_Opcode();
23781 int vlen_enc = vector_length_encoding(this, $src);
23782 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23783 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23784 %}
23785 ins_pipe( pipe_slow );
23786 %}
23787
23788 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23789 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23790 match(Set dst (PopCountVI src mask));
23791 match(Set dst (PopCountVL src mask));
23792 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23793 ins_encode %{
23794 int vlen_enc = vector_length_encoding(this, $src);
23795 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23796 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23797 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23798 %}
23799 ins_pipe( pipe_slow );
23800 %}
23801
23802 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23803 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23804 match(Set dst (PopCountVI src));
23805 match(Set dst (PopCountVL src));
23806 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23807 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23808 ins_encode %{
23809 int opcode = this->ideal_Opcode();
23810 int vlen_enc = vector_length_encoding(this, $src);
23811 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23812 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23813 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23814 %}
23815 ins_pipe( pipe_slow );
23816 %}
23817
23818 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23819
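// The evex form is gated on AVX512CD: the macro assembler derives the
// trailing-zero count from the vector leading-zero count (vplzcnt) of the
// isolated lowest set bit (x & -x).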
23820 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23821 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23822 Matcher::vector_length_in_bytes(n->in(1))));
23823 match(Set dst (CountTrailingZerosV src));
23824 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23825 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23827 ins_encode %{
23828 int vlen_enc = vector_length_encoding(this, $src);
23829 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23830 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23831 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23832 %}
23833 ins_pipe( pipe_slow );
23834 %}
23835
23836 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23837 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23838 VM_Version::supports_avx512cd() &&
23839 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23840 match(Set dst (CountTrailingZerosV src));
23841 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23842 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23844 ins_encode %{
23845 int vlen_enc = vector_length_encoding(this, $src);
23846 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23847 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23848 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23849 %}
23850 ins_pipe( pipe_slow );
23851 %}
23852
23853 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23854 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23855 match(Set dst (CountTrailingZerosV src));
23856 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23857 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23859 ins_encode %{
23860 int vlen_enc = vector_length_encoding(this, $src);
23861 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23862 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23863 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23864 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23865 %}
23866 ins_pipe( pipe_slow );
23867 %}
23868
23869 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23870 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23871 match(Set dst (CountTrailingZerosV src));
23872 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23873 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23874 ins_encode %{
23875 int vlen_enc = vector_length_encoding(this, $src);
23876 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23877 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23878 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23879 %}
23880 ins_pipe( pipe_slow );
23881 %}
23882
23883
23884 // --------------------------------- Bitwise Ternary Logic ----------------------------------
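// vpternlog evaluates an arbitrary three-input boolean function bitwise. The
// immediate byte is the truth table: bit i of the immediate is the result for
// the input combination where bits (2, 1, 0) of i come from (dst, src2, src3).
// For example, 0x96 encodes three-way XOR and 0xE8 encodes majority.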
23885
23886 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23887 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23888 effect(TEMP dst);
23889 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23890 ins_encode %{
23891 int vector_len = vector_length_encoding(this);
23892 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23893 %}
23894 ins_pipe( pipe_slow );
23895 %}
23896
23897 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23898 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23899 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23900 effect(TEMP dst);
23901 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23902 ins_encode %{
23903 int vector_len = vector_length_encoding(this);
23904 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23905 %}
23906 ins_pipe( pipe_slow );
23907 %}
23908
23909 // --------------------------------- Rotation Operations ----------------------------------
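// EVEX targets provide native vector rotates (vprold/vprolq and their
// variable-count forms); on other targets the macroassembler is expected to
// synthesize the rotate from a shift pair and an OR.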
23910 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23911 match(Set dst (RotateLeftV src shift));
23912 match(Set dst (RotateRightV src shift));
23913 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23914 ins_encode %{
23915 int opcode = this->ideal_Opcode();
23916 int vector_len = vector_length_encoding(this);
23917 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23918 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23919 %}
23920 ins_pipe( pipe_slow );
23921 %}
23922
23923 instruct vprorate(vec dst, vec src, vec shift) %{
23924 match(Set dst (RotateLeftV src shift));
23925 match(Set dst (RotateRightV src shift));
23926 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23927 ins_encode %{
23928 int opcode = this->ideal_Opcode();
23929 int vector_len = vector_length_encoding(this);
23930 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23931 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23932 %}
23933 ins_pipe( pipe_slow );
23934 %}
23935
23936 // ---------------------------------- Masked Operations ------------------------------------
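// AVX/AVX2 masked loads and stores carry the mask in a vector register
// (vmaskmov-style, non-subword element types only), while the EVEX forms use
// a k register. In both cases a masked store leaves memory untouched for
// lanes whose mask bit is clear.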
23937 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23938 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23939 match(Set dst (LoadVectorMasked mem mask));
23940 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23941 ins_encode %{
23942 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23943 int vlen_enc = vector_length_encoding(this);
23944 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23945 %}
23946 ins_pipe( pipe_slow );
23947 %}
23948
23949
23950 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23951 predicate(n->in(3)->bottom_type()->isa_vectmask());
23952 match(Set dst (LoadVectorMasked mem mask));
23953 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23954 ins_encode %{
23955 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23956 int vector_len = vector_length_encoding(this);
23957 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23958 %}
23959 ins_pipe( pipe_slow );
23960 %}
23961
23962 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23963 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23964 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23965 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23966 ins_encode %{
23967 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23968 int vlen_enc = vector_length_encoding(src_node);
23969 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23970 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23971 %}
23972 ins_pipe( pipe_slow );
23973 %}
23974
23975 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23976 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23977 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23978 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23979 ins_encode %{
23980 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23981 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23982 int vlen_enc = vector_length_encoding(src_node);
23983 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23984 %}
23985 ins_pipe( pipe_slow );
23986 %}
23987
23988 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23989 match(Set addr (VerifyVectorAlignment addr mask));
23990 effect(KILL cr);
23991 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23992 ins_encode %{
23993 Label Lskip;
23994 // check if masked bits of addr are zero
23995 __ testq($addr$$Register, $mask$$constant);
23996 __ jccb(Assembler::equal, Lskip);
23997 __ stop("verify_vector_alignment found a misaligned vector memory access");
23998 __ bind(Lskip);
23999 %}
24000 ins_pipe(pipe_slow);
24001 %}
24002
24003 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24004 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24005 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24006 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24007 ins_encode %{
24008 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24009 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24010
24011 Label DONE;
24012 int vlen_enc = vector_length_encoding(this, $src1);
24013 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24014
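    // Result protocol: dst is -1 when every active lane compares equal,
    // otherwise the index of the first mismatching lane. ktmp2 = ~mask folds
    // inactive lanes in as "equal"; KORTEST sets CF only when the OR of its
    // operands is all ones, i.e. no active lane differs.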
24015 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24016 __ mov64($dst$$Register, -1L);
24017 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24018 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24019 __ jccb(Assembler::carrySet, DONE);
24020 __ kmovql($dst$$Register, $ktmp1$$KRegister);
24021 __ notq($dst$$Register);
24022 __ tzcntq($dst$$Register, $dst$$Register);
24023 __ bind(DONE);
24024 %}
24025 ins_pipe( pipe_slow );
24026 %}
24027
24028
24029 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24030 match(Set dst (VectorMaskGen len));
24031 effect(TEMP temp, KILL cr);
24032 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24033 ins_encode %{
24034 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24035 %}
24036 ins_pipe( pipe_slow );
24037 %}
24038
24039 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24040 match(Set dst (VectorMaskGen len));
24041 format %{ "vector_mask_gen $len \t! vector mask generator" %}
24042 effect(TEMP temp);
24043 ins_encode %{
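    // Materialize a k-mask with the low 'len' bits set: shifting all-ones
    // right by (64 - len) leaves exactly len one bits.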
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
24045 __ kmovql($dst$$KRegister, $temp$$Register);
24046 %}
24047 ins_pipe( pipe_slow );
24048 %}
24049
24050 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24051 predicate(n->in(1)->bottom_type()->isa_vectmask());
24052 match(Set dst (VectorMaskToLong mask));
24053 effect(TEMP dst, KILL cr);
24054 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24055 ins_encode %{
24056 int opcode = this->ideal_Opcode();
24057 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24058 int mask_len = Matcher::vector_length(this, $mask);
24059 int mask_size = mask_len * type2aelembytes(mbt);
24060 int vlen_enc = vector_length_encoding(this, $mask);
24061 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24062 $dst$$Register, mask_len, mask_size, vlen_enc);
24063 %}
24064 ins_pipe( pipe_slow );
24065 %}
24066
24067 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24068 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24069 match(Set dst (VectorMaskToLong mask));
24070 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24071 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24072 ins_encode %{
24073 int opcode = this->ideal_Opcode();
24074 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24075 int mask_len = Matcher::vector_length(this, $mask);
24076 int vlen_enc = vector_length_encoding(this, $mask);
24077 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24078 $dst$$Register, mask_len, mbt, vlen_enc);
24079 %}
24080 ins_pipe( pipe_slow );
24081 %}
24082
24083 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24084 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24085 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24086 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24087 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24088 ins_encode %{
24089 int opcode = this->ideal_Opcode();
24090 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24091 int mask_len = Matcher::vector_length(this, $mask);
24092 int vlen_enc = vector_length_encoding(this, $mask);
24093 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24094 $dst$$Register, mask_len, mbt, vlen_enc);
24095 %}
24096 ins_pipe( pipe_slow );
24097 %}
24098
24099 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24100 predicate(n->in(1)->bottom_type()->isa_vectmask());
24101 match(Set dst (VectorMaskTrueCount mask));
24102 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24103 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24104 ins_encode %{
24105 int opcode = this->ideal_Opcode();
24106 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24107 int mask_len = Matcher::vector_length(this, $mask);
24108 int mask_size = mask_len * type2aelembytes(mbt);
24109 int vlen_enc = vector_length_encoding(this, $mask);
24110 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24111 $tmp$$Register, mask_len, mask_size, vlen_enc);
24112 %}
24113 ins_pipe( pipe_slow );
24114 %}
24115
24116 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24117 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24118 match(Set dst (VectorMaskTrueCount mask));
24119 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24120 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24121 ins_encode %{
24122 int opcode = this->ideal_Opcode();
24123 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24124 int mask_len = Matcher::vector_length(this, $mask);
24125 int vlen_enc = vector_length_encoding(this, $mask);
24126 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24127 $tmp$$Register, mask_len, mbt, vlen_enc);
24128 %}
24129 ins_pipe( pipe_slow );
24130 %}
24131
24132 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24133 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24134 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24135 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24136 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24137 ins_encode %{
24138 int opcode = this->ideal_Opcode();
24139 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24140 int mask_len = Matcher::vector_length(this, $mask);
24141 int vlen_enc = vector_length_encoding(this, $mask);
24142 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24143 $tmp$$Register, mask_len, mbt, vlen_enc);
24144 %}
24145 ins_pipe( pipe_slow );
24146 %}
24147
24148 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24149 predicate(n->in(1)->bottom_type()->isa_vectmask());
24150 match(Set dst (VectorMaskFirstTrue mask));
24151 match(Set dst (VectorMaskLastTrue mask));
24152 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24153 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24154 ins_encode %{
24155 int opcode = this->ideal_Opcode();
24156 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24157 int mask_len = Matcher::vector_length(this, $mask);
24158 int mask_size = mask_len * type2aelembytes(mbt);
24159 int vlen_enc = vector_length_encoding(this, $mask);
24160 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24161 $tmp$$Register, mask_len, mask_size, vlen_enc);
24162 %}
24163 ins_pipe( pipe_slow );
24164 %}
24165
24166 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24167 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24168 match(Set dst (VectorMaskFirstTrue mask));
24169 match(Set dst (VectorMaskLastTrue mask));
24170 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24171 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24172 ins_encode %{
24173 int opcode = this->ideal_Opcode();
24174 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24175 int mask_len = Matcher::vector_length(this, $mask);
24176 int vlen_enc = vector_length_encoding(this, $mask);
24177 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24178 $tmp$$Register, mask_len, mbt, vlen_enc);
24179 %}
24180 ins_pipe( pipe_slow );
24181 %}
24182
24183 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24184 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24185 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24186 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24187 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24188 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24189 ins_encode %{
24190 int opcode = this->ideal_Opcode();
24191 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24192 int mask_len = Matcher::vector_length(this, $mask);
24193 int vlen_enc = vector_length_encoding(this, $mask);
24194 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24195 $tmp$$Register, mask_len, mbt, vlen_enc);
24196 %}
24197 ins_pipe( pipe_slow );
24198 %}
24199
24200 // --------------------------------- Compress/Expand Operations ---------------------------
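// CompressV packs the lanes selected by the mask into consecutive low-order
// lanes of the destination; ExpandV is the inverse, scattering consecutive
// low-order source lanes out to the selected positions. Unselected
// destination lanes are zeroed.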
24201 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24202 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24203 match(Set dst (CompressV src mask));
24204 match(Set dst (ExpandV src mask));
24205 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24207 ins_encode %{
24208 int opcode = this->ideal_Opcode();
24209 int vlen_enc = vector_length_encoding(this);
24210 BasicType bt = Matcher::vector_element_basic_type(this);
24211 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24212 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24213 %}
24214 ins_pipe( pipe_slow );
24215 %}
24216
24217 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24218 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24219 match(Set dst (CompressV src mask));
24220 match(Set dst (ExpandV src mask));
24221 format %{ "vector_compress_expand $dst, $src, $mask" %}
24222 ins_encode %{
24223 int opcode = this->ideal_Opcode();
24224 int vector_len = vector_length_encoding(this);
24225 BasicType bt = Matcher::vector_element_basic_type(this);
24226 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24227 %}
24228 ins_pipe( pipe_slow );
24229 %}
24230
24231 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24232 match(Set dst (CompressM mask));
24233 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24234 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24235 ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "mask input expected");
24237 int mask_len = Matcher::vector_length(this);
24238 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24239 %}
24240 ins_pipe( pipe_slow );
24241 %}
24242
24243 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
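// With GFNI, reversing the bits within each byte is a single vgf2p8affineqb
// against the 0x8040201008040201 bit matrix; a cross-byte reversal is then
// only a byte shuffle away. Without GFNI the bit reversal is synthesized
// stepwise, which is why that form needs extra temporaries.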
24244
24245 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24246 predicate(!VM_Version::supports_gfni());
24247 match(Set dst (ReverseV src));
24248 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24250 ins_encode %{
24251 int vec_enc = vector_length_encoding(this);
24252 BasicType bt = Matcher::vector_element_basic_type(this);
24253 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24254 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24255 %}
24256 ins_pipe( pipe_slow );
24257 %}
24258
24259 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24260 predicate(VM_Version::supports_gfni());
24261 match(Set dst (ReverseV src));
24262 effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24264 ins_encode %{
24265 int vec_enc = vector_length_encoding(this);
24266 BasicType bt = Matcher::vector_element_basic_type(this);
24267 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24268 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24269 $xtmp$$XMMRegister);
24270 %}
24271 ins_pipe( pipe_slow );
24272 %}
24273
24274 instruct vreverse_byte_reg(vec dst, vec src) %{
24275 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24276 match(Set dst (ReverseBytesV src));
24277 effect(TEMP dst);
24278 format %{ "vector_reverse_byte $dst, $src" %}
24279 ins_encode %{
24280 int vec_enc = vector_length_encoding(this);
24281 BasicType bt = Matcher::vector_element_basic_type(this);
24282 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24283 %}
24284 ins_pipe( pipe_slow );
24285 %}
24286
24287 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24288 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24289 match(Set dst (ReverseBytesV src));
24290 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24292 ins_encode %{
24293 int vec_enc = vector_length_encoding(this);
24294 BasicType bt = Matcher::vector_element_basic_type(this);
24295 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24296 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24297 %}
24298 ins_pipe( pipe_slow );
24299 %}
24300
24301 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
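// AVX-512CD provides vplzcntd/vplzcntq directly for int and long elements;
// subword and pre-AVX512 cases are synthesized, e.g. by widening the elements
// or by deriving the count from the exponent of a float conversion.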
24302
24303 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24304 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24305 Matcher::vector_length_in_bytes(n->in(1))));
24306 match(Set dst (CountLeadingZerosV src));
24307 format %{ "vector_count_leading_zeros $dst, $src" %}
24308 ins_encode %{
24309 int vlen_enc = vector_length_encoding(this, $src);
24310 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24311 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24312 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24313 %}
24314 ins_pipe( pipe_slow );
24315 %}
24316
24317 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24318 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24319 Matcher::vector_length_in_bytes(n->in(1))));
24320 match(Set dst (CountLeadingZerosV src mask));
24321 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24322 ins_encode %{
24323 int vlen_enc = vector_length_encoding(this, $src);
24324 BasicType bt = Matcher::vector_element_basic_type(this, $src);
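    // Same idiom as the masked popcount: copy src first so that merge masking
    // leaves inactive lanes holding the original source value.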
24325 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24326 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24327 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24328 %}
24329 ins_pipe( pipe_slow );
24330 %}
24331
24332 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24333 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24334 VM_Version::supports_avx512cd() &&
24335 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24336 match(Set dst (CountLeadingZerosV src));
24337 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24339 ins_encode %{
24340 int vlen_enc = vector_length_encoding(this, $src);
24341 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24342 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24343 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24344 %}
24345 ins_pipe( pipe_slow );
24346 %}
24347
24348 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24349 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24350 match(Set dst (CountLeadingZerosV src));
24351 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24353 ins_encode %{
24354 int vlen_enc = vector_length_encoding(this, $src);
24355 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24356 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24357 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24358 $rtmp$$Register, true, vlen_enc);
24359 %}
24360 ins_pipe( pipe_slow );
24361 %}
24362
24363 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24364 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24365 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24366 match(Set dst (CountLeadingZerosV src));
24367 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24368 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24369 ins_encode %{
24370 int vlen_enc = vector_length_encoding(this, $src);
24371 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24372 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24373 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24374 %}
24375 ins_pipe( pipe_slow );
24376 %}
24377
24378 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24379 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24380 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24381 match(Set dst (CountLeadingZerosV src));
24382 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24383 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24384 ins_encode %{
24385 int vlen_enc = vector_length_encoding(this, $src);
24386 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24387 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24388 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24389 %}
24390 ins_pipe( pipe_slow );
24391 %}
24392
24393 // ---------------------------------- Vector Masked Operations ------------------------------------
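// The masked arithmetic forms below all funnel into evmasked_op, which
// dispatches on the ideal opcode. The boolean argument requests merge
// masking: lanes with a clear mask bit keep the first operand's value, per
// the Vector API contract for masked operations. The shift flavors pass one
// extra flag distinguishing per-lane (variable) counts from a broadcast count.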
24394
24395 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24396 match(Set dst (AddVB (Binary dst src2) mask));
24397 match(Set dst (AddVS (Binary dst src2) mask));
24398 match(Set dst (AddVI (Binary dst src2) mask));
24399 match(Set dst (AddVL (Binary dst src2) mask));
24400 match(Set dst (AddVF (Binary dst src2) mask));
24401 match(Set dst (AddVD (Binary dst src2) mask));
24402 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24403 ins_encode %{
24404 int vlen_enc = vector_length_encoding(this);
24405 BasicType bt = Matcher::vector_element_basic_type(this);
24406 int opc = this->ideal_Opcode();
24407 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24408 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24409 %}
24410 ins_pipe( pipe_slow );
24411 %}
24412
24413 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24414 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24415 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24416 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24417 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24418 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24419 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24420 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24421 ins_encode %{
24422 int vlen_enc = vector_length_encoding(this);
24423 BasicType bt = Matcher::vector_element_basic_type(this);
24424 int opc = this->ideal_Opcode();
24425 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24426 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24427 %}
24428 ins_pipe( pipe_slow );
24429 %}
24430
24431 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24432 match(Set dst (XorV (Binary dst src2) mask));
24433 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24434 ins_encode %{
24435 int vlen_enc = vector_length_encoding(this);
24436 BasicType bt = Matcher::vector_element_basic_type(this);
24437 int opc = this->ideal_Opcode();
24438 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24439 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24440 %}
24441 ins_pipe( pipe_slow );
24442 %}
24443
24444 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24445 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24446 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24447 ins_encode %{
24448 int vlen_enc = vector_length_encoding(this);
24449 BasicType bt = Matcher::vector_element_basic_type(this);
24450 int opc = this->ideal_Opcode();
24451 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24452 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24453 %}
24454 ins_pipe( pipe_slow );
24455 %}
24456
24457 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24458 match(Set dst (OrV (Binary dst src2) mask));
24459 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24460 ins_encode %{
24461 int vlen_enc = vector_length_encoding(this);
24462 BasicType bt = Matcher::vector_element_basic_type(this);
24463 int opc = this->ideal_Opcode();
24464 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24465 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24466 %}
24467 ins_pipe( pipe_slow );
24468 %}
24469
24470 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24471 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24472 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24473 ins_encode %{
24474 int vlen_enc = vector_length_encoding(this);
24475 BasicType bt = Matcher::vector_element_basic_type(this);
24476 int opc = this->ideal_Opcode();
24477 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24478 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24479 %}
24480 ins_pipe( pipe_slow );
24481 %}
24482
24483 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24484 match(Set dst (AndV (Binary dst src2) mask));
24485 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24486 ins_encode %{
24487 int vlen_enc = vector_length_encoding(this);
24488 BasicType bt = Matcher::vector_element_basic_type(this);
24489 int opc = this->ideal_Opcode();
24490 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24491 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24492 %}
24493 ins_pipe( pipe_slow );
24494 %}
24495
24496 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24497 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24498 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24499 ins_encode %{
24500 int vlen_enc = vector_length_encoding(this);
24501 BasicType bt = Matcher::vector_element_basic_type(this);
24502 int opc = this->ideal_Opcode();
24503 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24504 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24505 %}
24506 ins_pipe( pipe_slow );
24507 %}
24508
24509 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24510 match(Set dst (SubVB (Binary dst src2) mask));
24511 match(Set dst (SubVS (Binary dst src2) mask));
24512 match(Set dst (SubVI (Binary dst src2) mask));
24513 match(Set dst (SubVL (Binary dst src2) mask));
24514 match(Set dst (SubVF (Binary dst src2) mask));
24515 match(Set dst (SubVD (Binary dst src2) mask));
24516 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24517 ins_encode %{
24518 int vlen_enc = vector_length_encoding(this);
24519 BasicType bt = Matcher::vector_element_basic_type(this);
24520 int opc = this->ideal_Opcode();
24521 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24522 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24523 %}
24524 ins_pipe( pipe_slow );
24525 %}
24526
24527 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24528 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24529 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24530 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24531 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24532 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24533 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24534 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24535 ins_encode %{
24536 int vlen_enc = vector_length_encoding(this);
24537 BasicType bt = Matcher::vector_element_basic_type(this);
24538 int opc = this->ideal_Opcode();
24539 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24540 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24541 %}
24542 ins_pipe( pipe_slow );
24543 %}
24544
24545 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24546 match(Set dst (MulVS (Binary dst src2) mask));
24547 match(Set dst (MulVI (Binary dst src2) mask));
24548 match(Set dst (MulVL (Binary dst src2) mask));
24549 match(Set dst (MulVF (Binary dst src2) mask));
24550 match(Set dst (MulVD (Binary dst src2) mask));
24551 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24552 ins_encode %{
24553 int vlen_enc = vector_length_encoding(this);
24554 BasicType bt = Matcher::vector_element_basic_type(this);
24555 int opc = this->ideal_Opcode();
24556 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24557 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24558 %}
24559 ins_pipe( pipe_slow );
24560 %}
24561
24562 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24563 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24564 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24565 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24566 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24567 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24568 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24569 ins_encode %{
24570 int vlen_enc = vector_length_encoding(this);
24571 BasicType bt = Matcher::vector_element_basic_type(this);
24572 int opc = this->ideal_Opcode();
24573 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24574 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24575 %}
24576 ins_pipe( pipe_slow );
24577 %}
24578
24579 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24580 match(Set dst (SqrtVF dst mask));
24581 match(Set dst (SqrtVD dst mask));
24582 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24583 ins_encode %{
24584 int vlen_enc = vector_length_encoding(this);
24585 BasicType bt = Matcher::vector_element_basic_type(this);
24586 int opc = this->ideal_Opcode();
24587 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24588 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24589 %}
24590 ins_pipe( pipe_slow );
24591 %}
24592
24593 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24594 match(Set dst (DivVF (Binary dst src2) mask));
24595 match(Set dst (DivVD (Binary dst src2) mask));
24596 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24597 ins_encode %{
24598 int vlen_enc = vector_length_encoding(this);
24599 BasicType bt = Matcher::vector_element_basic_type(this);
24600 int opc = this->ideal_Opcode();
24601 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24602 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24603 %}
24604 ins_pipe( pipe_slow );
24605 %}
24606
24607 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24608 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24609 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24610 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24611 ins_encode %{
24612 int vlen_enc = vector_length_encoding(this);
24613 BasicType bt = Matcher::vector_element_basic_type(this);
24614 int opc = this->ideal_Opcode();
24615 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24616 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24617 %}
24618 ins_pipe( pipe_slow );
24619 %}
24620
24621
24622 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24623 match(Set dst (RotateLeftV (Binary dst shift) mask));
24624 match(Set dst (RotateRightV (Binary dst shift) mask));
24625 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24626 ins_encode %{
24627 int vlen_enc = vector_length_encoding(this);
24628 BasicType bt = Matcher::vector_element_basic_type(this);
24629 int opc = this->ideal_Opcode();
24630 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24631 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24632 %}
24633 ins_pipe( pipe_slow );
24634 %}
24635
24636 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24637 match(Set dst (RotateLeftV (Binary dst src2) mask));
24638 match(Set dst (RotateRightV (Binary dst src2) mask));
24639 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24640 ins_encode %{
24641 int vlen_enc = vector_length_encoding(this);
24642 BasicType bt = Matcher::vector_element_basic_type(this);
24643 int opc = this->ideal_Opcode();
24644 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24645 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24646 %}
24647 ins_pipe( pipe_slow );
24648 %}
24649
24650 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24651 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24652 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24653 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24654 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24655 ins_encode %{
24656 int vlen_enc = vector_length_encoding(this);
24657 BasicType bt = Matcher::vector_element_basic_type(this);
24658 int opc = this->ideal_Opcode();
24659 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24660 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24661 %}
24662 ins_pipe( pipe_slow );
24663 %}
24664
24665 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24666 predicate(!n->as_ShiftV()->is_var_shift());
24667 match(Set dst (LShiftVS (Binary dst src2) mask));
24668 match(Set dst (LShiftVI (Binary dst src2) mask));
24669 match(Set dst (LShiftVL (Binary dst src2) mask));
24670 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24671 ins_encode %{
24672 int vlen_enc = vector_length_encoding(this);
24673 BasicType bt = Matcher::vector_element_basic_type(this);
24674 int opc = this->ideal_Opcode();
24675 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24676 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24677 %}
24678 ins_pipe( pipe_slow );
24679 %}
24680
24681 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24682 predicate(n->as_ShiftV()->is_var_shift());
24683 match(Set dst (LShiftVS (Binary dst src2) mask));
24684 match(Set dst (LShiftVI (Binary dst src2) mask));
24685 match(Set dst (LShiftVL (Binary dst src2) mask));
24686 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24687 ins_encode %{
24688 int vlen_enc = vector_length_encoding(this);
24689 BasicType bt = Matcher::vector_element_basic_type(this);
24690 int opc = this->ideal_Opcode();
24691 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24692 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24693 %}
24694 ins_pipe( pipe_slow );
24695 %}
24696
24697 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24698 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24699 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24700 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24701 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24702 ins_encode %{
24703 int vlen_enc = vector_length_encoding(this);
24704 BasicType bt = Matcher::vector_element_basic_type(this);
24705 int opc = this->ideal_Opcode();
24706 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24707 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24708 %}
24709 ins_pipe( pipe_slow );
24710 %}
24711
24712 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24713 predicate(!n->as_ShiftV()->is_var_shift());
24714 match(Set dst (RShiftVS (Binary dst src2) mask));
24715 match(Set dst (RShiftVI (Binary dst src2) mask));
24716 match(Set dst (RShiftVL (Binary dst src2) mask));
24717 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24718 ins_encode %{
24719 int vlen_enc = vector_length_encoding(this);
24720 BasicType bt = Matcher::vector_element_basic_type(this);
24721 int opc = this->ideal_Opcode();
24722 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24723 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24724 %}
24725 ins_pipe( pipe_slow );
24726 %}
24727
24728 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24729 predicate(n->as_ShiftV()->is_var_shift());
24730 match(Set dst (RShiftVS (Binary dst src2) mask));
24731 match(Set dst (RShiftVI (Binary dst src2) mask));
24732 match(Set dst (RShiftVL (Binary dst src2) mask));
24733 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24734 ins_encode %{
24735 int vlen_enc = vector_length_encoding(this);
24736 BasicType bt = Matcher::vector_element_basic_type(this);
24737 int opc = this->ideal_Opcode();
24738 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24739 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24740 %}
24741 ins_pipe( pipe_slow );
24742 %}
24743
24744 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24745 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24746 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24747 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24748 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24749 ins_encode %{
24750 int vlen_enc = vector_length_encoding(this);
24751 BasicType bt = Matcher::vector_element_basic_type(this);
24752 int opc = this->ideal_Opcode();
24753 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24754 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24755 %}
24756 ins_pipe( pipe_slow );
24757 %}
24758
24759 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24760 predicate(!n->as_ShiftV()->is_var_shift());
24761 match(Set dst (URShiftVS (Binary dst src2) mask));
24762 match(Set dst (URShiftVI (Binary dst src2) mask));
24763 match(Set dst (URShiftVL (Binary dst src2) mask));
24764 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24765 ins_encode %{
24766 int vlen_enc = vector_length_encoding(this);
24767 BasicType bt = Matcher::vector_element_basic_type(this);
24768 int opc = this->ideal_Opcode();
24769 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24770 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24771 %}
24772 ins_pipe( pipe_slow );
24773 %}
24774
24775 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24776 predicate(n->as_ShiftV()->is_var_shift());
24777 match(Set dst (URShiftVS (Binary dst src2) mask));
24778 match(Set dst (URShiftVI (Binary dst src2) mask));
24779 match(Set dst (URShiftVL (Binary dst src2) mask));
24780 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24781 ins_encode %{
24782 int vlen_enc = vector_length_encoding(this);
24783 BasicType bt = Matcher::vector_element_basic_type(this);
24784 int opc = this->ideal_Opcode();
24785 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24786 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24787 %}
24788 ins_pipe( pipe_slow );
24789 %}
24790
24791 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24792 match(Set dst (MaxV (Binary dst src2) mask));
24793 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24794 ins_encode %{
24795 int vlen_enc = vector_length_encoding(this);
24796 BasicType bt = Matcher::vector_element_basic_type(this);
24797 int opc = this->ideal_Opcode();
24798 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24799 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24800 %}
24801 ins_pipe( pipe_slow );
24802 %}
24803
24804 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24805 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24806 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24807 ins_encode %{
24808 int vlen_enc = vector_length_encoding(this);
24809 BasicType bt = Matcher::vector_element_basic_type(this);
24810 int opc = this->ideal_Opcode();
24811 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24812 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24813 %}
24814 ins_pipe( pipe_slow );
24815 %}
24816
24817 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24818 match(Set dst (MinV (Binary dst src2) mask));
24819 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24820 ins_encode %{
24821 int vlen_enc = vector_length_encoding(this);
24822 BasicType bt = Matcher::vector_element_basic_type(this);
24823 int opc = this->ideal_Opcode();
24824 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24825 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24826 %}
24827 ins_pipe( pipe_slow );
24828 %}
24829
24830 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24831 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24832 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24833 ins_encode %{
24834 int vlen_enc = vector_length_encoding(this);
24835 BasicType bt = Matcher::vector_element_basic_type(this);
24836 int opc = this->ideal_Opcode();
24837 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24838 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24839 %}
24840 ins_pipe( pipe_slow );
24841 %}
24842
24843 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24844 match(Set dst (VectorRearrange (Binary dst src2) mask));
24845 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24846 ins_encode %{
24847 int vlen_enc = vector_length_encoding(this);
24848 BasicType bt = Matcher::vector_element_basic_type(this);
24849 int opc = this->ideal_Opcode();
24850 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24851 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24852 %}
24853 ins_pipe( pipe_slow );
24854 %}
24855
24856 instruct vabs_masked(vec dst, kReg mask) %{
24857 match(Set dst (AbsVB dst mask));
24858 match(Set dst (AbsVS dst mask));
24859 match(Set dst (AbsVI dst mask));
24860 match(Set dst (AbsVL dst mask));
24861 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24862 ins_encode %{
24863 int vlen_enc = vector_length_encoding(this);
24864 BasicType bt = Matcher::vector_element_basic_type(this);
24865 int opc = this->ideal_Opcode();
24866 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24867 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24868 %}
24869 ins_pipe( pipe_slow );
24870 %}
24871
24872 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24873 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24874 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24875 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24876 ins_encode %{
    assert(UseFMA, "needs FMA instruction support");
24878 int vlen_enc = vector_length_encoding(this);
24879 BasicType bt = Matcher::vector_element_basic_type(this);
24880 int opc = this->ideal_Opcode();
24881 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24882 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24883 %}
24884 ins_pipe( pipe_slow );
24885 %}
24886
24887 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24888 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24889 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24890 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24891 ins_encode %{
    assert(UseFMA, "needs FMA instruction support");
24893 int vlen_enc = vector_length_encoding(this);
24894 BasicType bt = Matcher::vector_element_basic_type(this);
24895 int opc = this->ideal_Opcode();
24896 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24897 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24898 %}
24899 ins_pipe( pipe_slow );
24900 %}
24901
24902 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24903 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24904 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24905 ins_encode %{
24906 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24907 int vlen_enc = vector_length_encoding(this, $src1);
24908 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24909
    // Dispatch the compare on the element type of src1; integer compares
    // also honor the signedness implied by the boolean test predicate.
24911 switch (src1_elem_bt) {
24912 case T_BYTE: {
24913 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24914 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24915 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24916 break;
24917 }
24918 case T_SHORT: {
24919 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24920 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24921 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24922 break;
24923 }
24924 case T_INT: {
24925 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24926 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24927 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24928 break;
24929 }
24930 case T_LONG: {
24931 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24932 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24933 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24934 break;
24935 }
24936 case T_FLOAT: {
24937 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24938 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24939 break;
24940 }
24941 case T_DOUBLE: {
24942 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24943 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24944 break;
24945 }
24946 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24947 }
24948 %}
24949 ins_pipe( pipe_slow );
24950 %}
24951
24952 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24953 predicate(Matcher::vector_length(n) <= 32);
24954 match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src" %}
24956 ins_encode %{
24957 int mask_len = Matcher::vector_length(this);
24958 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24959 %}
24960 ins_pipe( pipe_slow );
24961 %}
24962
24963 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24964 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24965 match(Set dst (XorVMask src (MaskAll cnt)));
24966 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24968 ins_encode %{
24969 uint masklen = Matcher::vector_length(this);
24970 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24971 %}
24972 ins_pipe( pipe_slow );
24973 %}
24974
24975 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24976 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24977 (Matcher::vector_length(n) == 16) ||
24978 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24979 match(Set dst (XorVMask src (MaskAll cnt)));
24980 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24981 ins_encode %{
24982 uint masklen = Matcher::vector_length(this);
24983 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24984 %}
24985 ins_pipe( pipe_slow );
24986 %}
24987
24988 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24989 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24990 match(Set dst (VectorLongToMask src));
24991 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24992 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24993 ins_encode %{
24994 int mask_len = Matcher::vector_length(this);
24995 int vec_enc = vector_length_encoding(mask_len);
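    // With at most eight mask lanes the vector temporary is never consumed,
    // so xnoreg is passed in its place; the GT8 variant below supplies one.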
24996 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24997 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24998 %}
24999 ins_pipe( pipe_slow );
25000 %}
25001
25002
25003 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25004 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25005 match(Set dst (VectorLongToMask src));
25006 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
25008 ins_encode %{
25009 int mask_len = Matcher::vector_length(this);
25010 assert(mask_len <= 32, "invalid mask length");
25011 int vec_enc = vector_length_encoding(mask_len);
25012 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25013 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25014 %}
25015 ins_pipe( pipe_slow );
25016 %}
25017
25018 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25019 predicate(n->bottom_type()->isa_vectmask());
25020 match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src" %}
25022 ins_encode %{
25023 __ kmov($dst$$KRegister, $src$$Register);
25024 %}
25025 ins_pipe( pipe_slow );
25026 %}
25027
25028 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25029 match(Set dst (AndVMask src1 src2));
25030 match(Set dst (OrVMask src1 src2));
25031 match(Set dst (XorVMask src1 src2));
25032 effect(TEMP kscratch);
25033 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25034 ins_encode %{
25035 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25036 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25037 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25038 uint masklen = Matcher::vector_length(this);
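    // Byte-granularity k-register ops (kandb/korb/kxorb) require AVX512DQ;
    // without it widen to the 16-bit forms, which are baseline AVX512F.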
25039 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25040 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25041 %}
25042 ins_pipe( pipe_slow );
25043 %}
25044
25045 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25046 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25047 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25048 ins_encode %{
25049 int vlen_enc = vector_length_encoding(this);
25050 BasicType bt = Matcher::vector_element_basic_type(this);
25051 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25052 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25053 %}
25054 ins_pipe( pipe_slow );
25055 %}
25056
25057 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25058 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25059 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25060 ins_encode %{
25061 int vlen_enc = vector_length_encoding(this);
25062 BasicType bt = Matcher::vector_element_basic_type(this);
25063 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25064 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25065 %}
25066 ins_pipe( pipe_slow );
25067 %}
25068
25069 instruct castMM(kReg dst)
25070 %{
25071 match(Set dst (CastVV dst));
25072
25073 size(0);
25074 format %{ "# castVV of $dst" %}
25075 ins_encode(/* empty encoding */);
25076 ins_cost(0);
25077 ins_pipe(empty);
25078 %}
25079
25080 instruct castVV(vec dst)
25081 %{
25082 match(Set dst (CastVV dst));
25083
25084 size(0);
25085 format %{ "# castVV of $dst" %}
25086 ins_encode(/* empty encoding */);
25087 ins_cost(0);
25088 ins_pipe(empty);
25089 %}
25090
25091 instruct castVVLeg(legVec dst)
25092 %{
25093 match(Set dst (CastVV dst));
25094
25095 size(0);
25096 format %{ "# castVV of $dst" %}
25097 ins_encode(/* empty encoding */);
25098 ins_cost(0);
25099 ins_pipe(empty);
25100 %}
25101
25102 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25103 %{
25104 match(Set dst (IsInfiniteF src));
25105 effect(TEMP ktmp, KILL cr);
25106 format %{ "float_class_check $dst, $src" %}
25107 ins_encode %{
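    // vfpclass imm8 0x18 selects the +infinity (bit 3) and -infinity (bit 4) classes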
25108 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25109 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25110 %}
25111 ins_pipe(pipe_slow);
25112 %}
25113
25114 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25115 %{
25116 match(Set dst (IsInfiniteD src));
25117 effect(TEMP ktmp, KILL cr);
25118 format %{ "double_class_check $dst, $src" %}
25119 ins_encode %{
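    // vfpclass imm8 0x18 selects the +infinity (bit 3) and -infinity (bit 4) classes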
25120 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25121 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25122 %}
25123 ins_pipe(pipe_slow);
25124 %}
25125
25126 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25127 %{
25128 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25129 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25130 match(Set dst (SaturatingAddV src1 src2));
25131 match(Set dst (SaturatingSubV src1 src2));
25132 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25133 ins_encode %{
25134 int vlen_enc = vector_length_encoding(this);
25135 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25136 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25137 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25138 %}
25139 ins_pipe(pipe_slow);
25140 %}
25141
25142 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25143 %{
25144 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25145 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25146 match(Set dst (SaturatingAddV src1 src2));
25147 match(Set dst (SaturatingSubV src1 src2));
25148 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25149 ins_encode %{
25150 int vlen_enc = vector_length_encoding(this);
25151 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25152 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25153 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25154 %}
25155 ins_pipe(pipe_slow);
25156 %}
25157
25158 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25159 %{
25160 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25161 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25162 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25163 match(Set dst (SaturatingAddV src1 src2));
25164 match(Set dst (SaturatingSubV src1 src2));
25165 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25166 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25167 ins_encode %{
25168 int vlen_enc = vector_length_encoding(this);
25169 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25170 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25171 $src1$$XMMRegister, $src2$$XMMRegister,
25172 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25173 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25174 %}
25175 ins_pipe(pipe_slow);
25176 %}
25177
25178 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25179 %{
25180 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25181 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25182 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25183 match(Set dst (SaturatingAddV src1 src2));
25184 match(Set dst (SaturatingSubV src1 src2));
25185 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25186 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25187 ins_encode %{
25188 int vlen_enc = vector_length_encoding(this);
25189 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25190 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25191 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25192 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25193 %}
25194 ins_pipe(pipe_slow);
25195 %}
25196
25197 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25198 %{
25199 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25200 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25201 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25202 match(Set dst (SaturatingAddV src1 src2));
25203 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25204 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25205 ins_encode %{
25206 int vlen_enc = vector_length_encoding(this);
25207 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25208 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25209 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25210 %}
25211 ins_pipe(pipe_slow);
25212 %}
25213
25214 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25215 %{
25216 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25217 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25218 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25219 match(Set dst (SaturatingAddV src1 src2));
25220 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25221 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25222 ins_encode %{
25223 int vlen_enc = vector_length_encoding(this);
25224 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25225 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25226 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25227 %}
25228 ins_pipe(pipe_slow);
25229 %}
25230
25231 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25232 %{
25233 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25234 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25235 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25236 match(Set dst (SaturatingSubV src1 src2));
25237 effect(TEMP ktmp);
25238 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25239 ins_encode %{
25240 int vlen_enc = vector_length_encoding(this);
25241 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25242 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25243 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25244 %}
25245 ins_pipe(pipe_slow);
25246 %}
25247
25248 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25249 %{
25250 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25251 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25252 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25253 match(Set dst (SaturatingSubV src1 src2));
25254 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25255 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25256 ins_encode %{
25257 int vlen_enc = vector_length_encoding(this);
25258 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25259 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25260 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25261 %}
25262 ins_pipe(pipe_slow);
25263 %}
25264
25265 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25266 %{
25267 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25268 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25269 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25270 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25271 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25272 ins_encode %{
25273 int vlen_enc = vector_length_encoding(this);
25274 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25275 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25276 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25277 %}
25278 ins_pipe(pipe_slow);
25279 %}
25280
25281 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25282 %{
25283 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25284 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25285 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25286 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25287 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25288 ins_encode %{
25289 int vlen_enc = vector_length_encoding(this);
25290 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25291 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25292 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25293 %}
25294 ins_pipe(pipe_slow);
25295 %}
25296
25297 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25298 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25299 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25300 match(Set dst (SaturatingAddV (Binary dst src) mask));
25301 match(Set dst (SaturatingSubV (Binary dst src) mask));
25302 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25303 ins_encode %{
25304 int vlen_enc = vector_length_encoding(this);
25305 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25306 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25307 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25308 %}
25309 ins_pipe( pipe_slow );
25310 %}
25311
25312 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25313 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25314 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25315 match(Set dst (SaturatingAddV (Binary dst src) mask));
25316 match(Set dst (SaturatingSubV (Binary dst src) mask));
25317 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25318 ins_encode %{
25319 int vlen_enc = vector_length_encoding(this);
25320 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25321 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25322 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25323 %}
25324 ins_pipe( pipe_slow );
25325 %}
25326
25327 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25328 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25329 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25330 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25331 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25332 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25333 ins_encode %{
25334 int vlen_enc = vector_length_encoding(this);
25335 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25336 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25337 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25338 %}
25339 ins_pipe( pipe_slow );
25340 %}
25341
25342 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25343 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25344 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25345 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25346 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25347 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25348 ins_encode %{
25349 int vlen_enc = vector_length_encoding(this);
25350 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25351 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25352 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25353 %}
25354 ins_pipe( pipe_slow );
25355 %}
25356
25357 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25358 %{
25359 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25360 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25361 ins_encode %{
25362 int vlen_enc = vector_length_encoding(this);
25363 BasicType bt = Matcher::vector_element_basic_type(this);
25364 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25365 %}
25366 ins_pipe(pipe_slow);
25367 %}
25368
25369 instruct reinterpretS2HF(regF dst, rRegI src)
25370 %{
25371 match(Set dst (ReinterpretS2HF src));
25372 format %{ "vmovw $dst, $src" %}
25373 ins_encode %{
25374 __ vmovw($dst$$XMMRegister, $src$$Register);
25375 %}
25376 ins_pipe(pipe_slow);
25377 %}
25378
25379 instruct reinterpretHF2S(rRegI dst, regF src)
25380 %{
25381 match(Set dst (ReinterpretHF2S src));
25382 format %{ "vmovw $dst, $src" %}
25383 ins_encode %{
25384 __ vmovw($dst$$Register, $src$$XMMRegister);
25385 %}
25386 ins_pipe(pipe_slow);
25387 %}
25388
25389 instruct convF2HFAndS2HF(regF dst, regF src)
25390 %{
25391 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25392 format %{ "convF2HFAndS2HF $dst, $src" %}
25393 ins_encode %{
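    // imm8 0x04 sets bit 2: round using the current MXCSR.RC rounding mode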
25394 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25395 %}
25396 ins_pipe(pipe_slow);
25397 %}
25398
25399 instruct convHF2SAndHF2F(regF dst, regF src)
25400 %{
25401 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25402 format %{ "convHF2SAndHF2F $dst, $src" %}
25403 ins_encode %{
25404 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25405 %}
25406 ins_pipe(pipe_slow);
25407 %}
25408
25409 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25410 %{
25411 match(Set dst (SqrtHF src));
25412 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25413 ins_encode %{
25414 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25415 %}
25416 ins_pipe(pipe_slow);
25417 %}
25418
25419 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25420 %{
25421 match(Set dst (AddHF src1 src2));
25422 match(Set dst (DivHF src1 src2));
25423 match(Set dst (MulHF src1 src2));
25424 match(Set dst (SubHF src1 src2));
25425 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25426 ins_encode %{
25427 int opcode = this->ideal_Opcode();
25428 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25429 %}
25430 ins_pipe(pipe_slow);
25431 %}
25432
25433 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
25434 %{
25435 predicate(VM_Version::supports_avx10_2());
25436 match(Set dst (MaxHF src1 src2));
25437 match(Set dst (MinHF src1 src2));
25438 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25439 ins_encode %{
25440 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25441 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25442 %}
25443 ins_pipe( pipe_slow );
25444 %}
25445
25446 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25447 %{
25448 predicate(!VM_Version::supports_avx10_2());
25449 match(Set dst (MaxHF src1 src2));
25450 match(Set dst (MinHF src1 src2));
25451 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25452 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25453 ins_encode %{
25454 int opcode = this->ideal_Opcode();
25455 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25456 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25457 %}
25458 ins_pipe( pipe_slow );
25459 %}
25460
25461 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25462 %{
25463 match(Set dst (FmaHF src2 (Binary dst src1)));
25464 effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma scalarH" %}
25466 ins_encode %{
25467 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25468 %}
25469 ins_pipe( pipe_slow );
25470 %}
25471
25472
25473 instruct vector_sqrt_HF_reg(vec dst, vec src)
25474 %{
25475 match(Set dst (SqrtVHF src));
25476 format %{ "vector_sqrt_fp16 $dst, $src" %}
25477 ins_encode %{
25478 int vlen_enc = vector_length_encoding(this);
25479 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25480 %}
25481 ins_pipe(pipe_slow);
25482 %}
25483
25484 instruct vector_sqrt_HF_mem(vec dst, memory src)
25485 %{
25486 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25487 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25488 ins_encode %{
25489 int vlen_enc = vector_length_encoding(this);
25490 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25491 %}
25492 ins_pipe(pipe_slow);
25493 %}
25494
25495 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25496 %{
25497 match(Set dst (AddVHF src1 src2));
25498 match(Set dst (DivVHF src1 src2));
25499 match(Set dst (MulVHF src1 src2));
25500 match(Set dst (SubVHF src1 src2));
25501 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25502 ins_encode %{
25503 int vlen_enc = vector_length_encoding(this);
25504 int opcode = this->ideal_Opcode();
25505 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25506 %}
25507 ins_pipe(pipe_slow);
25508 %}
25509
25510
25511 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25512 %{
25513 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25514 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25515 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25516 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25517 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25518 ins_encode %{
25519 int vlen_enc = vector_length_encoding(this);
25520 int opcode = this->ideal_Opcode();
25521 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25522 %}
25523 ins_pipe(pipe_slow);
25524 %}
25525
25526 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25527 %{
25528 match(Set dst (FmaVHF src2 (Binary dst src1)));
25529 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25530 ins_encode %{
25531 int vlen_enc = vector_length_encoding(this);
25532 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25533 %}
25534 ins_pipe( pipe_slow );
25535 %}
25536
25537 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25538 %{
25539 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25540 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25541 ins_encode %{
25542 int vlen_enc = vector_length_encoding(this);
25543 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25544 %}
25545 ins_pipe( pipe_slow );
25546 %}
25547
25548 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
25549 %{
25550 predicate(VM_Version::supports_avx10_2());
25551 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25552 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25553 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25554 ins_encode %{
25555 int vlen_enc = vector_length_encoding(this);
25556 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25557 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25558 %}
25559 ins_pipe( pipe_slow );
25560 %}
25561
25562 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
25563 %{
25564 predicate(VM_Version::supports_avx10_2());
25565 match(Set dst (MinVHF src1 src2));
25566 match(Set dst (MaxVHF src1 src2));
25567 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25568 ins_encode %{
25569 int vlen_enc = vector_length_encoding(this);
25570 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25571 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25572 %}
25573 ins_pipe( pipe_slow );
25574 %}
25575
25576 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25577 %{
25578 predicate(!VM_Version::supports_avx10_2());
25579 match(Set dst (MinVHF src1 src2));
25580 match(Set dst (MaxVHF src1 src2));
25581 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25582 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25583 ins_encode %{
25584 int vlen_enc = vector_length_encoding(this);
25585 int opcode = this->ideal_Opcode();
25586 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25587 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25588 %}
25589 ins_pipe( pipe_slow );
25590 %}
25591
25592 //----------PEEPHOLE RULES-----------------------------------------------------
25593 // These must follow all instruction definitions as they use the names
25594 // defined in the instructions definitions.
25595 //
25596 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25598 //
25599 // peepmatch ( root_instr_name [preceding_instruction]* );
25600 //
// peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and have a signature of
// // the form bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...),
// // the arguments being the basic block, the current node index inside the
// // block, the register allocator, a function that, when invoked, creates the
// // replacement node defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch; the procedure returns true if
// // the transformation succeeded, and false otherwise
25610 //
25611 // peepconstraint %{
25612 // (instruction_number.operand_name relational_op instruction_number.operand_name
25613 // [, ...] );
25614 // // instruction numbers are zero-based using left to right order in peepmatch
25615 //
25616 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25617 // // provide an instruction_number.operand_name for each operand that appears
25618 // // in the replacement instruction's match rule
25619 //
25620 // ---------VM FLAGS---------------------------------------------------------
25621 //
25622 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25623 //
25624 // Each peephole rule is given an identifying number starting with zero and
25625 // increasing by one in the order seen by the parser. An individual peephole
25626 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25627 // on the command-line.
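//
// For example (both are develop flags, so a debug build is assumed):
//   -XX:-OptoPeephole       disables all peephole rules
//   -XX:OptoPeepholeAt=3    enables only the rule numbered 3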
25628 //
25629 // ---------CURRENT LIMITATIONS----------------------------------------------
25630 //
// Only transformations inside a basic block (do we need more for peephole?)
25632 //
25633 // ---------EXAMPLE----------------------------------------------------------
25634 //
25635 // // pertinent parts of existing instructions in architecture description
25636 // instruct movI(rRegI dst, rRegI src)
25637 // %{
25638 // match(Set dst (CopyI src));
25639 // %}
25640 //
25641 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25642 // %{
25643 // match(Set dst (AddI dst src));
25644 // effect(KILL cr);
25645 // %}
25646 //
25647 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25648 // %{
25649 // match(Set dst (AddI dst src));
25650 // %}
25651 //
25652 // 1. Simple replacement
// - Only match adjacent instructions in the same basic block
25654 // - Only equality constraints
25655 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25656 // - Only one replacement instruction
25657 //
25658 // // Change (inc mov) to lea
25659 // peephole %{
25660 // // lea should only be emitted when beneficial
25661 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25662 // // increment preceded by register-register move
25663 // peepmatch ( incI_rReg movI );
25664 // // require that the destination register of the increment
25665 // // match the destination register of the move
25666 // peepconstraint ( 0.dst == 1.dst );
25667 // // construct a replacement instruction that sets
25668 // // the destination to ( move's source register + one )
25669 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25670 // %}
25671 //
25672 // 2. Procedural replacement
// - More flexibility in finding the relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilize the architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing capabilities
25678 //
25679 // // Change (inc mov) to lea
25680 // peephole %{
25681 // // lea should only be emitted when beneficial
25682 // peeppredicate( VM_Version::supports_fast_2op_lea() );
// // the rule numbers of the matched nodes are passed into the procedure below
25684 // peepmatch ( incI_rReg movI );
25685 // // the method that takes the responsibility of transformation
25686 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a function that, when invoked, creates
// // this node is passed into the procedure above
25689 // peepreplace ( leaI_rReg_immI() );
25690 // %}
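//
// A hypothetical sketch of such a procedure (the name, parameters and body are
// illustrative only, assuming the signature described above; the real
// procedures live in the architecture-dependent peephole file, e.g.
// peephole_x86_64.cpp):
//
// bool inc_mov_to_lea(Block* block, int index, PhaseRegAlloc* ra_,
//                     MachNode* (*new_root)(), int inc_rule, int mov_rule) {
//   // walk back from the matched inc to the preceding mov, verify that the
//   // rules and register constraints line up, then build the replacement lea
//   // via new_root(), fill in its operands, and splice it into the block;
//   // returning false leaves the original code untouched
//   ...
// }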
25691
// These instructions are not matched by the matcher but are used by the peephole rules
25693 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25694 %{
25695 predicate(false);
25696 match(Set dst (AddI src1 src2));
25697 format %{ "leal $dst, [$src1 + $src2]" %}
25698 ins_encode %{
25699 Register dst = $dst$$Register;
25700 Register src1 = $src1$$Register;
25701 Register src2 = $src2$$Register;
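    // rbp and r13 cannot be encoded as a base register without a displacement
    // byte, so prefer the other register as the base when possible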
25702 if (src1 != rbp && src1 != r13) {
25703 __ leal(dst, Address(src1, src2, Address::times_1));
25704 } else {
25705 assert(src2 != rbp && src2 != r13, "");
25706 __ leal(dst, Address(src2, src1, Address::times_1));
25707 }
25708 %}
25709 ins_pipe(ialu_reg_reg);
25710 %}
25711
25712 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25713 %{
25714 predicate(false);
25715 match(Set dst (AddI src1 src2));
25716 format %{ "leal $dst, [$src1 + $src2]" %}
25717 ins_encode %{
25718 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25719 %}
25720 ins_pipe(ialu_reg_reg);
25721 %}
25722
25723 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25724 %{
25725 predicate(false);
25726 match(Set dst (LShiftI src shift));
25727 format %{ "leal $dst, [$src << $shift]" %}
25728 ins_encode %{
25729 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25730 Register src = $src$$Register;
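    // a shift by 1 (times_2) can be emitted as the shorter [src + src] form;
    // the index-only form below always requires a 32-bit displacement, and
    // rbp/r13 cannot serve as a base without a displacement byte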
25731 if (scale == Address::times_2 && src != rbp && src != r13) {
25732 __ leal($dst$$Register, Address(src, src, Address::times_1));
25733 } else {
25734 __ leal($dst$$Register, Address(noreg, src, scale));
25735 }
25736 %}
25737 ins_pipe(ialu_reg_reg);
25738 %}
25739
25740 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25741 %{
25742 predicate(false);
25743 match(Set dst (AddL src1 src2));
25744 format %{ "leaq $dst, [$src1 + $src2]" %}
25745 ins_encode %{
25746 Register dst = $dst$$Register;
25747 Register src1 = $src1$$Register;
25748 Register src2 = $src2$$Register;
25749 if (src1 != rbp && src1 != r13) {
25750 __ leaq(dst, Address(src1, src2, Address::times_1));
25751 } else {
25752 assert(src2 != rbp && src2 != r13, "");
25753 __ leaq(dst, Address(src2, src1, Address::times_1));
25754 }
25755 %}
25756 ins_pipe(ialu_reg_reg);
25757 %}
25758
25759 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25760 %{
25761 predicate(false);
25762 match(Set dst (AddL src1 src2));
25763 format %{ "leaq $dst, [$src1 + $src2]" %}
25764 ins_encode %{
25765 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25766 %}
25767 ins_pipe(ialu_reg_reg);
25768 %}
25769
25770 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25771 %{
25772 predicate(false);
25773 match(Set dst (LShiftL src shift));
25774 format %{ "leaq $dst, [$src << $shift]" %}
25775 ins_encode %{
25776 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25777 Register src = $src$$Register;
25778 if (scale == Address::times_2 && src != rbp && src != r13) {
25779 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25780 } else {
25781 __ leaq($dst$$Register, Address(noreg, src, scale));
25782 }
25783 %}
25784 ins_pipe(ialu_reg_reg);
25785 %}
25786
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()), as well as on Intel Cascade Lake.
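//
// For example, a register-register move followed by an add of another register,
//
//   movl rdx, rsi
//   addl rdx, rdi
//
// can be coalesced by the rules below into the single instruction
//
//   leal rdx, [rsi + rdi]
//
// (the registers here are illustrative; the coalescing procedures check the
// actual operand constraints).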
25793
25794 peephole
25795 %{
25796 peeppredicate(VM_Version::supports_fast_2op_lea());
25797 peepmatch (addI_rReg);
25798 peepprocedure (lea_coalesce_reg);
25799 peepreplace (leaI_rReg_rReg_peep());
25800 %}
25801
25802 peephole
25803 %{
25804 peeppredicate(VM_Version::supports_fast_2op_lea());
25805 peepmatch (addI_rReg_imm);
25806 peepprocedure (lea_coalesce_imm);
25807 peepreplace (leaI_rReg_immI_peep());
25808 %}
25809
25810 peephole
25811 %{
25812 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25813 VM_Version::is_intel_cascade_lake());
25814 peepmatch (incI_rReg);
25815 peepprocedure (lea_coalesce_imm);
25816 peepreplace (leaI_rReg_immI_peep());
25817 %}
25818
25819 peephole
25820 %{
25821 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25822 VM_Version::is_intel_cascade_lake());
25823 peepmatch (decI_rReg);
25824 peepprocedure (lea_coalesce_imm);
25825 peepreplace (leaI_rReg_immI_peep());
25826 %}
25827
25828 peephole
25829 %{
25830 peeppredicate(VM_Version::supports_fast_2op_lea());
25831 peepmatch (salI_rReg_immI2);
25832 peepprocedure (lea_coalesce_imm);
25833 peepreplace (leaI_rReg_immI2_peep());
25834 %}
25835
25836 peephole
25837 %{
25838 peeppredicate(VM_Version::supports_fast_2op_lea());
25839 peepmatch (addL_rReg);
25840 peepprocedure (lea_coalesce_reg);
25841 peepreplace (leaL_rReg_rReg_peep());
25842 %}
25843
25844 peephole
25845 %{
25846 peeppredicate(VM_Version::supports_fast_2op_lea());
25847 peepmatch (addL_rReg_imm);
25848 peepprocedure (lea_coalesce_imm);
25849 peepreplace (leaL_rReg_immL32_peep());
25850 %}
25851
25852 peephole
25853 %{
25854 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25855 VM_Version::is_intel_cascade_lake());
25856 peepmatch (incL_rReg);
25857 peepprocedure (lea_coalesce_imm);
25858 peepreplace (leaL_rReg_immL32_peep());
25859 %}
25860
25861 peephole
25862 %{
25863 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25864 VM_Version::is_intel_cascade_lake());
25865 peepmatch (decL_rReg);
25866 peepprocedure (lea_coalesce_imm);
25867 peepreplace (leaL_rReg_immL32_peep());
25868 %}
25869
25870 peephole
25871 %{
25872 peeppredicate(VM_Version::supports_fast_2op_lea());
25873 peepmatch (salL_rReg_immI2);
25874 peepprocedure (lea_coalesce_imm);
25875 peepreplace (leaL_rReg_immI2_peep());
25876 %}
25877
25878 peephole
25879 %{
25880 peepmatch (leaPCompressedOopOffset);
25881 peepprocedure (lea_remove_redundant);
25882 %}
25883
25884 peephole
25885 %{
25886 peepmatch (leaP8Narrow);
25887 peepprocedure (lea_remove_redundant);
25888 %}
25889
25890 peephole
25891 %{
25892 peepmatch (leaP32Narrow);
25893 peepprocedure (lea_remove_redundant);
25894 %}
25895
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the preceding instruction.
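//
// For example, in the sequence
//
//   andl  rax, rbx
//   testl rax, rax
//   je    done
//
// the andl already sets ZF according to the result in rax, so the testl can be
// removed (the registers and branch are illustrative).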
25898
// int variant
25900 peephole
25901 %{
25902 peepmatch (testI_reg);
25903 peepprocedure (test_may_remove);
25904 %}
25905
// long variant
25907 peephole
25908 %{
25909 peepmatch (testL_reg);
25910 peepprocedure (test_may_remove);
25911 %}
25912
25913
25914 //----------SMARTSPILL RULES---------------------------------------------------
25915 // These must follow all instruction definitions as they use the names
25916 // defined in the instructions definitions.