//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
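//
// For example, the first definition below reads: RAX is save-on-call under
// both the Java and the C calling convention, is spilled and reloaded as an
// integer (LoadI/StoreI), and has hardware encoding 0:
//
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());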

// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned on as SOE registers.

reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prologue/epilogue code.
reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
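//
// As an instance of this heuristic, chunk0 below lists the call-clobbered
// temporaries R10 and R11 first and the fixed-role stack pointer RSP last.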

alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers of 16 32-bit words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//     XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//     XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
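//
// Note that the reg_def entries below mark every XMM register save-on-call
// in both columns: compiled Java code never expects XMM state to survive a
// call, whichever native ABI applies.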

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
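// K0 is absent from this list: in EVEX encodings a mask-field value of zero
// means "no masking", so k0 cannot serve as an allocatable predicate register.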
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs
// (RSP and R15, the TLS register, are excluded).
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
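
// Note: the _*_REG_mask values returned by these mask-based classes are
// assumed to be populated at VM startup (see reg_mask_init() elsewhere in
// this file), which would also be where the APX registers R16-R31 are made
// allocatable only when UseAPX is enabled.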

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
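
// reg_class_dynamic selects between two statically defined register classes at
// runtime: the first (EVEX) class is used when the guard expression holds,
// otherwise the legacy class limited to XMM0-XMM15 is used.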
1115
// Class for pre-EVEX double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
// Class for EVEX double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Class for pre-EVEX 32-bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
// Class for EVEX 32-bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre-EVEX 64-bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for EVEX 64-bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre-EVEX 128-bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for EVEX 128-bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre-EVEX 256-bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for EVEX 256-bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for EVEX 512-bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for pre-EVEX 512-bit vector registers (restricted to XMM0-XMM15)
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary to the rest of the architecture description.
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
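// Returns true iff each bound of the CastLL's long type is either unbounded
// or fits in a signed 32-bit immediate, so matched rules may encode the
// bounds as imm32 operands.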
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
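// With compressed oops, r12 is reserved as the heap-base register and must be
// excluded from the allocatable sets built below.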
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
static bool generate_vzeroupper(Compile* C) {
  // Generate vzeroupper when the compiled code uses wide (> 128-bit) vectors
  // or requests that the upper halves of the AVX registers be cleared.
  return VM_Version::supports_vzeroupper() &&
         (C->max_vector_size() > 16 || C->clear_upper_avx());
}
1629
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is a 3-byte instruction
}
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
  int offset = 5; // 5 bytes from start of call to where return address points (E8 opcode + rel32)
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
  int offset = 15; // 15 bytes from start of call to where return address points (10-byte movq of the inline-cache value + 5-byte call)
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
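// Materializes the three-way result of a preceding ucomis[s/d] compare:
// dst = -1 if the first operand is below the second or the operands are
// unordered (NaN), 0 if equal, and 1 if above.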
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 Label done;
1703 __ movl(dst, -1);
1704 __ jcc(Assembler::parity, done);
1705 __ jcc(Assembler::below, done);
1706 __ setcc(Assembler::notEqual, dst);
1707 __ bind(done);
1708 }
1709
1710 // Math.min() # Math.max()
1711 // --------------------------
1712 // ucomis[s/d] #
1713 // ja -> b # a
1714 // jp -> NaN # NaN
1715 // jb -> a # b
1716 // je #
1717 // |-jz -> a | b # a & b
1718 // | -> a #
1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1720 XMMRegister a, XMMRegister b,
1721 XMMRegister xmmt, Register rt,
1722 bool min, bool single) {
1723
1724 Label nan, zero, below, above, done;
1725
1726 if (single)
1727 __ ucomiss(a, b);
1728 else
1729 __ ucomisd(a, b);
1730
1731 if (dst->encoding() != (min ? b : a)->encoding())
1732 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1733 else
1734 __ jccb(Assembler::above, done);
1735
1736 __ jccb(Assembler::parity, nan); // PF=1
1737 __ jccb(Assembler::below, below); // CF=1
1738
1739 // equal
1740 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1741 if (single) {
1742 __ ucomiss(a, xmmt);
1743 __ jccb(Assembler::equal, zero);
1744
1745 __ movflt(dst, a);
1746 __ jmp(done);
1747 }
1748 else {
1749 __ ucomisd(a, xmmt);
1750 __ jccb(Assembler::equal, zero);
1751
1752 __ movdbl(dst, a);
1753 __ jmp(done);
1754 }
1755
1756 __ bind(zero);
1757 if (min)
1758 __ vpor(dst, a, b, Assembler::AVX_128bit);
1759 else
1760 __ vpand(dst, a, b, Assembler::AVX_128bit);
1761
1762 __ jmp(done);
1763
1764 __ bind(above);
1765 if (single)
1766 __ movflt(dst, min ? b : a);
1767 else
1768 __ movdbl(dst, min ? b : a);
1769
1770 __ jmp(done);
1771
1772 __ bind(nan);
1773 if (single) {
1774 __ movl(rt, 0x7fc00000); // Float.NaN
1775 __ movdl(dst, rt);
1776 }
1777 else {
1778 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1779 __ movdq(dst, rt);
1780 }
1781 __ jmp(done);
1782
1783 __ bind(below);
1784 if (single)
1785 __ movflt(dst, min ? a : b);
1786 else
1787 __ movdbl(dst, min ? a : b);
1788
1789 __ bind(done);
1790 }
1791
1792 //=============================================================================
1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1794
1795 int ConstantTable::calculate_table_base_offset() const {
1796 return 0; // absolute addressing, no offset
1797 }
1798
1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray<Node*>* nodes, PhaseRegAlloc* ra_) {
1801 ShouldNotReachHere();
1802 }
1803
1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1805 // Empty encoding
1806 }
1807
1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1809 return 0;
1810 }
1811
1812 #ifndef PRODUCT
1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1814 st->print("# MachConstantBaseNode (empty encoding)");
1815 }
1816 #endif
1817
1818
1819 //=============================================================================
1820 #ifndef PRODUCT
1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1822 Compile* C = ra_->C;
1823
1824 int framesize = C->output()->frame_size_in_bytes();
1825 int bangsize = C->output()->bang_size_in_bytes();
1826 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1827 // Remove wordSize for return addr which is already pushed.
1828 framesize -= wordSize;
1829
1830 if (C->output()->need_stack_bang(bangsize)) {
1831 framesize -= wordSize;
1832 st->print("# stack bang (%d bytes)", bangsize);
1833 st->print("\n\t");
1834 st->print("pushq rbp\t# Save rbp");
1835 if (PreserveFramePointer) {
1836 st->print("\n\t");
1837 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1838 }
1839 if (framesize) {
1840 st->print("\n\t");
1841 st->print("subq rsp, #%d\t# Create frame",framesize);
1842 }
1843 } else {
1844 st->print("subq rsp, #%d\t# Create frame",framesize);
1845 st->print("\n\t");
1846 framesize -= wordSize;
1847 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1848 if (PreserveFramePointer) {
1849 st->print("\n\t");
1850 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1851 if (framesize > 0) {
1852 st->print("\n\t");
1853 st->print("addq rbp, #%d", framesize);
1854 }
1855 }
1856 }
1857
1858 if (VerifyStackAtCalls) {
1859 st->print("\n\t");
1860 framesize -= wordSize;
1861 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1862 #ifdef ASSERT
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1881 Compile* C = ra_->C;
1882
1883 int framesize = C->output()->frame_size_in_bytes();
1884 int bangsize = C->output()->bang_size_in_bytes();
1885
1886 if (C->clinit_barrier_on_entry()) {
1887 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1888 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1889
1890 Label L_skip_barrier;
1891 Register klass = rscratch1;
1892
1893 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1894 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1895
1896 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1897
1898 __ bind(L_skip_barrier);
1899 }
1900
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
1902
1903 C->output()->set_frame_complete(__ offset());
1904
1905 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because constant table users
    // might be emitted before MachConstantBaseNode.
1908 ConstantTable& constant_table = C->output()->constant_table();
1909 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1910 }
1911 }
1912
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
1919 int MachPrologNode::reloc() const
1920 {
1921 return 0; // a large enough number
1922 }
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove words for the return addr (already pushed) and RBP
1938 framesize -= 2*wordSize;
1939
1940 if (framesize) {
1941 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1942 st->print("\t");
1943 }
1944
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1956 {
1957 Compile* C = ra_->C;
1958
1959 if (generate_vzeroupper(C)) {
1960 // Clear upper bits of YMM registers when current compiled code uses
1961 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1962 __ vzeroupper();
1963 }
1964
1965 int framesize = C->output()->frame_size_in_bytes();
1966 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove words for the return addr (already pushed) and RBP
1969 framesize -= 2*wordSize;
1970
1971 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1972
1973 if (framesize) {
1974 __ addq(rsp, framesize);
1975 }
1976
1977 __ popq(rbp);
1978
1979 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1980 __ reserved_stack_check();
1981 }
1982
1983 if (do_polling() && C->is_method_compilation()) {
1984 Label dummy_label;
1985 Label* code_stub = &dummy_label;
1986 if (!C->output()->in_scratch_emit_size()) {
1987 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1988 C->output()->add_stub(stub);
1989 code_stub = &stub->entry();
1990 }
1991 __ relocate(relocInfo::poll_return_type);
1992 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1993 }
1994 }
1995
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
2002 int MachEpilogNode::reloc() const
2003 {
2004 return 2; // a large enough number
2005 }
2006
2007 const Pipeline* MachEpilogNode::pipeline() const
2008 {
2009 return MachNode::pipeline_class();
2010 }
2011
2012 //=============================================================================
2013
// Register class of an OptoReg, used to pick the right spill/copy sequence.
enum RC {
  rc_bad,    // not a valid register
  rc_int,    // general purpose register
  rc_kreg,   // AVX-512 opmask register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
2021
2022 static enum RC rc_class(OptoReg::Name reg)
2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2025
2026 if (OptoReg::is_stack(reg)) return rc_stack;
2027
2028 VMReg r = OptoReg::as_VMReg(reg);
2029
2030 if (r->is_Register()) return rc_int;
2031
2032 if (r->is_KRegister()) return rc_kreg;
2033
2034 assert(r->is_XMMRegister(), "must be");
2035 return rc_float;
2036 }
2037
2038 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2040 int src_hi, int dst_hi, uint ireg, outputStream* st);
2041
2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2043 int stack_offset, int reg, uint ireg, outputStream* st);
2044
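// Mem-to-mem vector moves have no allocated scratch register: 64- and 128-bit
// copies go through pushq/popq, while the other sizes save rax or xmm0 to the
// area just below rsp, copy through it, and restore it afterwards.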
2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2046 int dst_offset, uint ireg, outputStream* st) {
2047 if (masm) {
2048 switch (ireg) {
2049 case Op_VecS:
2050 __ movq(Address(rsp, -8), rax);
2051 __ movl(rax, Address(rsp, src_offset));
2052 __ movl(Address(rsp, dst_offset), rax);
2053 __ movq(rax, Address(rsp, -8));
2054 break;
2055 case Op_VecD:
2056 __ pushq(Address(rsp, src_offset));
2057 __ popq (Address(rsp, dst_offset));
2058 break;
2059 case Op_VecX:
2060 __ pushq(Address(rsp, src_offset));
2061 __ popq (Address(rsp, dst_offset));
2062 __ pushq(Address(rsp, src_offset+8));
2063 __ popq (Address(rsp, dst_offset+8));
2064 break;
2065 case Op_VecY:
2066 __ vmovdqu(Address(rsp, -32), xmm0);
2067 __ vmovdqu(xmm0, Address(rsp, src_offset));
2068 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2069 __ vmovdqu(xmm0, Address(rsp, -32));
2070 break;
2071 case Op_VecZ:
2072 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2073 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2074 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2075 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2076 break;
2077 default:
2078 ShouldNotReachHere();
2079 }
2080 #ifndef PRODUCT
2081 } else {
2082 switch (ireg) {
2083 case Op_VecS:
2084 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2085 "movl rax, [rsp + #%d]\n\t"
2086 "movl [rsp + #%d], rax\n\t"
2087 "movq rax, [rsp - #8]",
2088 src_offset, dst_offset);
2089 break;
2090 case Op_VecD:
2091 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2092 "popq [rsp + #%d]",
2093 src_offset, dst_offset);
2094 break;
2095 case Op_VecX:
2096 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2097 "popq [rsp + #%d]\n\t"
2098 "pushq [rsp + #%d]\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset, src_offset+8, dst_offset+8);
2101 break;
2102 case Op_VecY:
2103 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2104 "vmovdqu xmm0, [rsp + #%d]\n\t"
2105 "vmovdqu [rsp + #%d], xmm0\n\t"
2106 "vmovdqu xmm0, [rsp - #32]",
2107 src_offset, dst_offset);
2108 break;
2109 case Op_VecZ:
2110 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2111 "vmovdqu xmm0, [rsp + #%d]\n\t"
2112 "vmovdqu [rsp + #%d], xmm0\n\t"
2113 "vmovdqu xmm0, [rsp - #64]",
2114 src_offset, dst_offset);
2115 break;
2116 default:
2117 ShouldNotReachHere();
2118 }
2119 #endif
2120 }
2121 }
2122
2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2124 PhaseRegAlloc* ra_,
2125 bool do_size,
2126 outputStream* st) const {
2127 assert(masm != nullptr || st != nullptr, "sanity");
2128 // Get registers to move
2129 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2130 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2131 OptoReg::Name dst_second = ra_->get_reg_second(this);
2132 OptoReg::Name dst_first = ra_->get_reg_first(this);
2133
2134 enum RC src_second_rc = rc_class(src_second);
2135 enum RC src_first_rc = rc_class(src_first);
2136 enum RC dst_second_rc = rc_class(dst_second);
2137 enum RC dst_first_rc = rc_class(dst_first);
2138
2139 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2140 "must move at least 1 register" );
2141
2142 if (src_first == dst_first && src_second == dst_second) {
2143 // Self copy, no move
2144 return 0;
2145 }
2146 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2147 uint ireg = ideal_reg();
2148 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2149 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2151 // mem -> mem
2152 int src_offset = ra_->reg2offset(src_first);
2153 int dst_offset = ra_->reg2offset(dst_first);
2154 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2155 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2156 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2157 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2158 int stack_offset = ra_->reg2offset(dst_first);
2159 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2160 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2161 int stack_offset = ra_->reg2offset(src_first);
2162 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2163 } else {
2164 ShouldNotReachHere();
2165 }
2166 return 0;
2167 }
2168 if (src_first_rc == rc_stack) {
2169 // mem ->
2170 if (dst_first_rc == rc_stack) {
2171 // mem -> mem
2172 assert(src_second != dst_first, "overlap");
2173 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2174 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2175 // 64-bit
2176 int src_offset = ra_->reg2offset(src_first);
2177 int dst_offset = ra_->reg2offset(dst_first);
2178 if (masm) {
2179 __ pushq(Address(rsp, src_offset));
2180 __ popq (Address(rsp, dst_offset));
2181 #ifndef PRODUCT
2182 } else {
2183 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2184 "popq [rsp + #%d]",
2185 src_offset, dst_offset);
2186 #endif
2187 }
2188 } else {
2189 // 32-bit
2190 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2191 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2192 // No pushl/popl, so:
2193 int src_offset = ra_->reg2offset(src_first);
2194 int dst_offset = ra_->reg2offset(dst_first);
2195 if (masm) {
2196 __ movq(Address(rsp, -8), rax);
2197 __ movl(rax, Address(rsp, src_offset));
2198 __ movl(Address(rsp, dst_offset), rax);
2199 __ movq(rax, Address(rsp, -8));
2200 #ifndef PRODUCT
2201 } else {
2202 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2203 "movl rax, [rsp + #%d]\n\t"
2204 "movl [rsp + #%d], rax\n\t"
2205 "movq rax, [rsp - #8]",
2206 src_offset, dst_offset);
2207 #endif
2208 }
2209 }
2210 return 0;
2211 } else if (dst_first_rc == rc_int) {
2212 // mem -> gpr
2213 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2214 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2215 // 64-bit
2216 int offset = ra_->reg2offset(src_first);
2217 if (masm) {
2218 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2219 #ifndef PRODUCT
2220 } else {
2221 st->print("movq %s, [rsp + #%d]\t# spill",
2222 Matcher::regName[dst_first],
2223 offset);
2224 #endif
2225 }
2226 } else {
2227 // 32-bit
2228 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2229 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2230 int offset = ra_->reg2offset(src_first);
2231 if (masm) {
2232 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2233 #ifndef PRODUCT
2234 } else {
2235 st->print("movl %s, [rsp + #%d]\t# spill",
2236 Matcher::regName[dst_first],
2237 offset);
2238 #endif
2239 }
2240 }
2241 return 0;
2242 } else if (dst_first_rc == rc_float) {
      // mem -> xmm
2244 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2245 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2246 // 64-bit
2247 int offset = ra_->reg2offset(src_first);
2248 if (masm) {
2249 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2250 #ifndef PRODUCT
2251 } else {
2252 st->print("%s %s, [rsp + #%d]\t# spill",
2253 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2254 Matcher::regName[dst_first],
2255 offset);
2256 #endif
2257 }
2258 } else {
2259 // 32-bit
2260 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2261 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2262 int offset = ra_->reg2offset(src_first);
2263 if (masm) {
2264 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2265 #ifndef PRODUCT
2266 } else {
2267 st->print("movss %s, [rsp + #%d]\t# spill",
2268 Matcher::regName[dst_first],
2269 offset);
2270 #endif
2271 }
2272 }
2273 return 0;
2274 } else if (dst_first_rc == rc_kreg) {
2275 // mem -> kreg
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(src_first);
2280 if (masm) {
2281 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("kmovq %s, [rsp + #%d]\t# spill",
2285 Matcher::regName[dst_first],
2286 offset);
2287 #endif
2288 }
2289 }
2290 return 0;
2291 }
2292 } else if (src_first_rc == rc_int) {
2293 // gpr ->
2294 if (dst_first_rc == rc_stack) {
2295 // gpr -> mem
2296 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2297 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2298 // 64-bit
2299 int offset = ra_->reg2offset(dst_first);
2300 if (masm) {
2301 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2302 #ifndef PRODUCT
2303 } else {
2304 st->print("movq [rsp + #%d], %s\t# spill",
2305 offset,
2306 Matcher::regName[src_first]);
2307 #endif
2308 }
2309 } else {
2310 // 32-bit
2311 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2312 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2313 int offset = ra_->reg2offset(dst_first);
2314 if (masm) {
2315 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2316 #ifndef PRODUCT
2317 } else {
2318 st->print("movl [rsp + #%d], %s\t# spill",
2319 offset,
2320 Matcher::regName[src_first]);
2321 #endif
2322 }
2323 }
2324 return 0;
2325 } else if (dst_first_rc == rc_int) {
2326 // gpr -> gpr
2327 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2328 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2329 // 64-bit
2330 if (masm) {
2331 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2332 as_Register(Matcher::_regEncode[src_first]));
2333 #ifndef PRODUCT
2334 } else {
2335 st->print("movq %s, %s\t# spill",
2336 Matcher::regName[dst_first],
2337 Matcher::regName[src_first]);
2338 #endif
2339 }
2340 return 0;
2341 } else {
2342 // 32-bit
2343 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2344 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2345 if (masm) {
2346 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2347 as_Register(Matcher::_regEncode[src_first]));
2348 #ifndef PRODUCT
2349 } else {
2350 st->print("movl %s, %s\t# spill",
2351 Matcher::regName[dst_first],
2352 Matcher::regName[src_first]);
2353 #endif
2354 }
2355 return 0;
2356 }
2357 } else if (dst_first_rc == rc_float) {
2358 // gpr -> xmm
2359 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2360 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2361 // 64-bit
2362 if (masm) {
2363 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movdq %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 } else {
2372 // 32-bit
2373 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2374 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2375 if (masm) {
2376 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2377 #ifndef PRODUCT
2378 } else {
2379 st->print("movdl %s, %s\t# spill",
2380 Matcher::regName[dst_first],
2381 Matcher::regName[src_first]);
2382 #endif
2383 }
2384 }
2385 return 0;
2386 } else if (dst_first_rc == rc_kreg) {
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 if (masm) {
2391 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2392 #ifndef PRODUCT
2393 } else {
2394 st->print("kmovq %s, %s\t# spill",
2395 Matcher::regName[dst_first],
2396 Matcher::regName[src_first]);
2397 #endif
2398 }
2399 }
2400 Unimplemented();
2401 return 0;
2402 }
2403 } else if (src_first_rc == rc_float) {
2404 // xmm ->
2405 if (dst_first_rc == rc_stack) {
2406 // xmm -> mem
2407 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2408 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2409 // 64-bit
2410 int offset = ra_->reg2offset(dst_first);
2411 if (masm) {
2412 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2413 #ifndef PRODUCT
2414 } else {
2415 st->print("movsd [rsp + #%d], %s\t# spill",
2416 offset,
2417 Matcher::regName[src_first]);
2418 #endif
2419 }
2420 } else {
2421 // 32-bit
2422 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2423 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2424 int offset = ra_->reg2offset(dst_first);
2425 if (masm) {
2426 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2427 #ifndef PRODUCT
2428 } else {
2429 st->print("movss [rsp + #%d], %s\t# spill",
2430 offset,
2431 Matcher::regName[src_first]);
2432 #endif
2433 }
2434 }
2435 return 0;
2436 } else if (dst_first_rc == rc_int) {
2437 // xmm -> gpr
2438 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2439 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2440 // 64-bit
2441 if (masm) {
2442 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movdq %s, %s\t# spill",
2446 Matcher::regName[dst_first],
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 } else {
2451 // 32-bit
2452 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2453 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2454 if (masm) {
2455 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2456 #ifndef PRODUCT
2457 } else {
2458 st->print("movdl %s, %s\t# spill",
2459 Matcher::regName[dst_first],
2460 Matcher::regName[src_first]);
2461 #endif
2462 }
2463 }
2464 return 0;
2465 } else if (dst_first_rc == rc_float) {
2466 // xmm -> xmm
2467 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2468 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2469 // 64-bit
2470 if (masm) {
2471 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("%s %s, %s\t# spill",
2475 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2476 Matcher::regName[dst_first],
2477 Matcher::regName[src_first]);
2478 #endif
2479 }
2480 } else {
2481 // 32-bit
2482 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2483 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2484 if (masm) {
2485 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2486 #ifndef PRODUCT
2487 } else {
2488 st->print("%s %s, %s\t# spill",
2489 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2490 Matcher::regName[dst_first],
2491 Matcher::regName[src_first]);
2492 #endif
2493 }
2494 }
2495 return 0;
2496 } else if (dst_first_rc == rc_kreg) {
2497 assert(false, "Illegal spilling");
2498 return 0;
2499 }
2500 } else if (src_first_rc == rc_kreg) {
2501 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2503 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2504 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2505 // 64-bit
2506 int offset = ra_->reg2offset(dst_first);
2507 if (masm) {
2508 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2509 #ifndef PRODUCT
2510 } else {
2511 st->print("kmovq [rsp + #%d] , %s\t# spill",
2512 offset,
2513 Matcher::regName[src_first]);
2514 #endif
2515 }
2516 }
2517 return 0;
2518 } else if (dst_first_rc == rc_int) {
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 if (masm) {
2523 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2524 #ifndef PRODUCT
2525 } else {
2526 st->print("kmovq %s, %s\t# spill",
2527 Matcher::regName[dst_first],
2528 Matcher::regName[src_first]);
2529 #endif
2530 }
2531 }
2532 Unimplemented();
2533 return 0;
2534 } else if (dst_first_rc == rc_kreg) {
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 return 0;
2549 } else if (dst_first_rc == rc_float) {
2550 assert(false, "Illegal spill");
2551 return 0;
2552 }
2553 }
2554
  assert(false, "unhandled spill combination");
2556 Unimplemented();
2557 return 0;
2558 }
2559
2560 #ifndef PRODUCT
2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2562 implementation(nullptr, ra_, false, st);
2563 }
2564 #endif
2565
2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2567 implementation(masm, ra_, false, nullptr);
2568 }
2569
2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2571 return MachNode::size(ra_);
2572 }
2573
2574 //=============================================================================
2575 #ifndef PRODUCT
2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2577 {
2578 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2579 int reg = ra_->get_reg_first(this);
2580 st->print("leaq %s, [rsp + #%d]\t# box lock",
2581 Matcher::regName[reg], offset);
2582 }
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2619 {
2620 __ ic_check(InteriorEntryAlignment);
2621 }
2622
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
2632 bool Matcher::supports_vector_calling_convention(void) {
2633 return EnableVectorSupport;
2634 }
2635
2636 static bool is_ndd_demotable(const MachNode* mdef) {
2637 return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
2638 }
2639
2640 static bool is_ndd_demotable_commutative(const MachNode* mdef) {
2641 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
2642 }
2643
2644 static bool is_demotion_candidate(const MachNode* mdef) {
2645 return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
2646 }
2647
2648 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2649 int oper_index) {
2650 if (mdef == nullptr) {
2651 return false;
2652 }
2653
2654 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2655 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2656 assert(oper_index != 1 || !is_demotion_candidate(mdef), "%s", mdef->Name());
2657 assert(oper_index != 2 || !is_ndd_demotable_commutative(mdef), "%s", mdef->Name());
2658 return false;
2659 }
2660
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
2664 if (mdef->operand_num_edges(oper_index) != 1) {
2665 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2666 return false;
2667 }
2668
2669 // Demotion candidate must be register mask compatible with definition.
2670 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2671 if (!oper_mask.overlap(mdef->out_RegMask())) {
2672 assert(!is_demotion_candidate(mdef), "%s", mdef->Name());
2673 return false;
2674 }
2675
2676 switch (oper_index) {
    // The first operand of a MachNode matched by an Intel APX NDD selection
    // pattern can share its assigned register with the definition operand if
    // their live ranges do not overlap. In that case the instruction can be
    // demoted to a legacy map0/map1 encoding, replacing its 4-byte extended
    // EVEX prefix with a shorter REX/REX2 prefix. Demotion candidates are
    // decorated with a special flag by the instruction selector.
2683 case 1:
2684 return is_demotion_candidate(mdef);
2685
    // The definition operand of a commutative operation can also be biased
    // towards the second operand.
2688 case 2:
2689 return is_ndd_demotable_commutative(mdef);
2690
    // The current scheme selects at most two biasing candidates.
2692 default:
2693 assert(false, "unhandled operand index: %s", mdef->Name());
2694 break;
2695 }
2696
2697 return false;
2698 }
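// For example (a sketch): an APX NDD add of the form "dst = src1 + src2"
// needs a 4-byte extended EVEX prefix. If the allocator assigns dst the same
// register as src1, the assembler can instead emit the legacy two-operand
// form "src1 += src2" with only a 1-byte REX or 2-byte REX2 prefix.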
2699
2700 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2701 assert(EnableVectorSupport, "sanity");
2702 int lo = XMM0_num;
2703 int hi = XMM0b_num;
2704 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2705 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2706 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2707 return OptoRegPair(hi, lo);
2708 }
2709
2710 // Is this branch offset short enough that a short branch can be used?
2711 //
2712 // NOTE: If the platform does not provide any short branch variants, then
2713 // this method should return false for offset 0.
2714 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
2718 offset -= br_size;
2719
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less.
2722 if (rule == jmpConUCF2_rule)
2723 return (-126 <= offset && offset <= 125);
2724 return (-128 <= offset && offset <= 127);
2725 }
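// Worked example: a 2-byte jcc at offset 100 targeting offset 20 is passed
// offset = 20 - 100 = -80; after subtracting br_size the displacement is
// -82, which fits in [-128, 127], so the short form can be used.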
2726
2727 // Return whether or not this register is ever used as an argument.
2728 // This function is used on startup to build the trampoline stubs in
2729 // generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
2731 // available to the callee.
2732 bool Matcher::can_be_java_arg(int reg)
2733 {
2734 return
2735 reg == RDI_num || reg == RDI_H_num ||
2736 reg == RSI_num || reg == RSI_H_num ||
2737 reg == RDX_num || reg == RDX_H_num ||
2738 reg == RCX_num || reg == RCX_H_num ||
2739 reg == R8_num || reg == R8_H_num ||
2740 reg == R9_num || reg == R9_H_num ||
2741 reg == R12_num || reg == R12_H_num ||
2742 reg == XMM0_num || reg == XMM0b_num ||
2743 reg == XMM1_num || reg == XMM1b_num ||
2744 reg == XMM2_num || reg == XMM2b_num ||
2745 reg == XMM3_num || reg == XMM3b_num ||
2746 reg == XMM4_num || reg == XMM4b_num ||
2747 reg == XMM5_num || reg == XMM5b_num ||
2748 reg == XMM6_num || reg == XMM6b_num ||
2749 reg == XMM7_num || reg == XMM7b_num;
2750 }
2751
2752 bool Matcher::is_spillable_arg(int reg)
2753 {
2754 return can_be_java_arg(reg);
2755 }
2756
2757 uint Matcher::int_pressure_limit()
2758 {
2759 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2760 }
2761
2762 uint Matcher::float_pressure_limit()
2763 {
  // After experimenting with different values, the following default threshold
2765 // works best for LCM's register pressure scheduling on x64.
2766 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2767 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2768 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2769 }
2770
2771 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
2775 return false;
2776 }
2777
2778 // Register for DIVI projection of divmodI
2779 const RegMask& Matcher::divI_proj_mask() {
2780 return INT_RAX_REG_mask();
2781 }
2782
2783 // Register for MODI projection of divmodI
2784 const RegMask& Matcher::modI_proj_mask() {
2785 return INT_RDX_REG_mask();
2786 }
2787
2788 // Register for DIVL projection of divmodL
2789 const RegMask& Matcher::divL_proj_mask() {
2790 return LONG_RAX_REG_mask();
2791 }
2792
2793 // Register for MODL projection of divmodL
2794 const RegMask& Matcher::modL_proj_mask() {
2795 return LONG_RDX_REG_mask();
2796 }
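// The masks above reflect the x86 idiv/div convention: the quotient is
// produced in RAX and the remainder in RDX, so the DivI/ModI and DivL/ModL
// projections are pinned to those registers.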
2797
2798 %}
2799
2800 source_hpp %{
2801 // Header information of the source block.
2802 // Method declarations/definitions which are used outside
2803 // the ad-scope can conveniently be defined here.
2804 //
2805 // To keep related declarations/definitions/uses close together,
// we switch between source %{ ... %} and source_hpp %{ ... %} freely as needed.
2807
2808 #include "runtime/vm_version.hpp"
2809
2810 class NativeJump;
2811
2812 class CallStubImpl {
2813
2814 //--------------------------------------------------------------
2815 //---< Used for optimization in Compile::shorten_branches >---
2816 //--------------------------------------------------------------
2817
2818 public:
2819 // Size of call trampoline stub.
2820 static uint size_call_trampoline() {
2821 return 0; // no call trampolines on this platform
2822 }
2823
2824 // number of relocations needed by a call trampoline stub
2825 static uint reloc_call_trampoline() {
2826 return 0; // no call trampolines on this platform
2827 }
2828 };
2829
2830 class HandlerImpl {
2831
2832 public:
2833
2834 static int emit_deopt_handler(C2_MacroAssembler* masm);
2835
2836 static uint size_deopt_handler() {
    // One 5-byte call (E8 rel32) and one 2-byte short jmp (EB rel8).
2838 return 7;
2839 }
2840 };
2841
2842 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
2844 case 4: // fall-through
2845 case 8: // fall-through
2846 case 16: return Assembler::AVX_128bit;
2847 case 32: return Assembler::AVX_256bit;
2848 case 64: return Assembler::AVX_512bit;
2849
2850 default: {
2851 ShouldNotReachHere();
2852 return Assembler::AVX_NoVec;
2853 }
2854 }
2855 }
2856
2857 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2858 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2859 }
2860
2861 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2862 uint def_idx = use->operand_index(opnd);
2863 Node* def = use->in(def_idx);
2864 return vector_length_encoding(def);
2865 }
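// For example, vector_length_encoding(32) yields Assembler::AVX_256bit for a
// 256-bit (YMM) operation, while 4-, 8- and 16-byte vectors all share the
// 128-bit (XMM) encoding.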
2866
2867 static inline bool is_vector_popcount_predicate(BasicType bt) {
2868 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2869 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2870 }
2871
2872 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2873 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2874 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2875 }
2876
2877 class Node::PD {
2878 public:
2879 enum NodeFlags : uint64_t {
2880 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2881 Flag_sets_carry_flag = Node::_last_flag << 2,
2882 Flag_sets_parity_flag = Node::_last_flag << 3,
2883 Flag_sets_zero_flag = Node::_last_flag << 4,
2884 Flag_sets_overflow_flag = Node::_last_flag << 5,
2885 Flag_sets_sign_flag = Node::_last_flag << 6,
2886 Flag_clears_carry_flag = Node::_last_flag << 7,
2887 Flag_clears_parity_flag = Node::_last_flag << 8,
2888 Flag_clears_zero_flag = Node::_last_flag << 9,
2889 Flag_clears_overflow_flag = Node::_last_flag << 10,
2890 Flag_clears_sign_flag = Node::_last_flag << 11,
2891 Flag_ndd_demotable = Node::_last_flag << 12,
2892 Flag_ndd_demotable_commutative = Node::_last_flag << 13,
2893 _last_flag = Flag_ndd_demotable_commutative
2894 };
2895 };
2896
2897 %} // end source_hpp
2898
2899 source %{
2900
2901 #include "opto/addnode.hpp"
2902 #include "c2_intelJccErratum_x86.hpp"
2903
2904 void PhaseOutput::pd_perform_mach_node_analysis() {
2905 if (VM_Version::has_intel_jcc_erratum()) {
2906 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2907 _buf_sizes._code += extra_padding;
2908 }
2909 }
2910
2911 int MachNode::pd_alignment_required() const {
2912 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2913 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2914 return IntelJccErratum::largest_jcc_size() + 1;
2915 } else {
2916 return 1;
2917 }
2918 }
2919
2920 int MachNode::compute_padding(int current_offset) const {
2921 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2922 Compile* C = Compile::current();
2923 PhaseOutput* output = C->output();
2924 Block* block = output->block();
2925 int index = output->index();
2926 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2927 } else {
2928 return 0;
2929 }
2930 }
2931
2932 // Emit deopt handler code.
2933 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2934
2935 // Note that the code buffer's insts_mark is always relative to insts.
2936 // That's why we must use the macroassembler to generate a handler.
2937 address base = __ start_a_stub(size_deopt_handler());
2938 if (base == nullptr) {
2939 ciEnv::current()->record_failure("CodeCache is full");
2940 return 0; // CodeBuffer::expand failed
2941 }
2942 int offset = __ offset();
2943
2944 Label start;
2945 __ bind(start);
2946
2947 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2948
2949 int entry_offset = __ offset();
2950
2951 __ jmp(start);
2952
2953 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2954 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2955 "out of bounds read in post-call NOP check");
2956 __ end_a_stub();
2957 return entry_offset;
2958 }
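// Resulting stub layout (a sketch; assumes a 5-byte call and a 2-byte short
// jmp, which together fill the 7 bytes reserved by size_deopt_handler()):
//   base + 0: call SharedRuntime::deopt_blob()->unpack()
//   base + 5: jmp base + 0            <- entry_offset points here
// Jumping to the entry executes the call, which pushes base + 5 as the
// return address; the assert above guarantees enough bytes follow the entry
// for the post-call NOP check to read safely.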
2959
2960 static Assembler::Width widthForType(BasicType bt) {
2961 if (bt == T_BYTE) {
2962 return Assembler::B;
2963 } else if (bt == T_SHORT) {
2964 return Assembler::W;
2965 } else if (bt == T_INT) {
2966 return Assembler::D;
2967 } else {
2968 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2969 return Assembler::Q;
2970 }
2971 }
2972
2973 //=============================================================================
2974
2975 // Float masks come from different places depending on platform.
2976 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2977 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2978 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2979 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2980 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2981 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2982 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2983 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2984 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2985 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2986 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2987 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2988 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2989 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2990 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2991 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2992 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2993 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2994 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2995
2996 //=============================================================================
2997 bool Matcher::match_rule_supported(int opcode) {
2998 if (!has_match_rule(opcode)) {
2999 return false; // no match rule present
3000 }
3001 switch (opcode) {
3002 case Op_AbsVL:
3003 case Op_StoreVectorScatter:
3004 if (UseAVX < 3) {
3005 return false;
3006 }
3007 break;
3008 case Op_PopCountI:
3009 case Op_PopCountL:
3010 if (!UsePopCountInstruction) {
3011 return false;
3012 }
3013 break;
3014 case Op_PopCountVI:
3015 if (UseAVX < 2) {
3016 return false;
3017 }
3018 break;
3019 case Op_CompressV:
3020 case Op_ExpandV:
3021 case Op_PopCountVL:
3022 if (UseAVX < 2) {
3023 return false;
3024 }
3025 break;
3026 case Op_MulVI:
3027 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3028 return false;
3029 }
3030 break;
3031 case Op_MulVL:
3032 if (UseSSE < 4) { // only with SSE4_1 or AVX
3033 return false;
3034 }
3035 break;
3036 case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
3038 return false;
3039 }
3040 break;
3041 case Op_AbsVB:
3042 case Op_AbsVS:
3043 case Op_AbsVI:
3044 case Op_AddReductionVI:
3045 case Op_AndReductionV:
3046 case Op_OrReductionV:
3047 case Op_XorReductionV:
3048 if (UseSSE < 3) { // requires at least SSSE3
3049 return false;
3050 }
3051 break;
3052 case Op_MaxHF:
3053 case Op_MinHF:
3054 if (!VM_Version::supports_avx512vlbw()) {
3055 return false;
3056 } // fallthrough
3057 case Op_AddHF:
3058 case Op_DivHF:
3059 case Op_FmaHF:
3060 case Op_MulHF:
3061 case Op_ReinterpretS2HF:
3062 case Op_ReinterpretHF2S:
3063 case Op_SubHF:
3064 case Op_SqrtHF:
3065 if (!VM_Version::supports_avx512_fp16()) {
3066 return false;
3067 }
3068 break;
3069 case Op_VectorLoadShuffle:
3070 case Op_VectorRearrange:
3071 case Op_MulReductionVI:
3072 if (UseSSE < 4) { // requires at least SSE4
3073 return false;
3074 }
3075 break;
3076 case Op_IsInfiniteF:
3077 case Op_IsInfiniteD:
3078 if (!VM_Version::supports_avx512dq()) {
3079 return false;
3080 }
3081 break;
3082 case Op_SqrtVD:
3083 case Op_SqrtVF:
3084 case Op_VectorMaskCmp:
3085 case Op_VectorCastB2X:
3086 case Op_VectorCastS2X:
3087 case Op_VectorCastI2X:
3088 case Op_VectorCastL2X:
3089 case Op_VectorCastF2X:
3090 case Op_VectorCastD2X:
3091 case Op_VectorUCastB2X:
3092 case Op_VectorUCastS2X:
3093 case Op_VectorUCastI2X:
3094 case Op_VectorMaskCast:
3095 if (UseAVX < 1) { // enabled for AVX only
3096 return false;
3097 }
3098 break;
3099 case Op_PopulateIndex:
3100 if (UseAVX < 2) {
3101 return false;
3102 }
3103 break;
3104 case Op_RoundVF:
3105 if (UseAVX < 2) { // enabled for AVX2 only
3106 return false;
3107 }
3108 break;
3109 case Op_RoundVD:
3110 if (UseAVX < 3) {
3111 return false; // enabled for AVX3 only
3112 }
3113 break;
3114 case Op_CompareAndSwapL:
3115 case Op_CompareAndSwapP:
3116 break;
    case Op_StrIndexOf:
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
3127 case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
3129 return false;
3130 }
3131 break;
3132 case Op_MulVB:
3133 case Op_LShiftVB:
3134 case Op_RShiftVB:
3135 case Op_URShiftVB:
3136 case Op_VectorInsert:
3137 case Op_VectorLoadMask:
3138 case Op_VectorStoreMask:
3139 case Op_VectorBlend:
3140 if (UseSSE < 4) {
3141 return false;
3142 }
3143 break;
3144 case Op_MaxD:
3145 case Op_MaxF:
3146 case Op_MinD:
3147 case Op_MinF:
3148 if (UseAVX < 1) { // enabled for AVX only
3149 return false;
3150 }
3151 break;
3152 case Op_CacheWB:
3153 case Op_CacheWBPreSync:
3154 case Op_CacheWBPostSync:
3155 if (!VM_Version::supports_data_cache_line_flush()) {
3156 return false;
3157 }
3158 break;
3159 case Op_ExtractB:
3160 case Op_ExtractL:
3161 case Op_ExtractI:
3162 case Op_RoundDoubleMode:
3163 if (UseSSE < 4) {
3164 return false;
3165 }
3166 break;
3167 case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
3169 return false; // 128bit vroundpd is not available
3170 }
3171 break;
3172 case Op_LoadVectorGather:
3173 case Op_LoadVectorGatherMasked:
3174 if (UseAVX < 2) {
3175 return false;
3176 }
3177 break;
3178 case Op_FmaF:
3179 case Op_FmaD:
3180 case Op_FmaVD:
3181 case Op_FmaVF:
3182 if (!UseFMA) {
3183 return false;
3184 }
3185 break;
3186 case Op_MacroLogicV:
3187 if (UseAVX < 3 || !UseVectorMacroLogic) {
3188 return false;
3189 }
3190 break;
3191
3192 case Op_VectorCmpMasked:
3193 case Op_VectorMaskGen:
3194 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3195 return false;
3196 }
3197 break;
3198 case Op_VectorMaskFirstTrue:
3199 case Op_VectorMaskLastTrue:
3200 case Op_VectorMaskTrueCount:
3201 case Op_VectorMaskToLong:
3202 if (UseAVX < 1) {
3203 return false;
3204 }
3205 break;
3206 case Op_RoundF:
3207 case Op_RoundD:
3208 break;
3209 case Op_CopySignD:
3210 case Op_CopySignF:
3211 if (UseAVX < 3) {
3212 return false;
3213 }
3214 if (!VM_Version::supports_avx512vl()) {
3215 return false;
3216 }
3217 break;
3218 case Op_CompressBits:
3219 case Op_ExpandBits:
3220 if (!VM_Version::supports_bmi2()) {
3221 return false;
3222 }
3223 break;
3224 case Op_CompressM:
3225 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3226 return false;
3227 }
3228 break;
3229 case Op_ConvF2HF:
3230 case Op_ConvHF2F:
3231 if (!VM_Version::supports_float16()) {
3232 return false;
3233 }
3234 break;
3235 case Op_VectorCastF2HF:
3236 case Op_VectorCastHF2F:
3237 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3238 return false;
3239 }
3240 break;
3241 }
3242 return true; // Match rules are supported by default.
3243 }
3244
3245 //------------------------------------------------------------------------
3246
3247 static inline bool is_pop_count_instr_target(BasicType bt) {
3248 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3249 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3250 }
3251
3252 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3253 return match_rule_supported_vector(opcode, vlen, bt);
3254 }
3255
3256 // Identify extra cases that we might want to provide match rules for vector nodes and
3257 // other intrinsics guarded with vector length (vlen) and element type (bt).
3258 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3259 if (!match_rule_supported(opcode)) {
3260 return false;
3261 }
3262 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3263 // * SSE2 supports 128bit vectors for all types;
3264 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3265 // * AVX2 supports 256bit vectors for all types;
3266 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3267 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3268 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3269 // And MaxVectorSize is taken into account as well.
3270 if (!vector_size_supported(bt, vlen)) {
3271 return false;
3272 }
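  // For example, with UseAVX == 2 and MaxVectorSize >= 32, an 8-element INT
  // vector (256 bits) passes this check, while a 16-element INT vector
  // (512 bits) would additionally require AVX512F.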
3273 // Special cases which require vector length follow:
3274 // * implementation limitations
3275 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3276 // * 128bit vroundpd instruction is present only in AVX1
3277 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3278 switch (opcode) {
3279 case Op_MaxVHF:
3280 case Op_MinVHF:
3281 if (!VM_Version::supports_avx512bw()) {
3282 return false;
      } // fallthrough
3284 case Op_AddVHF:
3285 case Op_DivVHF:
3286 case Op_FmaVHF:
3287 case Op_MulVHF:
3288 case Op_SubVHF:
3289 case Op_SqrtVHF:
3290 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3291 return false;
3292 }
3293 if (!VM_Version::supports_avx512_fp16()) {
3294 return false;
3295 }
3296 break;
3297 case Op_AbsVF:
3298 case Op_NegVF:
      if (vlen == 16 && !VM_Version::supports_avx512dq()) {
3300 return false; // 512bit vandps and vxorps are not available
3301 }
3302 break;
3303 case Op_AbsVD:
3304 case Op_NegVD:
      if (vlen == 8 && !VM_Version::supports_avx512dq()) {
3306 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3307 }
3308 break;
3309 case Op_RotateRightV:
3310 case Op_RotateLeftV:
3311 if (bt != T_INT && bt != T_LONG) {
3312 return false;
3313 } // fallthrough
3314 case Op_MacroLogicV:
3315 if (!VM_Version::supports_evex() ||
3316 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3317 return false;
3318 }
3319 break;
3320 case Op_ClearArray:
3321 case Op_VectorMaskGen:
3322 case Op_VectorCmpMasked:
3323 if (!VM_Version::supports_avx512bw()) {
3324 return false;
3325 }
3326 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3327 return false;
3328 }
3329 break;
3330 case Op_LoadVectorMasked:
3331 case Op_StoreVectorMasked:
3332 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3333 return false;
3334 }
3335 break;
3336 case Op_UMinV:
3337 case Op_UMaxV:
3338 if (UseAVX == 0) {
3339 return false;
3340 }
3341 break;
3342 case Op_MaxV:
3343 case Op_MinV:
3344 if (UseSSE < 4 && is_integral_type(bt)) {
3345 return false;
3346 }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
3348 // Float/Double intrinsics are enabled for AVX family currently.
3349 if (UseAVX == 0) {
3350 return false;
3351 }
3352 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3353 return false;
3354 }
3355 }
3356 break;
3357 case Op_CallLeafVector:
3358 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3359 return false;
3360 }
3361 break;
3362 case Op_AddReductionVI:
3363 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3364 return false;
3365 }
3366 // fallthrough
3367 case Op_AndReductionV:
3368 case Op_OrReductionV:
3369 case Op_XorReductionV:
3370 if (is_subword_type(bt) && (UseSSE < 4)) {
3371 return false;
3372 }
3373 break;
3374 case Op_MinReductionV:
3375 case Op_MaxReductionV:
3376 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3377 return false;
3378 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3379 return false;
3380 }
3381 // Float/Double intrinsics enabled for AVX family.
3382 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3383 return false;
3384 }
3385 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3386 return false;
3387 }
3388 break;
3389 case Op_VectorBlend:
3390 if (UseAVX == 0 && size_in_bits < 128) {
3391 return false;
3392 }
3393 break;
3394 case Op_VectorTest:
3395 if (UseSSE < 4) {
3396 return false; // Implementation limitation
3397 } else if (size_in_bits < 32) {
3398 return false; // Implementation limitation
3399 }
3400 break;
3401 case Op_VectorLoadShuffle:
3402 case Op_VectorRearrange:
      if (vlen == 2) {
3404 return false; // Implementation limitation due to how shuffle is loaded
3405 } else if (size_in_bits == 256 && UseAVX < 2) {
3406 return false; // Implementation limitation
3407 }
3408 break;
3409 case Op_VectorLoadMask:
3410 case Op_VectorMaskCast:
3411 if (size_in_bits == 256 && UseAVX < 2) {
3412 return false; // Implementation limitation
3413 }
3414 // fallthrough
3415 case Op_VectorStoreMask:
3416 if (vlen == 2) {
3417 return false; // Implementation limitation
3418 }
3419 break;
3420 case Op_PopulateIndex:
3421 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3422 return false;
3423 }
3424 break;
3425 case Op_VectorCastB2X:
3426 case Op_VectorCastS2X:
3427 case Op_VectorCastI2X:
3428 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3429 return false;
3430 }
3431 break;
3432 case Op_VectorCastL2X:
3433 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3434 return false;
3435 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3436 return false;
3437 }
3438 break;
3439 case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversions to sub-word types
      // happen after an intermediate conversion to integer, and the special
      // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
3443 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3444 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3445 return false;
3446 }
3447 }
3448 // fallthrough
3449 case Op_VectorCastD2X:
3450 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3451 return false;
3452 }
3453 break;
3454 case Op_VectorCastF2HF:
3455 case Op_VectorCastHF2F:
3456 if (!VM_Version::supports_f16c() &&
3457 ((!VM_Version::supports_evex() ||
3458 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3459 return false;
3460 }
3461 break;
3462 case Op_RoundVD:
3463 if (!VM_Version::supports_avx512dq()) {
3464 return false;
3465 }
3466 break;
3467 case Op_MulReductionVI:
3468 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3469 return false;
3470 }
3471 break;
3472 case Op_LoadVectorGatherMasked:
3473 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3474 return false;
3475 }
3476 if (is_subword_type(bt) &&
3477 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3478 (size_in_bits < 64) ||
3479 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3480 return false;
3481 }
3482 break;
3483 case Op_StoreVectorScatterMasked:
3484 case Op_StoreVectorScatter:
3485 if (is_subword_type(bt)) {
3486 return false;
3487 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3488 return false;
3489 }
3490 // fallthrough
3491 case Op_LoadVectorGather:
3492 if (!is_subword_type(bt) && size_in_bits == 64) {
3493 return false;
3494 }
3495 if (is_subword_type(bt) && size_in_bits < 64) {
3496 return false;
3497 }
3498 break;
3499 case Op_SaturatingAddV:
3500 case Op_SaturatingSubV:
3501 if (UseAVX < 1) {
3502 return false; // Implementation limitation
3503 }
3504 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3505 return false;
3506 }
3507 break;
3508 case Op_SelectFromTwoVector:
3509 if (size_in_bits < 128) {
3510 return false;
3511 }
3512 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3513 return false;
3514 }
3515 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3516 return false;
3517 }
3518 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3519 return false;
3520 }
3521 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3522 return false;
3523 }
3524 break;
3525 case Op_MaskAll:
3526 if (!VM_Version::supports_evex()) {
3527 return false;
3528 }
3529 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3530 return false;
3531 }
3532 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3533 return false;
3534 }
3535 break;
3536 case Op_VectorMaskCmp:
3537 if (vlen < 2 || size_in_bits < 32) {
3538 return false;
3539 }
3540 break;
3541 case Op_CompressM:
3542 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3543 return false;
3544 }
3545 break;
3546 case Op_CompressV:
3547 case Op_ExpandV:
3548 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3549 return false;
3550 }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
3554 case Op_VectorLongToMask:
3555 if (UseAVX < 1) {
3556 return false;
3557 }
3558 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3559 return false;
3560 }
3561 break;
3562 case Op_SignumVD:
3563 case Op_SignumVF:
3564 if (UseAVX < 1) {
3565 return false;
3566 }
3567 break;
    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!is_pop_count_instr_target(bt) &&
          (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
3576 case Op_ReverseV:
3577 case Op_ReverseBytesV:
3578 if (UseAVX < 2) {
3579 return false;
3580 }
3581 break;
3582 case Op_CountTrailingZerosV:
3583 case Op_CountLeadingZerosV:
3584 if (UseAVX < 2) {
3585 return false;
3586 }
3587 break;
3588 }
  return true; // By default, match rules are supported.
3590 }
3591
3592 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge
  // being the differentiator. This routine does a strict check on the existence
  // of masked operation patterns by returning false for all opcodes other than
  // the ones whose masked instruction patterns are defined in this file.
3599 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3600 return false;
3601 }
3602
3603 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3604 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3605 return false;
3606 }
  switch (opcode) {
3608 // Unary masked operations
3609 case Op_AbsVB:
3610 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      // fallthrough
3614 case Op_AbsVI:
3615 case Op_AbsVL:
3616 return true;
3617
3618 // Ternary masked operations
3619 case Op_FmaVF:
3620 case Op_FmaVD:
3621 return true;
3622
3623 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3625 return false;
3626 }
3627 return true;
3628
3629 // Binary masked operations
3630 case Op_AddVB:
3631 case Op_AddVS:
3632 case Op_SubVB:
3633 case Op_SubVS:
3634 case Op_MulVS:
3635 case Op_LShiftVS:
3636 case Op_RShiftVS:
3637 case Op_URShiftVS:
3638 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3639 if (!VM_Version::supports_avx512bw()) {
3640 return false; // Implementation limitation
3641 }
3642 return true;
3643
3644 case Op_MulVL:
3645 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3646 if (!VM_Version::supports_avx512dq()) {
3647 return false; // Implementation limitation
3648 }
3649 return true;
3650
3651 case Op_AndV:
3652 case Op_OrV:
3653 case Op_XorV:
3654 case Op_RotateRightV:
3655 case Op_RotateLeftV:
3656 if (bt != T_INT && bt != T_LONG) {
3657 return false; // Implementation limitation
3658 }
3659 return true;
3660
3661 case Op_VectorLoadMask:
3662 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3663 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3664 return false;
3665 }
3666 return true;
3667
3668 case Op_AddVI:
3669 case Op_AddVL:
3670 case Op_AddVF:
3671 case Op_AddVD:
3672 case Op_SubVI:
3673 case Op_SubVL:
3674 case Op_SubVF:
3675 case Op_SubVD:
3676 case Op_MulVI:
3677 case Op_MulVF:
3678 case Op_MulVD:
3679 case Op_DivVF:
3680 case Op_DivVD:
3681 case Op_SqrtVF:
3682 case Op_SqrtVD:
3683 case Op_LShiftVI:
3684 case Op_LShiftVL:
3685 case Op_RShiftVI:
3686 case Op_RShiftVL:
3687 case Op_URShiftVI:
3688 case Op_URShiftVL:
3689 case Op_LoadVectorMasked:
3690 case Op_StoreVectorMasked:
3691 case Op_LoadVectorGatherMasked:
3692 case Op_StoreVectorScatterMasked:
3693 return true;
3694
3695 case Op_UMinV:
3696 case Op_UMaxV:
3697 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3698 return false;
3699 } // fallthrough
3700 case Op_MaxV:
3701 case Op_MinV:
3702 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3703 return false; // Implementation limitation
3704 }
3705 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3706 return false; // Implementation limitation
3707 }
3708 return true;
3709 case Op_SaturatingAddV:
3710 case Op_SaturatingSubV:
3711 if (!is_subword_type(bt)) {
3712 return false;
3713 }
3714 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3715 return false; // Implementation limitation
3716 }
3717 return true;
3718
3719 case Op_VectorMaskCmp:
3720 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3721 return false; // Implementation limitation
3722 }
3723 return true;
3724
3725 case Op_VectorRearrange:
3726 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3727 return false; // Implementation limitation
3728 }
3729 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3730 return false; // Implementation limitation
3731 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3732 return false; // Implementation limitation
3733 }
3734 return true;
3735
3736 // Binary Logical operations
3737 case Op_AndVMask:
3738 case Op_OrVMask:
3739 case Op_XorVMask:
3740 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3741 return false; // Implementation limitation
3742 }
3743 return true;
3744
3745 case Op_PopCountVI:
3746 case Op_PopCountVL:
3747 if (!is_pop_count_instr_target(bt)) {
3748 return false;
3749 }
3750 return true;
3751
3752 case Op_MaskAll:
3753 return true;
3754
3755 case Op_CountLeadingZerosV:
3756 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3757 return true;
      }
      // fallthrough
3759 default:
3760 return false;
3761 }
3762 }
3763
3764 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3765 return false;
3766 }
3767
3768 // Return true if Vector::rearrange needs preparation of the shuffle argument
3769 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3770 switch (elem_bt) {
3771 case T_BYTE: return false;
3772 case T_SHORT: return !VM_Version::supports_avx512bw();
3773 case T_INT: return !VM_Version::supports_avx();
3774 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3775 default:
3776 ShouldNotReachHere();
3777 return false;
3778 }
3779 }
3780
3781 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3782 // Prefer predicate if the mask type is "TypeVectMask".
3783 return vt->isa_vectmask() != nullptr;
3784 }
3785
3786 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3787 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3788 bool legacy = (generic_opnd->opcode() == LEGVEC);
3789 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3790 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3791 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3792 return new legVecZOper();
3793 }
3794 if (legacy) {
3795 switch (ideal_reg) {
3796 case Op_VecS: return new legVecSOper();
3797 case Op_VecD: return new legVecDOper();
3798 case Op_VecX: return new legVecXOper();
3799 case Op_VecY: return new legVecYOper();
3800 case Op_VecZ: return new legVecZOper();
3801 }
3802 } else {
3803 switch (ideal_reg) {
3804 case Op_VecS: return new vecSOper();
3805 case Op_VecD: return new vecDOper();
3806 case Op_VecX: return new vecXOper();
3807 case Op_VecY: return new vecYOper();
3808 case Op_VecZ: return new vecZOper();
3809 }
3810 }
3811 ShouldNotReachHere();
3812 return nullptr;
3813 }
3814
3815 bool Matcher::is_reg2reg_move(MachNode* m) {
3816 switch (m->rule()) {
3817 case MoveVec2Leg_rule:
3818 case MoveLeg2Vec_rule:
3819 case MoveF2VL_rule:
3820 case MoveF2LEG_rule:
3821 case MoveVL2F_rule:
3822 case MoveLEG2F_rule:
3823 case MoveD2VL_rule:
3824 case MoveD2LEG_rule:
3825 case MoveVL2D_rule:
3826 case MoveLEG2D_rule:
3827 return true;
3828 default:
3829 return false;
3830 }
3831 }
3832
3833 bool Matcher::is_generic_vector(MachOper* opnd) {
3834 switch (opnd->opcode()) {
3835 case VEC:
3836 case LEGVEC:
3837 return true;
3838 default:
3839 return false;
3840 }
3841 }
3842
3843 //------------------------------------------------------------------------
3844
3845 const RegMask* Matcher::predicate_reg_mask(void) {
3846 return &_VECTMASK_REG_mask;
3847 }
3848
3849 // Max vector size in bytes. 0 if not supported.
3850 int Matcher::vector_width_in_bytes(BasicType bt) {
3851 assert(is_java_primitive(bt), "only primitive type vectors");
3852 // SSE2 supports 128bit vectors for all types.
3853 // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
3855 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3856 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3857 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3858 size = (UseAVX > 2) ? 64 : 32;
3859 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3860 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3861 // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
3863 // Minimum 2 values in vector (or 4 for bytes).
3864 switch (bt) {
3865 case T_DOUBLE:
3866 case T_LONG:
3867 if (size < 16) return 0;
3868 break;
3869 case T_FLOAT:
3870 case T_INT:
3871 if (size < 8) return 0;
3872 break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
3885 default:
3886 ShouldNotReachHere();
3887 }
3888 return size;
3889 }
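// Worked example for the size computation above: UseAVX == 2 gives
// (1 << 2) * 8 = 32 bytes (YMM) for all types; UseAVX == 3 widens that to
// 64 bytes (ZMM), with subword types still capped at 32 bytes unless
// AVX512BW is available.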
3890
3891 // Limits on vector size (number of elements) loaded into vector.
3892 int Matcher::max_vector_size(const BasicType bt) {
3893 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3894 }
3895 int Matcher::min_vector_size(const BasicType bt) {
3896 int max_size = max_vector_size(bt);
  // The minimum size that can be loaded into a vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support calling SVML with single-element (double64) vectors.
3900 if (bt == T_DOUBLE) {
3901 size = 1;
3902 }
  return MIN2(size, max_size);
3904 }
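// For example, assuming max_vector_size(bt) is not the limiting factor:
// min_vector_size(T_BYTE) == 4, min_vector_size(T_INT) == 2, and
// min_vector_size(T_DOUBLE) == 1 (to allow single-element SVML calls).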
3905
3906 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3907 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3908 // by default on Cascade Lake
3909 if (VM_Version::is_default_intel_cascade_lake()) {
3910 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3911 }
3912 return Matcher::max_vector_size(bt);
3913 }
3914
3915 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3916 return -1;
3917 }
3918
3919 // Vector ideal reg corresponding to specified size in bytes
3920 uint Matcher::vector_ideal_reg(int size) {
3921 assert(MaxVectorSize >= size, "");
  switch (size) {
3923 case 4: return Op_VecS;
3924 case 8: return Op_VecD;
3925 case 16: return Op_VecX;
3926 case 32: return Op_VecY;
3927 case 64: return Op_VecZ;
3928 }
3929 ShouldNotReachHere();
3930 return 0;
3931 }
3932
3933 // Check for shift by small constant as well
3934 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3935 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3936 shift->in(2)->get_int() <= 3 &&
3937 // Are there other uses besides address expressions?
3938 !matcher->is_visited(shift)) {
3939 address_visited.set(shift->_idx); // Flag as address_visited
3940 mstack.push(shift->in(2), Matcher::Visit);
3941 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
3945 if (conv->Opcode() == Op_ConvI2L &&
3946 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3947 // Are there other uses besides address expressions?
3948 !matcher->is_visited(conv)) {
3949 address_visited.set(conv->_idx); // Flag as address_visited
3950 mstack.push(conv->in(1), Matcher::Pre_Visit);
3951 } else {
3952 mstack.push(conv, Matcher::Pre_Visit);
3953 }
3954 return true;
3955 }
3956 return false;
3957 }
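// For example (simplified), the int-array access a[i] produces an address
// subtree of the form (AddP base (LShiftX (ConvI2L i) 2)); cloning the shift
// (and the known-positive ConvI2L beneath it) lets the matcher fold the whole
// expression into a single [base + idx*4 + disp] addressing mode instead of
// materializing the scaled index in a register.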
3958
3959 // This function identifies sub-graphs in which a 'load' node is
3960 // input to two different nodes, and such that it can be matched
3961 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3963 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3964 // refers to the same node.
3965 //
3966 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3967 // This is a temporary solution until we make DAGs expressible in ADL.
3968 template<typename ConType>
3969 class FusedPatternMatcher {
3970 Node* _op1_node;
3971 Node* _mop_node;
3972 int _con_op;
3973
3974 static int match_next(Node* n, int next_op, int next_op_idx) {
3975 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3976 return -1;
3977 }
3978
3979 if (next_op_idx == -1) { // n is commutative, try rotations
3980 if (n->in(1)->Opcode() == next_op) {
3981 return 1;
3982 } else if (n->in(2)->Opcode() == next_op) {
3983 return 2;
3984 }
3985 } else {
3986 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3987 if (n->in(next_op_idx)->Opcode() == next_op) {
3988 return next_op_idx;
3989 }
3990 }
3991 return -1;
3992 }
3993
3994 public:
3995 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3996 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3997
3998 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
3999 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4000 typename ConType::NativeType con_value) {
4001 if (_op1_node->Opcode() != op1) {
4002 return false;
4003 }
4004 if (_mop_node->outcnt() > 2) {
4005 return false;
4006 }
4007 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4008 if (op1_op2_idx == -1) {
4009 return false;
4010 }
4011 // Memory operation must be the other edge
4012 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4013
4014 // Check that the mop node is really what we want
4015 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4016 Node* op2_node = _op1_node->in(op1_op2_idx);
4017 if (op2_node->outcnt() > 1) {
4018 return false;
4019 }
4020 assert(op2_node->Opcode() == op2, "Should be");
4021 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4022 if (op2_con_idx == -1) {
4023 return false;
4024 }
4025 // Memory operation must be the other edge
4026 int op2_mop_idx = (op2_con_idx & 1) + 1;
4027 // Check that the memory operation is the same node
4028 if (op2_node->in(op2_mop_idx) == _mop_node) {
4029 // Now check the constant
4030 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4031 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4032 return true;
4033 }
4034 }
4035 }
4036 return false;
4037 }
4038 };
4039
4040 static bool is_bmi_pattern(Node* n, Node* m) {
4041 assert(UseBMI1Instructions, "sanity");
4042 if (n != nullptr && m != nullptr) {
4043 if (m->Opcode() == Op_LoadI) {
4044 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4045 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4046 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4047 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4048 } else if (m->Opcode() == Op_LoadL) {
4049 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4050 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4051 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4052 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4053 }
4054 }
4055 return false;
4056 }
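// The three patterns accepted above correspond to the BMI1 instructions:
//   (AndI (SubI 0 x) x)   -> blsi   (isolate lowest set bit:    x & -x)
//   (AndI (AddI x -1) x)  -> blsr   (reset lowest set bit:      x & (x - 1))
//   (XorI (AddI x -1) x)  -> blsmsk (mask up to lowest set bit: x ^ (x - 1))
// and likewise for the long variants.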
4057
4058 // Should the matcher clone input 'm' of node 'n'?
4059 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4060 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4061 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4062 mstack.push(m, Visit);
4063 return true;
4064 }
4065 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4066 mstack.push(m, Visit); // m = ShiftCntV
4067 return true;
4068 }
4069 if (is_encode_and_store_pattern(n, m)) {
4070 mstack.push(m, Visit);
4071 return true;
4072 }
4073 return false;
4074 }
4075
4076 // Should the Matcher clone shifts on addressing modes, expecting them
4077 // to be subsumed into complex addressing expressions or compute them
4078 // into registers?
4079 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4080 Node *off = m->in(AddPNode::Offset);
4081 if (off->is_Con()) {
4082 address_visited.test_set(m->_idx); // Flag as address_visited
4083 Node *adr = m->in(AddPNode::Address);
4084
4085 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4086 // AtomicAdd is not an addressing expression.
4087 // Cheap to find it by looking for screwy base.
4088 if (adr->is_AddP() &&
4089 !adr->in(AddPNode::Base)->is_top() &&
4090 !adr->in(AddPNode::Offset)->is_Con() &&
4091 off->get_long() == (int) (off->get_long()) && // immL32
4092 // Are there other uses besides address expressions?
4093 !is_visited(adr)) {
4094 address_visited.set(adr->_idx); // Flag as address_visited
4095 Node *shift = adr->in(AddPNode::Offset);
4096 if (!clone_shift(shift, this, mstack, address_visited)) {
4097 mstack.push(shift, Pre_Visit);
4098 }
4099 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4100 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4101 } else {
4102 mstack.push(adr, Pre_Visit);
4103 }
4104
4105 // Clone X+offset as it also folds into most addressing expressions
4106 mstack.push(off, Visit);
4107 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4108 return true;
4109 } else if (clone_shift(off, this, mstack, address_visited)) {
4110 address_visited.test_set(m->_idx); // Flag as address_visited
4111 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4112 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4113 return true;
4114 }
4115 return false;
4116 }
4117
4118 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4119 switch (bt) {
4120 case BoolTest::eq:
4121 return Assembler::eq;
4122 case BoolTest::ne:
4123 return Assembler::neq;
4124 case BoolTest::le:
4125 case BoolTest::ule:
4126 return Assembler::le;
4127 case BoolTest::ge:
4128 case BoolTest::uge:
4129 return Assembler::nlt;
4130 case BoolTest::lt:
4131 case BoolTest::ult:
4132 return Assembler::lt;
4133 case BoolTest::gt:
4134 case BoolTest::ugt:
4135 return Assembler::nle;
4136 default : ShouldNotReachHere(); return Assembler::_false;
4137 }
4138 }
4139
4140 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4141 switch (bt) {
4142 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4143 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4144 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4145 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4146 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4147 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4148 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4149 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4150 }
4151 }
4152
4153 // Helper methods for MachSpillCopyNode::implementation().
4154 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4155 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4156 assert(ireg == Op_VecS || // 32bit vector
4157 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4158 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4159 "no non-adjacent vector moves" );
4160 if (masm) {
4161 switch (ireg) {
4162 case Op_VecS: // copy whole register
4163 case Op_VecD:
4164 case Op_VecX:
4165 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4166 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4167 } else {
4168 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4169 }
4170 break;
4171 case Op_VecY:
4172 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4173 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4174 } else {
4175 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4176 }
4177 break;
4178 case Op_VecZ:
4179 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4180 break;
4181 default:
4182 ShouldNotReachHere();
4183 }
4184 #ifndef PRODUCT
4185 } else {
4186 switch (ireg) {
4187 case Op_VecS:
4188 case Op_VecD:
4189 case Op_VecX:
4190 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4191 break;
4192 case Op_VecY:
4193 case Op_VecZ:
4194 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4195 break;
4196 default:
4197 ShouldNotReachHere();
4198 }
4199 #endif
4200 }
4201 }
4202
4203 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4204 int stack_offset, int reg, uint ireg, outputStream* st) {
4205 if (masm) {
4206 if (is_load) {
4207 switch (ireg) {
4208 case Op_VecS:
4209 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4210 break;
4211 case Op_VecD:
4212 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4213 break;
4214 case Op_VecX:
4215 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4216 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4217 } else {
4218 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4219 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4220 }
4221 break;
4222 case Op_VecY:
4223 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4224 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4225 } else {
4226 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4227 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4228 }
4229 break;
4230 case Op_VecZ:
4231 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4232 break;
4233 default:
4234 ShouldNotReachHere();
4235 }
4236 } else { // store
4237 switch (ireg) {
4238 case Op_VecS:
4239 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4240 break;
4241 case Op_VecD:
4242 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4243 break;
4244 case Op_VecX:
4245 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4246 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4249 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4250 }
4251 break;
4252 case Op_VecY:
4253 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4254 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4257 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4258 }
4259 break;
4260 case Op_VecZ:
4261 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4262 break;
4263 default:
4264 ShouldNotReachHere();
4265 }
4266 }
4267 #ifndef PRODUCT
4268 } else {
4269 if (is_load) {
4270 switch (ireg) {
4271 case Op_VecS:
4272 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4273 break;
4274 case Op_VecD:
4275 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4276 break;
4277 case Op_VecX:
4278 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4279 break;
4280 case Op_VecY:
4281 case Op_VecZ:
4282 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4283 break;
4284 default:
4285 ShouldNotReachHere();
4286 }
4287 } else { // store
4288 switch (ireg) {
4289 case Op_VecS:
4290 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4291 break;
4292 case Op_VecD:
4293 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4294 break;
4295 case Op_VecX:
4296 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4297 break;
4298 case Op_VecY:
4299 case Op_VecZ:
4300 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4301 break;
4302 default:
4303 ShouldNotReachHere();
4304 }
4305 }
4306 #endif
4307 }
4308 }
4309
4310 template <class T>
4311 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4312 int size = type2aelembytes(bt) * len;
4313 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4314 for (int i = 0; i < len; i++) {
4315 int offset = i * type2aelembytes(bt);
4316 switch (bt) {
4317 case T_BYTE: val->at(i) = con; break;
4318 case T_SHORT: {
4319 jshort c = con;
4320 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4321 break;
4322 }
4323 case T_INT: {
4324 jint c = con;
4325 memcpy(val->adr_at(offset), &c, sizeof(jint));
4326 break;
4327 }
4328 case T_LONG: {
4329 jlong c = con;
4330 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4331 break;
4332 }
4333 case T_FLOAT: {
4334 jfloat c = con;
4335 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4336 break;
4337 }
4338 case T_DOUBLE: {
4339 jdouble c = con;
4340 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4341 break;
4342 }
4343 default: assert(false, "%s", type2name(bt));
4344 }
4345 }
4346 return val;
4347 }
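// For example, vreplicate_imm(T_SHORT, (jshort)7, 4) returns the 8-byte
// little-endian pattern 07 00 07 00 07 00 07 00 (x86 is little-endian),
// suitable for materializing a replicated vector constant.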
4348
4349 static inline jlong high_bit_set(BasicType bt) {
4350 switch (bt) {
4351 case T_BYTE: return 0x8080808080808080;
4352 case T_SHORT: return 0x8000800080008000;
4353 case T_INT: return 0x8000000080000000;
4354 case T_LONG: return 0x8000000000000000;
4355 default:
4356 ShouldNotReachHere();
4357 return 0;
4358 }
4359 }
4360
4361 #ifndef PRODUCT
4362 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4363 st->print("nop \t# %d bytes pad for loops and calls", _count);
4364 }
4365 #endif
4366
4367 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4368 __ nop(_count);
4369 }
4370
4371 uint MachNopNode::size(PhaseRegAlloc*) const {
4372 return _count;
4373 }
4374
4375 #ifndef PRODUCT
4376 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4377 st->print("# breakpoint");
4378 }
4379 #endif
4380
4381 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4382 __ int3();
4383 }
4384
4385 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4386 return MachNode::size(ra_);
4387 }
4388
4389 %}
4390
4391 //----------ENCODING BLOCK-----------------------------------------------------
4392 // This block specifies the encoding classes used by the compiler to
4393 // output byte streams. Encoding classes are parameterized macros
4394 // used by Machine Instruction Nodes in order to generate the bit
4395 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER. REG_INTER causes an operand to generate a function
4399 // which returns its register number when queried. CONST_INTER causes
4400 // an operand to generate a function which returns the value of the
4401 // constant when queried. MEMORY_INTER causes an operand to generate
4402 // four functions which return the Base Register, the Index Register,
4403 // the Scale Value, and the Offset Value of the operand when queried.
4404 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
4406 // associated with each basic boolean condition for a conditional
4407 // instruction.
4408 //
4409 // Instructions specify two basic values for encoding. Again, a
4410 // function is available to check if the constant displacement is an
4411 // oop. They use the ins_encode keyword to specify their encoding
4412 // classes (which must be a sequence of enc_class names, and their
4413 // parameters, specified in the encoding block), and they use the
4414 // opcode keyword to specify, in order, their primary, secondary, and
4415 // tertiary opcode. Only the opcode sections which a particular
4416 // instruction needs for encoding need to be specified.
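//
// As an illustrative sketch (a hypothetical rule, not one defined in this
// block), an instruction that uses an encoding class looks like:
//
//   instruct divI_sketch(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
//                        rFlagsReg cr)
//   %{
//     match(Set rax (DivI rax div));
//     effect(KILL rdx, KILL cr);
//     ins_encode(cdql_enc(div));  // expands to the code of cdql_enc below
//     ins_pipe(ialu_reg_reg_alu0);
//   %}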
4417 encode %{
4418 enc_class cdql_enc(no_rax_rdx_RegI div)
4419 %{
4420 // Full implementation of Java idiv and irem; checks for
4421 // special case as described in JVM spec., p.243 & p.271.
4422 //
4423 // normal case special case
4424 //
4425 // input : rax: dividend min_int
4426 // reg: divisor -1
4427 //
4428 // output: rax: quotient (= rax idiv reg) min_int
4429 // rdx: remainder (= rax irem reg) 0
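    //
    // (Note: Java defines min_int / -1 to be min_int with remainder 0,
    // while a raw idiv raises #DE on that overflow, hence the explicit
    // special-case check below.)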
4430 //
    // Code sequence:
4432 //
4433 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4434 // 5: 75 07/08 jne e <normal>
4435 // 7: 33 d2 xor %edx,%edx
4436 // [div >= 8 -> offset + 1]
4437 // [REX_B]
4438 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4439 // c: 74 03/04 je 11 <done>
4440 // 000000000000000e <normal>:
4441 // e: 99 cltd
4442 // [div >= 8 -> offset + 1]
4443 // [REX_B]
4444 // f: f7 f9 idiv $div
4445 // 0000000000000011 <done>:
4446 Label normal;
4447 Label done;
4448
4449 // cmp $0x80000000,%eax
4450 __ cmpl(as_Register(RAX_enc), 0x80000000);
4451
4452 // jne e <normal>
4453 __ jccb(Assembler::notEqual, normal);
4454
4455 // xor %edx,%edx
4456 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4457
    // cmp $0xffffffffffffffff,$div
4459 __ cmpl($div$$Register, -1);
4460
4461 // je 11 <done>
4462 __ jccb(Assembler::equal, done);
4463
4464 // <normal>
4465 // cltd
4466 __ bind(normal);
4467 __ cdql();
4468
4469 // idivl
4470 // <done>
4471 __ idivl($div$$Register);
4472 __ bind(done);
4473 %}
4474
4475 enc_class cdqq_enc(no_rax_rdx_RegL div)
4476 %{
4477 // Full implementation of Java ldiv and lrem; checks for
4478 // special case as described in JVM spec., p.243 & p.271.
4479 //
4480 // normal case special case
4481 //
4482 // input : rax: dividend min_long
4483 // reg: divisor -1
4484 //
4485 // output: rax: quotient (= rax idiv reg) min_long
4486 // rdx: remainder (= rax irem reg) 0
4487 //
    // Code sequence:
4489 //
4490 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4491 // 7: 00 00 80
4492 // a: 48 39 d0 cmp %rdx,%rax
4493 // d: 75 08 jne 17 <normal>
4494 // f: 33 d2 xor %edx,%edx
4495 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4496 // 15: 74 05 je 1c <done>
4497 // 0000000000000017 <normal>:
4498 // 17: 48 99 cqto
4499 // 19: 48 f7 f9 idiv $div
4500 // 000000000000001c <done>:
4501 Label normal;
4502 Label done;
4503
4504 // mov $0x8000000000000000,%rdx
4505 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4506
4507 // cmp %rdx,%rax
4508 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4509
4510 // jne 17 <normal>
4511 __ jccb(Assembler::notEqual, normal);
4512
4513 // xor %edx,%edx
4514 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4515
4516 // cmp $0xffffffffffffffff,$div
4517 __ cmpq($div$$Register, -1);
4518
    // je 1c <done>
4520 __ jccb(Assembler::equal, done);
4521
4522 // <normal>
4523 // cqto
4524 __ bind(normal);
4525 __ cdqq();
4526
    // idivq
4528 // <done>
4529 __ idivq($div$$Register);
4530 __ bind(done);
4531 %}
4532
4533 enc_class clear_avx %{
4534 DEBUG_ONLY(int off0 = __ offset());
4535 if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code uses
      // wide vectors, to avoid the AVX <-> SSE transition penalty during calls.
4539 __ vzeroupper();
4540 }
4541 DEBUG_ONLY(int off1 = __ offset());
4542 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4543 %}
4544
4545 enc_class Java_To_Runtime(method meth) %{
4546 __ lea(r10, RuntimeAddress((address)$meth$$method));
4547 __ call(r10);
4548 __ post_call_nop();
4549 %}
4550
4551 enc_class Java_Static_Call(method meth)
4552 %{
4553 // JAVA STATIC CALL
4554 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4555 // determine who we intended to call.
4556 if (!_method) {
4557 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4558 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4559 // The NOP here is purely to ensure that eliding a call to
4560 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4561 __ addr_nop_5();
4562 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4563 } else {
4564 int method_index = resolved_method_index(masm);
4565 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4566 : static_call_Relocation::spec(method_index);
4567 address mark = __ pc();
4568 int call_offset = __ offset();
4569 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4570 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4571 // Calls of the same statically bound method can share
4572 // a stub to the interpreter.
4573 __ code()->shared_stub_to_interp_for(_method, call_offset);
4574 } else {
4575 // Emit stubs for static call.
4576 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4577 __ clear_inst_mark();
4578 if (stub == nullptr) {
4579 ciEnv::current()->record_failure("CodeCache is full");
4580 return;
4581 }
4582 }
4583 }
4584 __ post_call_nop();
4585 %}
4586
4587 enc_class Java_Dynamic_Call(method meth) %{
4588 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4589 __ post_call_nop();
4590 %}
4591
4592 enc_class call_epilog %{
4593 if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find the magic cookie on the stack
4595 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4596 Label L;
4597 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4598 __ jccb(Assembler::equal, L);
4599 // Die if stack mismatch
4600 __ int3();
4601 __ bind(L);
4602 }
4603 %}
4604
4605 %}
4606
4607 //----------FRAME--------------------------------------------------------------
4608 // Definition of frame structure and management information.
4609 //
4610 // S T A C K L A Y O U T Allocators stack-slot number
4611 // | (to get allocators register number
4612 // G Owned by | | v add OptoReg::stack0())
4613 // r CALLER | |
4614 // o | +--------+ pad to even-align allocators stack-slot
4615 // w V | pad0 | numbers; owned by CALLER
4616 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4617 // h ^ | in | 5
4618 // | | args | 4 Holes in incoming args owned by SELF
4619 // | | | | 3
4620 // | | +--------+
4621 // V | | old out| Empty on Intel, window on Sparc
4622 // | old |preserve| Must be even aligned.
4623 // | SP-+--------+----> Matcher::_old_SP, even aligned
4624 // | | in | 3 area for Intel ret address
4625 // Owned by |preserve| Empty on Sparc.
4626 // SELF +--------+
4627 // | | pad2 | 2 pad to align old SP
4628 // | +--------+ 1
4629 // | | locks | 0
4630 // | +--------+----> OptoReg::stack0(), even aligned
4631 // | | pad1 | 11 pad to align new SP
4632 // | +--------+
4633 // | | | 10
4634 // | | spills | 9 spills
4635 // V | | 8 (pad0 slot for callee)
4636 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4637 // ^ | out | 7
4638 // | | args | 6 Holes in outgoing args owned by CALLEE
4639 // Owned by +--------+
4640 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4641 // | new |preserve| Must be even-aligned.
4642 // | SP-+--------+----> Matcher::_new_SP, even aligned
4643 // | | |
4644 //
4645 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4646 // known from SELF's arguments and the Java calling convention.
4647 // Region 6-7 is determined per call site.
4648 // Note 2: If the calling convention leaves holes in the incoming argument
4649 // area, those holes are owned by SELF. Holes in the outgoing area
4650 // are owned by the CALLEE. Holes should not be necessary in the
4651 // incoming area, as the Java calling convention is completely under
4652 // the control of the AD file. Doubles can be sorted and packed to
4653 // avoid holes. Holes in the outgoing arguments may be necessary for
4654 // varargs C calling conventions.
4655 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4656 // even aligned with pad0 as needed.
4657 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4658 // region 6-11 is even aligned; it may be padded out more so that
4659 // the region from SP to FP meets the minimum stack alignment.
4660 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4661 // alignment. Region 11, pad1, may be dynamically extended so that
4662 // SP meets the minimum alignment.
4663
4664 frame
4665 %{
  // This register defines part of the calling convention between
  // compiled code and the interpreter.
4668 inline_cache_reg(RAX); // Inline Cache Register
4669
4670 // Optional: name the operand used by cisc-spilling to access
4671 // [stack_pointer + offset]
4672 cisc_spilling_operand_name(indOffset32);
4673
4674 // Number of stack slots consumed by locking an object
4675 sync_stack_slots(2);
4676
4677 // Compiled code's Frame Pointer
4678 frame_pointer(RSP);
4679
  // The interpreter stores its frame pointer in a register, which is
  // saved to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
4683 interpreter_frame_pointer(RBP);
4684
4685 // Stack alignment requirement
4686 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4687
4688 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4689 // for calls to C. Supports the var-args backing area for register parms.
4690 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4691
4692 // The after-PROLOG location of the return address. Location of
4693 // return address specifies a type (REG or STACK) and a number
4694 // representing the register number (i.e. - use a register name) or
4695 // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
4698 return_addr(STACK - 2 +
4699 align_up((Compile::current()->in_preserve_stack_slots() +
4700 Compile::current()->fixed_slots()),
4701 stack_alignment_in_slots()));
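
  // For example (hypothetical numbers): with 4 in-preserve slots, no fixed
  // slots, and 16-byte (4-slot) stack alignment, align_up(4 + 0, 4) == 4,
  // placing the return address at STACK slot 2.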
4702
4703 // Location of compiled Java return values. Same as C for now.
4704 return_value
4705 %{
4706 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4707 "only return normal values");
4708
4709 static const int lo[Op_RegL + 1] = {
4710 0,
4711 0,
4712 RAX_num, // Op_RegN
4713 RAX_num, // Op_RegI
4714 RAX_num, // Op_RegP
4715 XMM0_num, // Op_RegF
4716 XMM0_num, // Op_RegD
4717 RAX_num // Op_RegL
4718 };
4719 static const int hi[Op_RegL + 1] = {
4720 0,
4721 0,
4722 OptoReg::Bad, // Op_RegN
4723 OptoReg::Bad, // Op_RegI
4724 RAX_H_num, // Op_RegP
4725 OptoReg::Bad, // Op_RegF
4726 XMM0b_num, // Op_RegD
4727 RAX_H_num // Op_RegL
4728 };
4729 // Excluded flags and vector registers.
4730 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4731 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4732 %}
4733 %}
4734
4735 //----------ATTRIBUTES---------------------------------------------------------
4736 //----------Operand Attributes-------------------------------------------------
4737 op_attrib op_cost(0); // Required cost attribute
4738
4739 //----------Instruction Attributes---------------------------------------------
4740 ins_attrib ins_cost(100); // Required cost attribute
4741 ins_attrib ins_size(8); // Required size attribute (in bits)
4742 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4743 // a non-matching short branch variant
4744 // of some long branch?
4745 ins_attrib ins_alignment(1); // Required alignment attribute (must
4746 // be a power of 2) specifies the
4747 // alignment that some part of the
4748 // instruction (not necessarily the
4749 // start) requires. If > 1, a
4750 // compute_padding() function must be
4751 // provided for the instruction
4752
4753 // Whether this node is expanded during code emission into a sequence of
4754 // instructions and the first instruction can perform an implicit null check.
4755 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4756
4757 //----------OPERANDS-----------------------------------------------------------
4758 // Operand definitions must precede instruction definitions for correct parsing
4759 // in the ADLC because operands constitute user defined types which are used in
4760 // instruction definitions.
4761
4762 //----------Simple Operands----------------------------------------------------
4763 // Immediate Operands
4764 // Integer Immediate
4765 operand immI()
4766 %{
4767 match(ConI);
4768
4769 op_cost(10);
4770 format %{ %}
4771 interface(CONST_INTER);
4772 %}
4773
4774 // Constant for test vs zero
4775 operand immI_0()
4776 %{
4777 predicate(n->get_int() == 0);
4778 match(ConI);
4779
4780 op_cost(0);
4781 format %{ %}
4782 interface(CONST_INTER);
4783 %}
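
// Predicated constants let a rule demand one specific value. Illustrative
// sketch (hypothetical rule, modeled on the usual test-against-zero idiom;
// the real rules live in the instruction section):
//
//   instruct testI_sketch(rFlagsReg cr, rRegI src, immI_0 zero)
//   %{
//     match(Set cr (CmpI src zero));
//     ins_encode %{ __ testl($src$$Register, $src$$Register); %}
//     ins_pipe(ialu_cr_reg_imm);
//   %}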
4784
4785 // Constant for increment
4786 operand immI_1()
4787 %{
4788 predicate(n->get_int() == 1);
4789 match(ConI);
4790
4791 op_cost(0);
4792 format %{ %}
4793 interface(CONST_INTER);
4794 %}
4795
4796 // Constant for decrement
4797 operand immI_M1()
4798 %{
4799 predicate(n->get_int() == -1);
4800 match(ConI);
4801
4802 op_cost(0);
4803 format %{ %}
4804 interface(CONST_INTER);
4805 %}
4806
4807 operand immI_2()
4808 %{
4809 predicate(n->get_int() == 2);
4810 match(ConI);
4811
4812 op_cost(0);
4813 format %{ %}
4814 interface(CONST_INTER);
4815 %}
4816
4817 operand immI_4()
4818 %{
4819 predicate(n->get_int() == 4);
4820 match(ConI);
4821
4822 op_cost(0);
4823 format %{ %}
4824 interface(CONST_INTER);
4825 %}
4826
4827 operand immI_8()
4828 %{
4829 predicate(n->get_int() == 8);
4830 match(ConI);
4831
4832 op_cost(0);
4833 format %{ %}
4834 interface(CONST_INTER);
4835 %}
4836
4837 // Valid scale values for addressing modes
4838 operand immI2()
4839 %{
4840 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4841 match(ConI);
4842
4843 format %{ %}
4844 interface(CONST_INTER);
4845 %}
4846
4847 operand immU7()
4848 %{
4849 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4850 match(ConI);
4851
4852 op_cost(5);
4853 format %{ %}
4854 interface(CONST_INTER);
4855 %}
4856
4857 operand immI8()
4858 %{
4859 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4860 match(ConI);
4861
4862 op_cost(5);
4863 format %{ %}
4864 interface(CONST_INTER);
4865 %}
4866
4867 operand immU8()
4868 %{
4869 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4870 match(ConI);
4871
4872 op_cost(5);
4873 format %{ %}
4874 interface(CONST_INTER);
4875 %}
4876
4877 operand immI16()
4878 %{
4879 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4880 match(ConI);
4881
4882 op_cost(10);
4883 format %{ %}
4884 interface(CONST_INTER);
4885 %}
4886
4887 // Int Immediate non-negative
4888 operand immU31()
4889 %{
4890 predicate(n->get_int() >= 0);
4891 match(ConI);
4892
4893 op_cost(0);
4894 format %{ %}
4895 interface(CONST_INTER);
4896 %}
4897
4898 // Pointer Immediate
4899 operand immP()
4900 %{
4901 match(ConP);
4902
4903 op_cost(10);
4904 format %{ %}
4905 interface(CONST_INTER);
4906 %}
4907
4908 // Null Pointer Immediate
4909 operand immP0()
4910 %{
4911 predicate(n->get_ptr() == 0);
4912 match(ConP);
4913
4914 op_cost(5);
4915 format %{ %}
4916 interface(CONST_INTER);
4917 %}
4918
// Narrow Pointer Immediate
4920 operand immN() %{
4921 match(ConN);
4922
4923 op_cost(10);
4924 format %{ %}
4925 interface(CONST_INTER);
4926 %}
4927
4928 operand immNKlass() %{
4929 match(ConNKlass);
4930
4931 op_cost(10);
4932 format %{ %}
4933 interface(CONST_INTER);
4934 %}
4935
4936 // Null Pointer Immediate
4937 operand immN0() %{
4938 predicate(n->get_narrowcon() == 0);
4939 match(ConN);
4940
4941 op_cost(5);
4942 format %{ %}
4943 interface(CONST_INTER);
4944 %}
4945
4946 operand immP31()
4947 %{
4948 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4949 && (n->get_ptr() >> 31) == 0);
4950 match(ConP);
4951
4952 op_cost(5);
4953 format %{ %}
4954 interface(CONST_INTER);
4955 %}
4956
4957
4958 // Long Immediate
4959 operand immL()
4960 %{
4961 match(ConL);
4962
4963 op_cost(20);
4964 format %{ %}
4965 interface(CONST_INTER);
4966 %}
4967
4968 // Long Immediate 8-bit
4969 operand immL8()
4970 %{
4971 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4972 match(ConL);
4973
4974 op_cost(5);
4975 format %{ %}
4976 interface(CONST_INTER);
4977 %}
4978
4979 // Long Immediate 32-bit unsigned
4980 operand immUL32()
4981 %{
4982 predicate(n->get_long() == (unsigned int) (n->get_long()));
4983 match(ConL);
4984
4985 op_cost(10);
4986 format %{ %}
4987 interface(CONST_INTER);
4988 %}
4989
4990 // Long Immediate 32-bit signed
4991 operand immL32()
4992 %{
4993 predicate(n->get_long() == (int) (n->get_long()));
4994 match(ConL);
4995
4996 op_cost(15);
4997 format %{ %}
4998 interface(CONST_INTER);
4999 %}
5000
5001 operand immL_Pow2()
5002 %{
5003 predicate(is_power_of_2((julong)n->get_long()));
5004 match(ConL);
5005
5006 op_cost(15);
5007 format %{ %}
5008 interface(CONST_INTER);
5009 %}
5010
5011 operand immL_NotPow2()
5012 %{
5013 predicate(is_power_of_2((julong)~n->get_long()));
5014 match(ConL);
5015
5016 op_cost(15);
5017 format %{ %}
5018 interface(CONST_INTER);
5019 %}
5020
5021 // Long Immediate zero
5022 operand immL0()
5023 %{
5024 predicate(n->get_long() == 0L);
5025 match(ConL);
5026
5027 op_cost(10);
5028 format %{ %}
5029 interface(CONST_INTER);
5030 %}
5031
5032 // Constant for increment
5033 operand immL1()
5034 %{
5035 predicate(n->get_long() == 1);
5036 match(ConL);
5037
5038 format %{ %}
5039 interface(CONST_INTER);
5040 %}
5041
5042 // Constant for decrement
5043 operand immL_M1()
5044 %{
5045 predicate(n->get_long() == -1);
5046 match(ConL);
5047
5048 format %{ %}
5049 interface(CONST_INTER);
5050 %}
5051
5052 // Long Immediate: low 32-bit mask
5053 operand immL_32bits()
5054 %{
5055 predicate(n->get_long() == 0xFFFFFFFFL);
5056 match(ConL);
5057 op_cost(20);
5058
5059 format %{ %}
5060 interface(CONST_INTER);
5061 %}
5062
5063 // Int Immediate: 2^n-1, positive
5064 operand immI_Pow2M1()
5065 %{
5066 predicate((n->get_int() > 0)
5067 && is_power_of_2((juint)n->get_int() + 1));
5068 match(ConI);
5069
5070 op_cost(20);
5071 format %{ %}
5072 interface(CONST_INTER);
5073 %}
5074
5075 // Float Immediate zero
5076 operand immF0()
5077 %{
5078 predicate(jint_cast(n->getf()) == 0);
5079 match(ConF);
5080
5081 op_cost(5);
5082 format %{ %}
5083 interface(CONST_INTER);
5084 %}
5085
5086 // Float Immediate
5087 operand immF()
5088 %{
5089 match(ConF);
5090
5091 op_cost(15);
5092 format %{ %}
5093 interface(CONST_INTER);
5094 %}
5095
5096 // Half Float Immediate
5097 operand immH()
5098 %{
5099 match(ConH);
5100
5101 op_cost(15);
5102 format %{ %}
5103 interface(CONST_INTER);
5104 %}
5105
5106 // Double Immediate zero
5107 operand immD0()
5108 %{
5109 predicate(jlong_cast(n->getd()) == 0);
5110 match(ConD);
5111
5112 op_cost(5);
5113 format %{ %}
5114 interface(CONST_INTER);
5115 %}
5116
5117 // Double Immediate
5118 operand immD()
5119 %{
5120 match(ConD);
5121
5122 op_cost(15);
5123 format %{ %}
5124 interface(CONST_INTER);
5125 %}
5126
5127 // Immediates for special shifts (sign extend)
5128
// Shift counts for sign extension (16 for short, 24 for byte)
5130 operand immI_16()
5131 %{
5132 predicate(n->get_int() == 16);
5133 match(ConI);
5134
5135 format %{ %}
5136 interface(CONST_INTER);
5137 %}
5138
5139 operand immI_24()
5140 %{
5141 predicate(n->get_int() == 24);
5142 match(ConI);
5143
5144 format %{ %}
5145 interface(CONST_INTER);
5146 %}
5147
5148 // Constant for byte-wide masking
5149 operand immI_255()
5150 %{
5151 predicate(n->get_int() == 255);
5152 match(ConI);
5153
5154 format %{ %}
5155 interface(CONST_INTER);
5156 %}
5157
5158 // Constant for short-wide masking
5159 operand immI_65535()
5160 %{
5161 predicate(n->get_int() == 65535);
5162 match(ConI);
5163
5164 format %{ %}
5165 interface(CONST_INTER);
5166 %}
5167
5168 // Constant for byte-wide masking
5169 operand immL_255()
5170 %{
5171 predicate(n->get_long() == 255);
5172 match(ConL);
5173
5174 format %{ %}
5175 interface(CONST_INTER);
5176 %}
5177
5178 // Constant for short-wide masking
5179 operand immL_65535()
5180 %{
5181 predicate(n->get_long() == 65535);
5182 match(ConL);
5183
5184 format %{ %}
5185 interface(CONST_INTER);
5186 %}
5187
5188 operand kReg()
5189 %{
5190 constraint(ALLOC_IN_RC(vectmask_reg));
5191 match(RegVectMask);
5192 format %{%}
5193 interface(REG_INTER);
5194 %}
5195
5196 // Register Operands
5197 // Integer Register
5198 operand rRegI()
5199 %{
5200 constraint(ALLOC_IN_RC(int_reg));
5201 match(RegI);
5202
5203 match(rax_RegI);
5204 match(rbx_RegI);
5205 match(rcx_RegI);
5206 match(rdx_RegI);
5207 match(rdi_RegI);
5208
5209 format %{ %}
5210 interface(REG_INTER);
5211 %}
5212
5213 // Special Registers
5214 operand rax_RegI()
5215 %{
5216 constraint(ALLOC_IN_RC(int_rax_reg));
5217 match(RegI);
5218 match(rRegI);
5219
5220 format %{ "RAX" %}
5221 interface(REG_INTER);
5222 %}
5223
5224 // Special Registers
5225 operand rbx_RegI()
5226 %{
5227 constraint(ALLOC_IN_RC(int_rbx_reg));
5228 match(RegI);
5229 match(rRegI);
5230
5231 format %{ "RBX" %}
5232 interface(REG_INTER);
5233 %}
5234
5235 operand rcx_RegI()
5236 %{
5237 constraint(ALLOC_IN_RC(int_rcx_reg));
5238 match(RegI);
5239 match(rRegI);
5240
5241 format %{ "RCX" %}
5242 interface(REG_INTER);
5243 %}
5244
5245 operand rdx_RegI()
5246 %{
5247 constraint(ALLOC_IN_RC(int_rdx_reg));
5248 match(RegI);
5249 match(rRegI);
5250
5251 format %{ "RDX" %}
5252 interface(REG_INTER);
5253 %}
5254
5255 operand rdi_RegI()
5256 %{
5257 constraint(ALLOC_IN_RC(int_rdi_reg));
5258 match(RegI);
5259 match(rRegI);
5260
5261 format %{ "RDI" %}
5262 interface(REG_INTER);
5263 %}
5264
5265 operand no_rax_rdx_RegI()
5266 %{
5267 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5268 match(RegI);
5269 match(rbx_RegI);
5270 match(rcx_RegI);
5271 match(rdi_RegI);
5272
5273 format %{ %}
5274 interface(REG_INTER);
5275 %}
5276
5277 operand no_rbp_r13_RegI()
5278 %{
5279 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5280 match(RegI);
5281 match(rRegI);
5282 match(rax_RegI);
5283 match(rbx_RegI);
5284 match(rcx_RegI);
5285 match(rdx_RegI);
5286 match(rdi_RegI);
5287
5288 format %{ %}
5289 interface(REG_INTER);
5290 %}
5291
5292 // Pointer Register
5293 operand any_RegP()
5294 %{
5295 constraint(ALLOC_IN_RC(any_reg));
5296 match(RegP);
5297 match(rax_RegP);
5298 match(rbx_RegP);
5299 match(rdi_RegP);
5300 match(rsi_RegP);
5301 match(rbp_RegP);
5302 match(r15_RegP);
5303 match(rRegP);
5304
5305 format %{ %}
5306 interface(REG_INTER);
5307 %}
5308
5309 operand rRegP()
5310 %{
5311 constraint(ALLOC_IN_RC(ptr_reg));
5312 match(RegP);
5313 match(rax_RegP);
5314 match(rbx_RegP);
5315 match(rdi_RegP);
5316 match(rsi_RegP);
5317 match(rbp_RegP); // See Q&A below about
5318 match(r15_RegP); // r15_RegP and rbp_RegP.
5319
5320 format %{ %}
5321 interface(REG_INTER);
5322 %}
5323
5324 operand rRegN() %{
5325 constraint(ALLOC_IN_RC(int_reg));
5326 match(RegN);
5327
5328 format %{ %}
5329 interface(REG_INTER);
5330 %}
5331
5332 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5333 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, the allocator never assigns
// r15 to a value, so r15 never actually appears as an instruction input either.
// The same logic applies to rbp_RegP being a match for rRegP: if
// PreserveFramePointer is enabled, RBP is used as a proper frame pointer and is
// not included in ptr_reg.  As a result, RBP is never chosen as an instruction
// output either.
5342
5343 // This operand is not allowed to use RBP even if
5344 // RBP is not used to hold the frame pointer.
5345 operand no_rbp_RegP()
5346 %{
5347 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5348 match(RegP);
5349 match(rbx_RegP);
5350 match(rsi_RegP);
5351 match(rdi_RegP);
5352
5353 format %{ %}
5354 interface(REG_INTER);
5355 %}
5356
5357 // Special Registers
5358 // Return a pointer value
5359 operand rax_RegP()
5360 %{
5361 constraint(ALLOC_IN_RC(ptr_rax_reg));
5362 match(RegP);
5363 match(rRegP);
5364
5365 format %{ %}
5366 interface(REG_INTER);
5367 %}
5368
5369 // Special Registers
5370 // Return a compressed pointer value
5371 operand rax_RegN()
5372 %{
5373 constraint(ALLOC_IN_RC(int_rax_reg));
5374 match(RegN);
5375 match(rRegN);
5376
5377 format %{ %}
5378 interface(REG_INTER);
5379 %}
5380
5381 // Used in AtomicAdd
5382 operand rbx_RegP()
5383 %{
5384 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5385 match(RegP);
5386 match(rRegP);
5387
5388 format %{ %}
5389 interface(REG_INTER);
5390 %}
5391
5392 operand rsi_RegP()
5393 %{
5394 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5395 match(RegP);
5396 match(rRegP);
5397
5398 format %{ %}
5399 interface(REG_INTER);
5400 %}
5401
5402 operand rbp_RegP()
5403 %{
5404 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5405 match(RegP);
5406 match(rRegP);
5407
5408 format %{ %}
5409 interface(REG_INTER);
5410 %}
5411
5412 // Used in rep stosq
5413 operand rdi_RegP()
5414 %{
5415 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5416 match(RegP);
5417 match(rRegP);
5418
5419 format %{ %}
5420 interface(REG_INTER);
5421 %}
5422
5423 operand r15_RegP()
5424 %{
5425 constraint(ALLOC_IN_RC(ptr_r15_reg));
5426 match(RegP);
5427 match(rRegP);
5428
5429 format %{ %}
5430 interface(REG_INTER);
5431 %}
5432
5433 operand rRegL()
5434 %{
5435 constraint(ALLOC_IN_RC(long_reg));
5436 match(RegL);
5437 match(rax_RegL);
5438 match(rdx_RegL);
5439
5440 format %{ %}
5441 interface(REG_INTER);
5442 %}
5443
5444 // Special Registers
5445 operand no_rax_rdx_RegL()
5446 %{
5447 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5448 match(RegL);
5449 match(rRegL);
5450
5451 format %{ %}
5452 interface(REG_INTER);
5453 %}
5454
5455 operand rax_RegL()
5456 %{
5457 constraint(ALLOC_IN_RC(long_rax_reg));
5458 match(RegL);
5459 match(rRegL);
5460
5461 format %{ "RAX" %}
5462 interface(REG_INTER);
5463 %}
5464
5465 operand rcx_RegL()
5466 %{
5467 constraint(ALLOC_IN_RC(long_rcx_reg));
5468 match(RegL);
5469 match(rRegL);
5470
5471 format %{ %}
5472 interface(REG_INTER);
5473 %}
5474
5475 operand rdx_RegL()
5476 %{
5477 constraint(ALLOC_IN_RC(long_rdx_reg));
5478 match(RegL);
5479 match(rRegL);
5480
5481 format %{ %}
5482 interface(REG_INTER);
5483 %}
5484
5485 operand r11_RegL()
5486 %{
5487 constraint(ALLOC_IN_RC(long_r11_reg));
5488 match(RegL);
5489 match(rRegL);
5490
5491 format %{ %}
5492 interface(REG_INTER);
5493 %}
5494
5495 operand no_rbp_r13_RegL()
5496 %{
5497 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5498 match(RegL);
5499 match(rRegL);
5500 match(rax_RegL);
5501 match(rcx_RegL);
5502 match(rdx_RegL);
5503
5504 format %{ %}
5505 interface(REG_INTER);
5506 %}
5507
5508 // Flags register, used as output of compare instructions
5509 operand rFlagsReg()
5510 %{
5511 constraint(ALLOC_IN_RC(int_flags));
5512 match(RegFlags);
5513
5514 format %{ "RFLAGS" %}
5515 interface(REG_INTER);
5516 %}
5517
5518 // Flags register, used as output of FLOATING POINT compare instructions
5519 operand rFlagsRegU()
5520 %{
5521 constraint(ALLOC_IN_RC(int_flags));
5522 match(RegFlags);
5523
5524 format %{ "RFLAGS_U" %}
5525 interface(REG_INTER);
5526 %}
5527
5528 operand rFlagsRegUCF() %{
5529 constraint(ALLOC_IN_RC(int_flags));
5530 match(RegFlags);
5531 predicate(false);
5532
5533 format %{ "RFLAGS_U_CF" %}
5534 interface(REG_INTER);
5535 %}
5536
5537 // Float register operands
5538 operand regF() %{
5539 constraint(ALLOC_IN_RC(float_reg));
5540 match(RegF);
5541
5542 format %{ %}
5543 interface(REG_INTER);
5544 %}
5545
5546 // Float register operands
5547 operand legRegF() %{
5548 constraint(ALLOC_IN_RC(float_reg_legacy));
5549 match(RegF);
5550
5551 format %{ %}
5552 interface(REG_INTER);
5553 %}
5554
5555 // Float register operands
5556 operand vlRegF() %{
5557 constraint(ALLOC_IN_RC(float_reg_vl));
5558 match(RegF);
5559
5560 format %{ %}
5561 interface(REG_INTER);
5562 %}
5563
5564 // Double register operands
5565 operand regD() %{
5566 constraint(ALLOC_IN_RC(double_reg));
5567 match(RegD);
5568
5569 format %{ %}
5570 interface(REG_INTER);
5571 %}
5572
5573 // Double register operands
5574 operand legRegD() %{
5575 constraint(ALLOC_IN_RC(double_reg_legacy));
5576 match(RegD);
5577
5578 format %{ %}
5579 interface(REG_INTER);
5580 %}
5581
5582 // Double register operands
5583 operand vlRegD() %{
5584 constraint(ALLOC_IN_RC(double_reg_vl));
5585 match(RegD);
5586
5587 format %{ %}
5588 interface(REG_INTER);
5589 %}
5590
5591 //----------Memory Operands----------------------------------------------------
5592 // Direct Memory Operand
5593 // operand direct(immP addr)
5594 // %{
5595 // match(addr);
5596
5597 // format %{ "[$addr]" %}
5598 // interface(MEMORY_INTER) %{
5599 // base(0xFFFFFFFF);
5600 // index(0x4);
5601 // scale(0x0);
5602 // disp($addr);
5603 // %}
5604 // %}
5605
5606 // Indirect Memory Operand
5607 operand indirect(any_RegP reg)
5608 %{
5609 constraint(ALLOC_IN_RC(ptr_reg));
5610 match(reg);
5611
5612 format %{ "[$reg]" %}
5613 interface(MEMORY_INTER) %{
5614 base($reg);
5615 index(0x4);
5616 scale(0x0);
5617 disp(0x0);
5618 %}
5619 %}
5620
5621 // Indirect Memory Plus Short Offset Operand
5622 operand indOffset8(any_RegP reg, immL8 off)
5623 %{
5624 constraint(ALLOC_IN_RC(ptr_reg));
5625 match(AddP reg off);
5626
5627 format %{ "[$reg + $off (8-bit)]" %}
5628 interface(MEMORY_INTER) %{
5629 base($reg);
5630 index(0x4);
5631 scale(0x0);
5632 disp($off);
5633 %}
5634 %}
5635
5636 // Indirect Memory Plus Long Offset Operand
5637 operand indOffset32(any_RegP reg, immL32 off)
5638 %{
5639 constraint(ALLOC_IN_RC(ptr_reg));
5640 match(AddP reg off);
5641
5642 format %{ "[$reg + $off (32-bit)]" %}
5643 interface(MEMORY_INTER) %{
5644 base($reg);
5645 index(0x4);
5646 scale(0x0);
5647 disp($off);
5648 %}
5649 %}
5650
5651 // Indirect Memory Plus Index Register Plus Offset Operand
5652 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5653 %{
5654 constraint(ALLOC_IN_RC(ptr_reg));
5655 match(AddP (AddP reg lreg) off);
5656
5657 op_cost(10);
5658 format %{"[$reg + $off + $lreg]" %}
5659 interface(MEMORY_INTER) %{
5660 base($reg);
5661 index($lreg);
5662 scale(0x0);
5663 disp($off);
5664 %}
5665 %}
5666
5667 // Indirect Memory Plus Index Register Plus Offset Operand
5668 operand indIndex(any_RegP reg, rRegL lreg)
5669 %{
5670 constraint(ALLOC_IN_RC(ptr_reg));
5671 match(AddP reg lreg);
5672
5673 op_cost(10);
5674 format %{"[$reg + $lreg]" %}
5675 interface(MEMORY_INTER) %{
5676 base($reg);
5677 index($lreg);
5678 scale(0x0);
5679 disp(0x0);
5680 %}
5681 %}
5682
5683 // Indirect Memory Times Scale Plus Index Register
5684 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5685 %{
5686 constraint(ALLOC_IN_RC(ptr_reg));
5687 match(AddP reg (LShiftL lreg scale));
5688
5689 op_cost(10);
5690 format %{"[$reg + $lreg << $scale]" %}
5691 interface(MEMORY_INTER) %{
5692 base($reg);
5693 index($lreg);
5694 scale($scale);
5695 disp(0x0);
5696 %}
5697 %}
5698
5699 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5700 %{
5701 constraint(ALLOC_IN_RC(ptr_reg));
5702 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5703 match(AddP reg (LShiftL (ConvI2L idx) scale));
5704
5705 op_cost(10);
5706 format %{"[$reg + pos $idx << $scale]" %}
5707 interface(MEMORY_INTER) %{
5708 base($reg);
5709 index($idx);
5710 scale($scale);
5711 disp(0x0);
5712 %}
5713 %}
5714
5715 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5716 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5717 %{
5718 constraint(ALLOC_IN_RC(ptr_reg));
5719 match(AddP (AddP reg (LShiftL lreg scale)) off);
5720
5721 op_cost(10);
5722 format %{"[$reg + $off + $lreg << $scale]" %}
5723 interface(MEMORY_INTER) %{
5724 base($reg);
5725 index($lreg);
5726 scale($scale);
5727 disp($off);
5728 %}
5729 %}
5730
5731 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5732 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5733 %{
5734 constraint(ALLOC_IN_RC(ptr_reg));
5735 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5736 match(AddP (AddP reg (ConvI2L idx)) off);
5737
5738 op_cost(10);
5739 format %{"[$reg + $off + $idx]" %}
5740 interface(MEMORY_INTER) %{
5741 base($reg);
5742 index($idx);
5743 scale(0x0);
5744 disp($off);
5745 %}
5746 %}
5747
5748 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5749 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5750 %{
5751 constraint(ALLOC_IN_RC(ptr_reg));
5752 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5753 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5754
5755 op_cost(10);
5756 format %{"[$reg + $off + $idx << $scale]" %}
5757 interface(MEMORY_INTER) %{
5758 base($reg);
5759 index($idx);
5760 scale($scale);
5761 disp($off);
5762 %}
5763 %}
5764
5765 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base register, so we can't free up r12 even when
// CompressedOops::base() == nullptr.
5768 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5769 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5770 constraint(ALLOC_IN_RC(ptr_reg));
5771 match(AddP (DecodeN reg) off);
5772
5773 op_cost(10);
5774 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5775 interface(MEMORY_INTER) %{
5776 base(0xc); // R12
5777 index($reg);
5778 scale(0x3);
5779 disp($off);
5780 %}
5781 %}
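
// For example, with the heap base kept in R12 and CompressedOops::shift() == 3,
// a narrow oop n decodes to R12 + (n << 3), so a load from field offset 16 can
// use the single addressing mode [R12 + $reg << 3 + 16] and fold away the
// DecodeN node entirely.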
5782
5783 // Indirect Memory Operand
5784 operand indirectNarrow(rRegN reg)
5785 %{
5786 predicate(CompressedOops::shift() == 0);
5787 constraint(ALLOC_IN_RC(ptr_reg));
5788 match(DecodeN reg);
5789
5790 format %{ "[$reg]" %}
5791 interface(MEMORY_INTER) %{
5792 base($reg);
5793 index(0x4);
5794 scale(0x0);
5795 disp(0x0);
5796 %}
5797 %}
5798
5799 // Indirect Memory Plus Short Offset Operand
5800 operand indOffset8Narrow(rRegN reg, immL8 off)
5801 %{
5802 predicate(CompressedOops::shift() == 0);
5803 constraint(ALLOC_IN_RC(ptr_reg));
5804 match(AddP (DecodeN reg) off);
5805
5806 format %{ "[$reg + $off (8-bit)]" %}
5807 interface(MEMORY_INTER) %{
5808 base($reg);
5809 index(0x4);
5810 scale(0x0);
5811 disp($off);
5812 %}
5813 %}
5814
5815 // Indirect Memory Plus Long Offset Operand
5816 operand indOffset32Narrow(rRegN reg, immL32 off)
5817 %{
5818 predicate(CompressedOops::shift() == 0);
5819 constraint(ALLOC_IN_RC(ptr_reg));
5820 match(AddP (DecodeN reg) off);
5821
5822 format %{ "[$reg + $off (32-bit)]" %}
5823 interface(MEMORY_INTER) %{
5824 base($reg);
5825 index(0x4);
5826 scale(0x0);
5827 disp($off);
5828 %}
5829 %}
5830
5831 // Indirect Memory Plus Index Register Plus Offset Operand
5832 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5833 %{
5834 predicate(CompressedOops::shift() == 0);
5835 constraint(ALLOC_IN_RC(ptr_reg));
5836 match(AddP (AddP (DecodeN reg) lreg) off);
5837
5838 op_cost(10);
5839 format %{"[$reg + $off + $lreg]" %}
5840 interface(MEMORY_INTER) %{
5841 base($reg);
5842 index($lreg);
5843 scale(0x0);
5844 disp($off);
5845 %}
5846 %}
5847
5848 // Indirect Memory Plus Index Register Plus Offset Operand
5849 operand indIndexNarrow(rRegN reg, rRegL lreg)
5850 %{
5851 predicate(CompressedOops::shift() == 0);
5852 constraint(ALLOC_IN_RC(ptr_reg));
5853 match(AddP (DecodeN reg) lreg);
5854
5855 op_cost(10);
5856 format %{"[$reg + $lreg]" %}
5857 interface(MEMORY_INTER) %{
5858 base($reg);
5859 index($lreg);
5860 scale(0x0);
5861 disp(0x0);
5862 %}
5863 %}
5864
5865 // Indirect Memory Times Scale Plus Index Register
5866 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5867 %{
5868 predicate(CompressedOops::shift() == 0);
5869 constraint(ALLOC_IN_RC(ptr_reg));
5870 match(AddP (DecodeN reg) (LShiftL lreg scale));
5871
5872 op_cost(10);
5873 format %{"[$reg + $lreg << $scale]" %}
5874 interface(MEMORY_INTER) %{
5875 base($reg);
5876 index($lreg);
5877 scale($scale);
5878 disp(0x0);
5879 %}
5880 %}
5881
5882 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5883 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5884 %{
5885 predicate(CompressedOops::shift() == 0);
5886 constraint(ALLOC_IN_RC(ptr_reg));
5887 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5888
5889 op_cost(10);
5890 format %{"[$reg + $off + $lreg << $scale]" %}
5891 interface(MEMORY_INTER) %{
5892 base($reg);
5893 index($lreg);
5894 scale($scale);
5895 disp($off);
5896 %}
5897 %}
5898
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5900 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5901 %{
5902 constraint(ALLOC_IN_RC(ptr_reg));
5903 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5904 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5905
5906 op_cost(10);
5907 format %{"[$reg + $off + $idx]" %}
5908 interface(MEMORY_INTER) %{
5909 base($reg);
5910 index($idx);
5911 scale(0x0);
5912 disp($off);
5913 %}
5914 %}
5915
5916 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5917 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5918 %{
5919 constraint(ALLOC_IN_RC(ptr_reg));
5920 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5921 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5922
5923 op_cost(10);
5924 format %{"[$reg + $off + $idx << $scale]" %}
5925 interface(MEMORY_INTER) %{
5926 base($reg);
5927 index($idx);
5928 scale($scale);
5929 disp($off);
5930 %}
5931 %}
5932
5933 //----------Special Memory Operands--------------------------------------------
5934 // Stack Slot Operand - This operand is used for loading and storing temporary
5935 // values on the stack where a match requires a value to
5936 // flow through memory.
5937 operand stackSlotP(sRegP reg)
5938 %{
5939 constraint(ALLOC_IN_RC(stack_slots));
5940 // No match rule because this operand is only generated in matching
5941
5942 format %{ "[$reg]" %}
5943 interface(MEMORY_INTER) %{
5944 base(0x4); // RSP
5945 index(0x4); // No Index
5946 scale(0x0); // No Scale
5947 disp($reg); // Stack Offset
5948 %}
5949 %}
5950
5951 operand stackSlotI(sRegI reg)
5952 %{
5953 constraint(ALLOC_IN_RC(stack_slots));
5954 // No match rule because this operand is only generated in matching
5955
5956 format %{ "[$reg]" %}
5957 interface(MEMORY_INTER) %{
5958 base(0x4); // RSP
5959 index(0x4); // No Index
5960 scale(0x0); // No Scale
5961 disp($reg); // Stack Offset
5962 %}
5963 %}
5964
5965 operand stackSlotF(sRegF reg)
5966 %{
5967 constraint(ALLOC_IN_RC(stack_slots));
5968 // No match rule because this operand is only generated in matching
5969
5970 format %{ "[$reg]" %}
5971 interface(MEMORY_INTER) %{
5972 base(0x4); // RSP
5973 index(0x4); // No Index
5974 scale(0x0); // No Scale
5975 disp($reg); // Stack Offset
5976 %}
5977 %}
5978
5979 operand stackSlotD(sRegD reg)
5980 %{
5981 constraint(ALLOC_IN_RC(stack_slots));
5982 // No match rule because this operand is only generated in matching
5983
5984 format %{ "[$reg]" %}
5985 interface(MEMORY_INTER) %{
5986 base(0x4); // RSP
5987 index(0x4); // No Index
5988 scale(0x0); // No Scale
5989 disp($reg); // Stack Offset
5990 %}
5991 %}
5992 operand stackSlotL(sRegL reg)
5993 %{
5994 constraint(ALLOC_IN_RC(stack_slots));
5995 // No match rule because this operand is only generated in matching
5996
5997 format %{ "[$reg]" %}
5998 interface(MEMORY_INTER) %{
5999 base(0x4); // RSP
6000 index(0x4); // No Index
6001 scale(0x0); // No Scale
6002 disp($reg); // Stack Offset
6003 %}
6004 %}
6005
6006 //----------Conditional Branch Operands----------------------------------------
6007 // Comparison Op - This is the operation of the comparison, and is limited to
6008 // the following set of codes:
6009 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6010 //
6011 // Other attributes of the comparison, such as unsignedness, are specified
6012 // by the comparison instruction that sets a condition code flags register.
6013 // That result is represented by a flags operand whose subtype is appropriate
6014 // to the unsignedness (etc.) of the comparison.
6015 //
6016 // Later, the instruction which matches both the Comparison Op (a Bool) and
6017 // the flags (produced by the Cmp) specifies the coding of the comparison op
6018 // by matching a specific subtype of Bool operand below, such as cmpOpU.
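//
// For example, a signed branch on (x < y) matches a Bool with test
// BoolTest::lt over a CmpI: the compare instruction produces rFlagsReg, and
// the cmpOp operand below supplies the "l" (0xC) condition encoding to the
// conditional jump.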
6019
6020 // Comparison Code
6021 operand cmpOp()
6022 %{
6023 match(Bool);
6024
6025 format %{ "" %}
6026 interface(COND_INTER) %{
6027 equal(0x4, "e");
6028 not_equal(0x5, "ne");
6029 less(0xC, "l");
6030 greater_equal(0xD, "ge");
6031 less_equal(0xE, "le");
6032 greater(0xF, "g");
6033 overflow(0x0, "o");
6034 no_overflow(0x1, "no");
6035 %}
6036 %}
6037
6038 // Comparison Code, unsigned compare. Used by FP also, with
6039 // C2 (unordered) turned into GT or LT already. The other bits
6040 // C0 and C3 are turned into Carry & Zero flags.
6041 operand cmpOpU()
6042 %{
6043 match(Bool);
6044
6045 format %{ "" %}
6046 interface(COND_INTER) %{
6047 equal(0x4, "e");
6048 not_equal(0x5, "ne");
6049 less(0x2, "b");
6050 greater_equal(0x3, "ae");
6051 less_equal(0x6, "be");
6052 greater(0x7, "a");
6053 overflow(0x0, "o");
6054 no_overflow(0x1, "no");
6055 %}
6056 %}
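
// (ucomiss/ucomisd set ZF, PF and CF the same way an unsigned integer compare
// sets them, which is why FP branches reuse the unsigned below/above
// condition codes.)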
6057
6058
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6062 operand cmpOpUCF() %{
6063 match(Bool);
6064 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
6065 n->as_Bool()->_test._test == BoolTest::ge ||
6066 n->as_Bool()->_test._test == BoolTest::le ||
6067 n->as_Bool()->_test._test == BoolTest::gt ||
6068 n->in(1)->in(1) == n->in(1)->in(2));
6069 format %{ "" %}
6070 interface(COND_INTER) %{
6071 equal(0xb, "np");
6072 not_equal(0xa, "p");
6073 less(0x2, "b");
6074 greater_equal(0x3, "ae");
6075 less_equal(0x6, "be");
6076 greater(0x7, "a");
6077 overflow(0x0, "o");
6078 no_overflow(0x1, "no");
6079 %}
6080 %}
6081
6082
6083 // Floating comparisons that can be fixed up with extra conditional jumps
6084 operand cmpOpUCF2() %{
6085 match(Bool);
6086 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6087 n->as_Bool()->_test._test == BoolTest::eq) &&
6088 n->in(1)->in(1) != n->in(1)->in(2));
6089 format %{ "" %}
6090 interface(COND_INTER) %{
6091 equal(0x4, "e");
6092 not_equal(0x5, "ne");
6093 less(0x2, "b");
6094 greater_equal(0x3, "ae");
6095 less_equal(0x6, "be");
6096 greater(0x7, "a");
6097 overflow(0x0, "o");
6098 no_overflow(0x1, "no");
6099 %}
6100 %}
6101
// Operands for bound floating point register arguments
6103 operand rxmm0() %{
6104 constraint(ALLOC_IN_RC(xmm0_reg));
6105 match(VecX);
6106 format%{%}
6107 interface(REG_INTER);
6108 %}
6109
6110 // Vectors
6111
6112 // Dummy generic vector class. Should be used for all vector operands.
6113 // Replaced with vec[SDXYZ] during post-selection pass.
6114 operand vec() %{
6115 constraint(ALLOC_IN_RC(dynamic));
6116 match(VecX);
6117 match(VecY);
6118 match(VecZ);
6119 match(VecS);
6120 match(VecD);
6121
6122 format %{ %}
6123 interface(REG_INTER);
6124 %}
6125
6126 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6127 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6128 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6129 // runtime code generation via reg_class_dynamic.
6130 operand legVec() %{
6131 constraint(ALLOC_IN_RC(dynamic));
6132 match(VecX);
6133 match(VecY);
6134 match(VecZ);
6135 match(VecS);
6136 match(VecD);
6137
6138 format %{ %}
6139 interface(REG_INTER);
6140 %}
6141
6142 // Replaces vec during post-selection cleanup. See above.
6143 operand vecS() %{
6144 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6145 match(VecS);
6146
6147 format %{ %}
6148 interface(REG_INTER);
6149 %}
6150
6151 // Replaces legVec during post-selection cleanup. See above.
6152 operand legVecS() %{
6153 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6154 match(VecS);
6155
6156 format %{ %}
6157 interface(REG_INTER);
6158 %}
6159
6160 // Replaces vec during post-selection cleanup. See above.
6161 operand vecD() %{
6162 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6163 match(VecD);
6164
6165 format %{ %}
6166 interface(REG_INTER);
6167 %}
6168
6169 // Replaces legVec during post-selection cleanup. See above.
6170 operand legVecD() %{
6171 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6172 match(VecD);
6173
6174 format %{ %}
6175 interface(REG_INTER);
6176 %}
6177
6178 // Replaces vec during post-selection cleanup. See above.
6179 operand vecX() %{
6180 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6181 match(VecX);
6182
6183 format %{ %}
6184 interface(REG_INTER);
6185 %}
6186
6187 // Replaces legVec during post-selection cleanup. See above.
6188 operand legVecX() %{
6189 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6190 match(VecX);
6191
6192 format %{ %}
6193 interface(REG_INTER);
6194 %}
6195
6196 // Replaces vec during post-selection cleanup. See above.
6197 operand vecY() %{
6198 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6199 match(VecY);
6200
6201 format %{ %}
6202 interface(REG_INTER);
6203 %}
6204
6205 // Replaces legVec during post-selection cleanup. See above.
6206 operand legVecY() %{
6207 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6208 match(VecY);
6209
6210 format %{ %}
6211 interface(REG_INTER);
6212 %}
6213
6214 // Replaces vec during post-selection cleanup. See above.
6215 operand vecZ() %{
6216 constraint(ALLOC_IN_RC(vectorz_reg));
6217 match(VecZ);
6218
6219 format %{ %}
6220 interface(REG_INTER);
6221 %}
6222
6223 // Replaces legVec during post-selection cleanup. See above.
6224 operand legVecZ() %{
6225 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6226 match(VecZ);
6227
6228 format %{ %}
6229 interface(REG_INTER);
6230 %}
6231
6232 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6234 // instruction definitions by not requiring the AD writer to specify separate
6235 // instructions for every form of operand when the instruction accepts
6236 // multiple operand types with the same basic encoding and format. The classic
6237 // case of this is memory operands.
6238
6239 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6240 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6241 indCompressedOopOffset,
6242 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6243 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6244 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
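
// A single instruction rule can then accept every addressing form above.
// Illustrative sketch (hypothetical rule; the real memory rules live in the
// instruction section):
//
//   instruct loadI_sketch(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}
//
// The $mem$$Address accessor assembles base/index/scale/disp from whichever
// concrete operand actually matched.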
6245
6246 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
6248 pipeline %{
6249
6250 //----------ATTRIBUTES---------------------------------------------------------
6251 attributes %{
  variable_size_instructions;        // Variable-size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
6255 instruction_fetch_unit_size = 16; // The processor fetches one line
6256 instruction_fetch_units = 1; // of 16 bytes
6257 %}
6258
6259 //----------RESOURCES----------------------------------------------------------
6260 // Resources are the functional units available to the machine
6261
6262 // Generic P2/P3 pipeline
6263 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6264 // 3 instructions decoded per cycle.
6265 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
6267 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6268 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6269 BR, FPU,
6270 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6271
6272 //----------PIPELINE DESCRIPTION-----------------------------------------------
6273 // Pipeline Description specifies the stages in the machine's pipeline
6274
6275 // Generic P2/P3 pipeline
6276 pipe_desc(S0, S1, S2, S3, S4, S5);
6277
6278 //----------PIPELINE CLASSES---------------------------------------------------
6279 // Pipeline Classes describe the stages in which input and output are
6280 // referenced by the hardware pipeline.
6281
6282 // Naming convention: ialu or fpu
6283 // Then: _reg
6284 // Then: _reg if there is a 2nd register
6285 // Then: _long if it's a pair of instructions implementing a long
6286 // Then: _fat if it requires the big decoder
6287 // Or: _mem if it requires the big decoder and a memory unit.
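//
// For example, ialu_reg_mem below is an integer ALU operation with a memory
// source: it needs the big decoder (D0) plus a memory unit, and its result
// (dst at S5) becomes available one stage later than in the plain ialu_reg
// case (dst at S4).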
6288
6289 // Integer ALU reg operation
6290 pipe_class ialu_reg(rRegI dst)
6291 %{
6292 single_instruction;
6293 dst : S4(write);
6294 dst : S3(read);
6295 DECODE : S0; // any decoder
6296 ALU : S3; // any alu
6297 %}
6298
6299 // Long ALU reg operation
6300 pipe_class ialu_reg_long(rRegL dst)
6301 %{
6302 instruction_count(2);
6303 dst : S4(write);
6304 dst : S3(read);
6305 DECODE : S0(2); // any 2 decoders
6306 ALU : S3(2); // both alus
6307 %}
6308
6309 // Integer ALU reg operation using big decoder
6310 pipe_class ialu_reg_fat(rRegI dst)
6311 %{
6312 single_instruction;
6313 dst : S4(write);
6314 dst : S3(read);
6315 D0 : S0; // big decoder only
6316 ALU : S3; // any alu
6317 %}
6318
6319 // Integer ALU reg-reg operation
6320 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6321 %{
6322 single_instruction;
6323 dst : S4(write);
6324 src : S3(read);
6325 DECODE : S0; // any decoder
6326 ALU : S3; // any alu
6327 %}
6328
6329 // Integer ALU reg-reg operation
6330 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6331 %{
6332 single_instruction;
6333 dst : S4(write);
6334 src : S3(read);
6335 D0 : S0; // big decoder only
6336 ALU : S3; // any alu
6337 %}
6338
6339 // Integer ALU reg-mem operation
6340 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6341 %{
6342 single_instruction;
6343 dst : S5(write);
6344 mem : S3(read);
6345 D0 : S0; // big decoder only
6346 ALU : S4; // any alu
6347 MEM : S3; // any mem
6348 %}
6349
6350 // Integer mem operation (prefetch)
6351 pipe_class ialu_mem(memory mem)
6352 %{
6353 single_instruction;
6354 mem : S3(read);
6355 D0 : S0; // big decoder only
6356 MEM : S3; // any mem
6357 %}
6358
6359 // Integer Store to Memory
6360 pipe_class ialu_mem_reg(memory mem, rRegI src)
6361 %{
6362 single_instruction;
6363 mem : S3(read);
6364 src : S5(read);
6365 D0 : S0; // big decoder only
6366 ALU : S4; // any alu
6367 MEM : S3;
6368 %}
6369
6370 // // Long Store to Memory
6371 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6372 // %{
6373 // instruction_count(2);
6374 // mem : S3(read);
6375 // src : S5(read);
6376 // D0 : S0(2); // big decoder only; twice
6377 // ALU : S4(2); // any 2 alus
6378 // MEM : S3(2); // Both mems
6379 // %}
6380
6381 // Integer Store to Memory
6382 pipe_class ialu_mem_imm(memory mem)
6383 %{
6384 single_instruction;
6385 mem : S3(read);
6386 D0 : S0; // big decoder only
6387 ALU : S4; // any alu
6388 MEM : S3;
6389 %}
6390
6391 // Integer ALU0 reg-reg operation
6392 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6393 %{
6394 single_instruction;
6395 dst : S4(write);
6396 src : S3(read);
6397 D0 : S0; // Big decoder only
6398 ALU0 : S3; // only alu0
6399 %}
6400
6401 // Integer ALU0 reg-mem operation
6402 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6403 %{
6404 single_instruction;
6405 dst : S5(write);
6406 mem : S3(read);
6407 D0 : S0; // big decoder only
6408 ALU0 : S4; // ALU0 only
6409 MEM : S3; // any mem
6410 %}
6411
6412 // Integer ALU reg-reg operation
6413 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6414 %{
6415 single_instruction;
6416 cr : S4(write);
6417 src1 : S3(read);
6418 src2 : S3(read);
6419 DECODE : S0; // any decoder
6420 ALU : S3; // any alu
6421 %}
6422
6423 // Integer ALU reg-imm operation
6424 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6425 %{
6426 single_instruction;
6427 cr : S4(write);
6428 src1 : S3(read);
6429 DECODE : S0; // any decoder
6430 ALU : S3; // any alu
6431 %}
6432
6433 // Integer ALU reg-mem operation
6434 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6435 %{
6436 single_instruction;
6437 cr : S4(write);
6438 src1 : S3(read);
6439 src2 : S3(read);
6440 D0 : S0; // big decoder only
6441 ALU : S4; // any alu
6442 MEM : S3;
6443 %}
6444
6445 // Conditional move reg-reg
6446 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6447 %{
6448 instruction_count(4);
6449 y : S4(read);
6450 q : S3(read);
6451 p : S3(read);
6452 DECODE : S0(4); // any decoder
6453 %}
6454
6455 // Conditional move reg-reg
6456 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6457 %{
6458 single_instruction;
6459 dst : S4(write);
6460 src : S3(read);
6461 cr : S3(read);
6462 DECODE : S0; // any decoder
6463 %}
6464
6465 // Conditional move reg-mem
6466 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6467 %{
6468 single_instruction;
6469 dst : S4(write);
6470 src : S3(read);
6471 cr : S3(read);
6472 DECODE : S0; // any decoder
6473 MEM : S3;
6474 %}
6475
6476 // Conditional move reg-reg long
6477 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6478 %{
6479 single_instruction;
6480 dst : S4(write);
6481 src : S3(read);
6482 cr : S3(read);
6483 DECODE : S0(2); // any 2 decoders
6484 %}
6485
6486 // Float reg-reg operation
6487 pipe_class fpu_reg(regD dst)
6488 %{
6489 instruction_count(2);
6490 dst : S3(read);
6491 DECODE : S0(2); // any 2 decoders
6492 FPU : S3;
6493 %}
6494
6495 // Float reg-reg operation
6496 pipe_class fpu_reg_reg(regD dst, regD src)
6497 %{
6498 instruction_count(2);
6499 dst : S4(write);
6500 src : S3(read);
6501 DECODE : S0(2); // any 2 decoders
6502 FPU : S3;
6503 %}
6504
6505 // Float reg-reg operation
6506 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6507 %{
6508 instruction_count(3);
6509 dst : S4(write);
6510 src1 : S3(read);
6511 src2 : S3(read);
6512 DECODE : S0(3); // any 3 decoders
6513 FPU : S3(2);
6514 %}
6515
6516 // Float reg-reg operation
6517 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6518 %{
6519 instruction_count(4);
6520 dst : S4(write);
6521 src1 : S3(read);
6522 src2 : S3(read);
6523 src3 : S3(read);
  DECODE : S0(4);    // any 4 decoders
6525 FPU : S3(2);
6526 %}
6527
6528 // Float reg-reg operation
6529 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6530 %{
6531 instruction_count(4);
6532 dst : S4(write);
6533 src1 : S3(read);
6534 src2 : S3(read);
6535 src3 : S3(read);
6536 DECODE : S1(3); // any 3 decoders
6537 D0 : S0; // Big decoder only
6538 FPU : S3(2);
6539 MEM : S3;
6540 %}
6541
6542 // Float reg-mem operation
6543 pipe_class fpu_reg_mem(regD dst, memory mem)
6544 %{
6545 instruction_count(2);
6546 dst : S5(write);
6547 mem : S3(read);
6548 D0 : S0; // big decoder only
6549 DECODE : S1; // any decoder for FPU POP
6550 FPU : S4;
6551 MEM : S3; // any mem
6552 %}
6553
6554 // Float reg-mem operation
6555 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6556 %{
6557 instruction_count(3);
6558 dst : S5(write);
6559 src1 : S3(read);
6560 mem : S3(read);
6561 D0 : S0; // big decoder only
6562 DECODE : S1(2); // any decoder for FPU POP
6563 FPU : S4;
6564 MEM : S3; // any mem
6565 %}
6566
6567 // Float mem-reg operation
6568 pipe_class fpu_mem_reg(memory mem, regD src)
6569 %{
6570 instruction_count(2);
6571 src : S5(read);
6572 mem : S3(read);
6573 DECODE : S0; // any decoder for FPU PUSH
6574 D0 : S1; // big decoder only
6575 FPU : S4;
6576 MEM : S3; // any mem
6577 %}
6578
6579 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6580 %{
6581 instruction_count(3);
6582 src1 : S3(read);
6583 src2 : S3(read);
6584 mem : S3(read);
6585 DECODE : S0(2); // any decoder for FPU PUSH
6586 D0 : S1; // big decoder only
6587 FPU : S4;
6588 MEM : S3; // any mem
6589 %}
6590
6591 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6592 %{
6593 instruction_count(3);
6594 src1 : S3(read);
6595 src2 : S3(read);
6596 mem : S4(read);
6597 DECODE : S0; // any decoder for FPU PUSH
6598 D0 : S0(2); // big decoder only
6599 FPU : S4;
6600 MEM : S3(2); // any mem
6601 %}
6602
6603 pipe_class fpu_mem_mem(memory dst, memory src1)
6604 %{
6605 instruction_count(2);
6606 src1 : S3(read);
6607 dst : S4(read);
6608 D0 : S0(2); // big decoder only
6609 MEM : S3(2); // any mem
6610 %}
6611
6612 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6613 %{
6614 instruction_count(3);
6615 src1 : S3(read);
6616 src2 : S3(read);
6617 dst : S4(read);
6618 D0 : S0(3); // big decoder only
6619 FPU : S4;
6620 MEM : S3(3); // any mem
6621 %}
6622
6623 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6624 %{
6625 instruction_count(3);
6626 src1 : S4(read);
6627 mem : S4(read);
6628 DECODE : S0; // any decoder for FPU PUSH
6629 D0 : S0(2); // big decoder only
6630 FPU : S4;
6631 MEM : S3(2); // any mem
6632 %}
6633
6634 // Float load constant
6635 pipe_class fpu_reg_con(regD dst)
6636 %{
6637 instruction_count(2);
6638 dst : S5(write);
6639 D0 : S0; // big decoder only for the load
6640 DECODE : S1; // any decoder for FPU POP
6641 FPU : S4;
6642 MEM : S3; // any mem
6643 %}
6644
6645 // Float load constant
6646 pipe_class fpu_reg_reg_con(regD dst, regD src)
6647 %{
6648 instruction_count(3);
6649 dst : S5(write);
6650 src : S3(read);
6651 D0 : S0; // big decoder only for the load
6652 DECODE : S1(2); // any decoder for FPU POP
6653 FPU : S4;
6654 MEM : S3; // any mem
6655 %}
6656
// Unconditional branch
6658 pipe_class pipe_jmp(label labl)
6659 %{
6660 single_instruction;
6661 BR : S3;
6662 %}
6663
6664 // Conditional branch
6665 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6666 %{
6667 single_instruction;
6668 cr : S1(read);
6669 BR : S3;
6670 %}
6671
6672 // Allocation idiom
6673 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6674 %{
6675 instruction_count(1); force_serialization;
6676 fixed_latency(6);
6677 heap_ptr : S3(read);
6678 DECODE : S0(3);
6679 D0 : S2;
6680 MEM : S3;
6681 ALU : S3(2);
6682 dst : S5(write);
6683 BR : S5;
6684 %}
6685
6686 // Generic big/slow expanded idiom
6687 pipe_class pipe_slow()
6688 %{
6689 instruction_count(10); multiple_bundles; force_serialization;
6690 fixed_latency(100);
6691 D0 : S0(2);
6692 MEM : S3(2);
6693 %}
6694
6695 // The real do-nothing guy
6696 pipe_class empty()
6697 %{
6698 instruction_count(0);
6699 %}
6700
6701 // Define the class for the Nop node
6702 define
6703 %{
6704 MachNop = empty;
6705 %}
6706
6707 %}
6708
6709 //----------INSTRUCTIONS-------------------------------------------------------
6710 //
6711 // match -- States which machine-independent subtree may be replaced
6712 // by this instruction.
6713 // ins_cost -- The estimated cost of this instruction is used by instruction
6714 // selection to identify a minimum cost tree of machine
6715 // instructions that matches a tree of machine-independent
6716 // instructions.
6717 // format -- A string providing the disassembly for this instruction.
6718 // The value of an instruction's operand may be inserted
6719 // by referring to it with a '$' prefix.
6720 // opcode -- Three instruction opcodes may be provided. These are referred
6721 // to within an encode class as $primary, $secondary, and $tertiary
// respectively. The primary opcode is commonly used to
6723 // indicate the type of machine instruction, while secondary
6724 // and tertiary are often used for prefix options or addressing
6725 // modes.
6726 // ins_encode -- A list of encode classes with parameters. The encode class
6727 // name must have been defined in an 'enc_class' specification
6728 // in the encode section of the architecture description.
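//
// As an illustrative sketch only (exampleAddI is not a rule in this file),
// a minimal instruct definition combining these pieces might look like:
//
//   instruct exampleAddI(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src)); // replace the ideal AddI subtree
//     effect(KILL cr);               // addl clobbers the condition codes
//     ins_cost(150);                 // relative cost seen by the matcher
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}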
6729
6730 // ============================================================================
6731
6732 instruct ShouldNotReachHere() %{
6733 match(Halt);
6734 format %{ "stop\t# ShouldNotReachHere" %}
6735 ins_encode %{
6736 if (is_reachable()) {
6737 const char* str = __ code_string(_halt_reason);
6738 __ stop(str);
6739 }
6740 %}
6741 ins_pipe(pipe_slow);
6742 %}
6743
6744 // ============================================================================
6745
6746 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
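// Since none of these moves survive to code emission, each encoding below
// simply calls ShouldNotReachHere().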
6747 // Load Float
6748 instruct MoveF2VL(vlRegF dst, regF src) %{
6749 match(Set dst src);
6750 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6751 ins_encode %{
6752 ShouldNotReachHere();
6753 %}
6754 ins_pipe( fpu_reg_reg );
6755 %}
6756
6757 // Load Float
6758 instruct MoveF2LEG(legRegF dst, regF src) %{
6759 match(Set dst src);
6760 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6761 ins_encode %{
6762 ShouldNotReachHere();
6763 %}
6764 ins_pipe( fpu_reg_reg );
6765 %}
6766
6767 // Load Float
6768 instruct MoveVL2F(regF dst, vlRegF src) %{
6769 match(Set dst src);
6770 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6771 ins_encode %{
6772 ShouldNotReachHere();
6773 %}
6774 ins_pipe( fpu_reg_reg );
6775 %}
6776
6777 // Load Float
6778 instruct MoveLEG2F(regF dst, legRegF src) %{
6779 match(Set dst src);
6780 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6781 ins_encode %{
6782 ShouldNotReachHere();
6783 %}
6784 ins_pipe( fpu_reg_reg );
6785 %}
6786
6787 // Load Double
6788 instruct MoveD2VL(vlRegD dst, regD src) %{
6789 match(Set dst src);
6790 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6791 ins_encode %{
6792 ShouldNotReachHere();
6793 %}
6794 ins_pipe( fpu_reg_reg );
6795 %}
6796
6797 // Load Double
6798 instruct MoveD2LEG(legRegD dst, regD src) %{
6799 match(Set dst src);
6800 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6801 ins_encode %{
6802 ShouldNotReachHere();
6803 %}
6804 ins_pipe( fpu_reg_reg );
6805 %}
6806
6807 // Load Double
6808 instruct MoveVL2D(regD dst, vlRegD src) %{
6809 match(Set dst src);
6810 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6811 ins_encode %{
6812 ShouldNotReachHere();
6813 %}
6814 ins_pipe( fpu_reg_reg );
6815 %}
6816
6817 // Load Double
6818 instruct MoveLEG2D(regD dst, legRegD src) %{
6819 match(Set dst src);
6820 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6821 ins_encode %{
6822 ShouldNotReachHere();
6823 %}
6824 ins_pipe( fpu_reg_reg );
6825 %}
6826
6827 //----------Load/Store/Move Instructions---------------------------------------
6828 //----------Load Instructions--------------------------------------------------
6829
6830 // Load Byte (8 bit signed)
6831 instruct loadB(rRegI dst, memory mem)
6832 %{
6833 match(Set dst (LoadB mem));
6834
6835 ins_cost(125);
6836 format %{ "movsbl $dst, $mem\t# byte" %}
6837
6838 ins_encode %{
6839 __ movsbl($dst$$Register, $mem$$Address);
6840 %}
6841
6842 ins_pipe(ialu_reg_mem);
6843 %}
6844
6845 // Load Byte (8 bit signed) into Long Register
6846 instruct loadB2L(rRegL dst, memory mem)
6847 %{
6848 match(Set dst (ConvI2L (LoadB mem)));
6849
6850 ins_cost(125);
6851 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6852
6853 ins_encode %{
6854 __ movsbq($dst$$Register, $mem$$Address);
6855 %}
6856
6857 ins_pipe(ialu_reg_mem);
6858 %}
6859
6860 // Load Unsigned Byte (8 bit UNsigned)
6861 instruct loadUB(rRegI dst, memory mem)
6862 %{
6863 match(Set dst (LoadUB mem));
6864
6865 ins_cost(125);
6866 format %{ "movzbl $dst, $mem\t# ubyte" %}
6867
6868 ins_encode %{
6869 __ movzbl($dst$$Register, $mem$$Address);
6870 %}
6871
6872 ins_pipe(ialu_reg_mem);
6873 %}
6874
6875 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6876 instruct loadUB2L(rRegL dst, memory mem)
6877 %{
6878 match(Set dst (ConvI2L (LoadUB mem)));
6879
6880 ins_cost(125);
6881 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6882
6883 ins_encode %{
6884 __ movzbq($dst$$Register, $mem$$Address);
6885 %}
6886
6887 ins_pipe(ialu_reg_mem);
6888 %}
6889
6890 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6891 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6892 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6893 effect(KILL cr);
6894
6895 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6896 "andl $dst, right_n_bits($mask, 8)" %}
6897 ins_encode %{
6898 Register Rdst = $dst$$Register;
6899 __ movzbq(Rdst, $mem$$Address);
6900 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6901 %}
6902 ins_pipe(ialu_reg_mem);
6903 %}
6904
6905 // Load Short (16 bit signed)
6906 instruct loadS(rRegI dst, memory mem)
6907 %{
6908 match(Set dst (LoadS mem));
6909
6910 ins_cost(125);
6911 format %{ "movswl $dst, $mem\t# short" %}
6912
6913 ins_encode %{
6914 __ movswl($dst$$Register, $mem$$Address);
6915 %}
6916
6917 ins_pipe(ialu_reg_mem);
6918 %}
6919
6920 // Load Short (16 bit signed) to Byte (8 bit signed)
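// The (x << 24) >> 24 shift pair below is the standard sign-extend-to-byte
// idiom, so the whole subtree collapses into a single movsbl from memory.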
6921 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6922 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6923
6924 ins_cost(125);
6925 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6926 ins_encode %{
6927 __ movsbl($dst$$Register, $mem$$Address);
6928 %}
6929 ins_pipe(ialu_reg_mem);
6930 %}
6931
6932 // Load Short (16 bit signed) into Long Register
6933 instruct loadS2L(rRegL dst, memory mem)
6934 %{
6935 match(Set dst (ConvI2L (LoadS mem)));
6936
6937 ins_cost(125);
6938 format %{ "movswq $dst, $mem\t# short -> long" %}
6939
6940 ins_encode %{
6941 __ movswq($dst$$Register, $mem$$Address);
6942 %}
6943
6944 ins_pipe(ialu_reg_mem);
6945 %}
6946
6947 // Load Unsigned Short/Char (16 bit UNsigned)
6948 instruct loadUS(rRegI dst, memory mem)
6949 %{
6950 match(Set dst (LoadUS mem));
6951
6952 ins_cost(125);
6953 format %{ "movzwl $dst, $mem\t# ushort/char" %}
6954
6955 ins_encode %{
6956 __ movzwl($dst$$Register, $mem$$Address);
6957 %}
6958
6959 ins_pipe(ialu_reg_mem);
6960 %}
6961
6962 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6963 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6964 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6965
6966 ins_cost(125);
6967 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6968 ins_encode %{
6969 __ movsbl($dst$$Register, $mem$$Address);
6970 %}
6971 ins_pipe(ialu_reg_mem);
6972 %}
6973
6974 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6975 instruct loadUS2L(rRegL dst, memory mem)
6976 %{
6977 match(Set dst (ConvI2L (LoadUS mem)));
6978
6979 ins_cost(125);
6980 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
6981
6982 ins_encode %{
6983 __ movzwq($dst$$Register, $mem$$Address);
6984 %}
6985
6986 ins_pipe(ialu_reg_mem);
6987 %}
6988
6989 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
6990 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6991 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6992
6993 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
6994 ins_encode %{
6995 __ movzbq($dst$$Register, $mem$$Address);
6996 %}
6997 ins_pipe(ialu_reg_mem);
6998 %}
6999
7000 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7001 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7002 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7003 effect(KILL cr);
7004
7005 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7006 "andl $dst, right_n_bits($mask, 16)" %}
7007 ins_encode %{
7008 Register Rdst = $dst$$Register;
7009 __ movzwq(Rdst, $mem$$Address);
7010 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7011 %}
7012 ins_pipe(ialu_reg_mem);
7013 %}
7014
7015 // Load Integer
7016 instruct loadI(rRegI dst, memory mem)
7017 %{
7018 match(Set dst (LoadI mem));
7019
7020 ins_cost(125);
7021 format %{ "movl $dst, $mem\t# int" %}
7022
7023 ins_encode %{
7024 __ movl($dst$$Register, $mem$$Address);
7025 %}
7026
7027 ins_pipe(ialu_reg_mem);
7028 %}
7029
7030 // Load Integer (32 bit signed) to Byte (8 bit signed)
7031 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7032 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7033
7034 ins_cost(125);
7035 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7036 ins_encode %{
7037 __ movsbl($dst$$Register, $mem$$Address);
7038 %}
7039 ins_pipe(ialu_reg_mem);
7040 %}
7041
7042 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7043 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7044 match(Set dst (AndI (LoadI mem) mask));
7045
7046 ins_cost(125);
7047 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7048 ins_encode %{
7049 __ movzbl($dst$$Register, $mem$$Address);
7050 %}
7051 ins_pipe(ialu_reg_mem);
7052 %}
7053
7054 // Load Integer (32 bit signed) to Short (16 bit signed)
7055 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7056 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7057
7058 ins_cost(125);
7059 format %{ "movswl $dst, $mem\t# int -> short" %}
7060 ins_encode %{
7061 __ movswl($dst$$Register, $mem$$Address);
7062 %}
7063 ins_pipe(ialu_reg_mem);
7064 %}
7065
7066 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7067 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7068 match(Set dst (AndI (LoadI mem) mask));
7069
7070 ins_cost(125);
7071 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7072 ins_encode %{
7073 __ movzwl($dst$$Register, $mem$$Address);
7074 %}
7075 ins_pipe(ialu_reg_mem);
7076 %}
7077
7078 // Load Integer into Long Register
7079 instruct loadI2L(rRegL dst, memory mem)
7080 %{
7081 match(Set dst (ConvI2L (LoadI mem)));
7082
7083 ins_cost(125);
7084 format %{ "movslq $dst, $mem\t# int -> long" %}
7085
7086 ins_encode %{
7087 __ movslq($dst$$Register, $mem$$Address);
7088 %}
7089
7090 ins_pipe(ialu_reg_mem);
7091 %}
7092
7093 // Load Integer with mask 0xFF into Long Register
7094 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7095 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7096
7097 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7098 ins_encode %{
7099 __ movzbq($dst$$Register, $mem$$Address);
7100 %}
7101 ins_pipe(ialu_reg_mem);
7102 %}
7103
7104 // Load Integer with mask 0xFFFF into Long Register
7105 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7106 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7107
7108 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7109 ins_encode %{
7110 __ movzwq($dst$$Register, $mem$$Address);
7111 %}
7112 ins_pipe(ialu_reg_mem);
7113 %}
7114
7115 // Load Integer with a 31-bit mask into Long Register
7116 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7117 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7118 effect(KILL cr);
7119
7120 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7121 "andl $dst, $mask" %}
7122 ins_encode %{
7123 Register Rdst = $dst$$Register;
7124 __ movl(Rdst, $mem$$Address);
7125 __ andl(Rdst, $mask$$constant);
7126 %}
7127 ins_pipe(ialu_reg_mem);
7128 %}
7129
7130 // Load Unsigned Integer into Long Register
7131 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7132 %{
7133 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7134
7135 ins_cost(125);
7136 format %{ "movl $dst, $mem\t# uint -> long" %}
7137
7138 ins_encode %{
7139 __ movl($dst$$Register, $mem$$Address);
7140 %}
7141
7142 ins_pipe(ialu_reg_mem);
7143 %}
7144
7145 // Load Long
7146 instruct loadL(rRegL dst, memory mem)
7147 %{
7148 match(Set dst (LoadL mem));
7149
7150 ins_cost(125);
7151 format %{ "movq $dst, $mem\t# long" %}
7152
7153 ins_encode %{
7154 __ movq($dst$$Register, $mem$$Address);
7155 %}
7156
7157 ins_pipe(ialu_reg_mem); // XXX
7158 %}
7159
7160 // Load Range
7161 instruct loadRange(rRegI dst, memory mem)
7162 %{
7163 match(Set dst (LoadRange mem));
7164
7165 ins_cost(125); // XXX
7166 format %{ "movl $dst, $mem\t# range" %}
7167 ins_encode %{
7168 __ movl($dst$$Register, $mem$$Address);
7169 %}
7170 ins_pipe(ialu_reg_mem);
7171 %}
7172
7173 // Load Pointer
7174 instruct loadP(rRegP dst, memory mem)
7175 %{
7176 match(Set dst (LoadP mem));
7177 predicate(n->as_Load()->barrier_data() == 0);
7178
7179 ins_cost(125); // XXX
7180 format %{ "movq $dst, $mem\t# ptr" %}
7181 ins_encode %{
7182 __ movq($dst$$Register, $mem$$Address);
7183 %}
7184 ins_pipe(ialu_reg_mem); // XXX
7185 %}
7186
7187 // Load Compressed Pointer
7188 instruct loadN(rRegN dst, memory mem)
7189 %{
7190 predicate(n->as_Load()->barrier_data() == 0);
7191 match(Set dst (LoadN mem));
7192
7193 ins_cost(125); // XXX
7194 format %{ "movl $dst, $mem\t# compressed ptr" %}
7195 ins_encode %{
7196 __ movl($dst$$Register, $mem$$Address);
7197 %}
7198 ins_pipe(ialu_reg_mem); // XXX
7199 %}
7200
7201
7202 // Load Klass Pointer
7203 instruct loadKlass(rRegP dst, memory mem)
7204 %{
7205 match(Set dst (LoadKlass mem));
7206
7207 ins_cost(125); // XXX
7208 format %{ "movq $dst, $mem\t# class" %}
7209 ins_encode %{
7210 __ movq($dst$$Register, $mem$$Address);
7211 %}
7212 ins_pipe(ialu_reg_mem); // XXX
7213 %}
7214
7215 // Load narrow Klass Pointer
7216 instruct loadNKlass(rRegN dst, memory mem)
7217 %{
7218 predicate(!UseCompactObjectHeaders);
7219 match(Set dst (LoadNKlass mem));
7220
7221 ins_cost(125); // XXX
7222 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7223 ins_encode %{
7224 __ movl($dst$$Register, $mem$$Address);
7225 %}
7226 ins_pipe(ialu_reg_mem); // XXX
7227 %}
7228
7229 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7230 %{
7231 predicate(UseCompactObjectHeaders);
7232 match(Set dst (LoadNKlass mem));
7233 effect(KILL cr);
7234 ins_cost(125);
7235 format %{
7236 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7237 "shrl $dst, markWord::klass_shift_at_offset"
7238 %}
7239 ins_encode %{
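    // With APX, the extended shift form takes a memory source, folding the
    // load and the shift into a single instruction.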
7240 if (UseAPX) {
7241 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
7242 }
7243 else {
7244 __ movl($dst$$Register, $mem$$Address);
7245 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7246 }
7247 %}
7248 ins_pipe(ialu_reg_mem);
7249 %}
7250
7251 // Load Float
7252 instruct loadF(regF dst, memory mem)
7253 %{
7254 match(Set dst (LoadF mem));
7255
7256 ins_cost(145); // XXX
7257 format %{ "movss $dst, $mem\t# float" %}
7258 ins_encode %{
7259 __ movflt($dst$$XMMRegister, $mem$$Address);
7260 %}
7261 ins_pipe(pipe_slow); // XXX
7262 %}
7263
7264 // Load Double
7265 instruct loadD_partial(regD dst, memory mem)
7266 %{
7267 predicate(!UseXmmLoadAndClearUpper);
7268 match(Set dst (LoadD mem));
7269
7270 ins_cost(145); // XXX
7271 format %{ "movlpd $dst, $mem\t# double" %}
7272 ins_encode %{
7273 __ movdbl($dst$$XMMRegister, $mem$$Address);
7274 %}
7275 ins_pipe(pipe_slow); // XXX
7276 %}
7277
7278 instruct loadD(regD dst, memory mem)
7279 %{
7280 predicate(UseXmmLoadAndClearUpper);
7281 match(Set dst (LoadD mem));
7282
7283 ins_cost(145); // XXX
7284 format %{ "movsd $dst, $mem\t# double" %}
7285 ins_encode %{
7286 __ movdbl($dst$$XMMRegister, $mem$$Address);
7287 %}
7288 ins_pipe(pipe_slow); // XXX
7289 %}
7290
7291 // max = java.lang.Math.max(float a, float b)
7292 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7293 predicate(VM_Version::supports_avx10_2());
7294 match(Set dst (MaxF a b));
7295 format %{ "maxF $dst, $a, $b" %}
7296 ins_encode %{
7297 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7298 %}
7299 ins_pipe( pipe_slow );
7300 %}
7301
7302 // max = java.lang.Math.max(float a, float b)
7303 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7304 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7305 match(Set dst (MaxF a b));
7306 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7307 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7308 ins_encode %{
7309 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7310 %}
7311 ins_pipe( pipe_slow );
7312 %}
7313
7314 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7315 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7316 match(Set dst (MaxF a b));
7317 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7318
7319 format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7320 ins_encode %{
7321 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7322 false /*min*/, true /*single*/);
7323 %}
7324 ins_pipe( pipe_slow );
7325 %}
7326
7327 // max = java.lang.Math.max(double a, double b)
7328 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7329 predicate(VM_Version::supports_avx10_2());
7330 match(Set dst (MaxD a b));
7331 format %{ "maxD $dst, $a, $b" %}
7332 ins_encode %{
7333 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7334 %}
7335 ins_pipe( pipe_slow );
7336 %}
7337
7338 // max = java.lang.Math.max(double a, double b)
7339 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7340 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7341 match(Set dst (MaxD a b));
7342 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7343 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7344 ins_encode %{
7345 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7346 %}
7347 ins_pipe( pipe_slow );
7348 %}
7349
7350 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7351 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7352 match(Set dst (MaxD a b));
7353 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7354
7355 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7356 ins_encode %{
7357 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7358 false /*min*/, false /*single*/);
7359 %}
7360 ins_pipe( pipe_slow );
7361 %}
7362
// min = java.lang.Math.min(float a, float b)
7364 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7365 predicate(VM_Version::supports_avx10_2());
7366 match(Set dst (MinF a b));
7367 format %{ "minF $dst, $a, $b" %}
7368 ins_encode %{
7369 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7370 %}
7371 ins_pipe( pipe_slow );
7372 %}
7373
7374 // min = java.lang.Math.min(float a, float b)
7375 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7376 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7377 match(Set dst (MinF a b));
7378 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7379 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7380 ins_encode %{
7381 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7382 %}
7383 ins_pipe( pipe_slow );
7384 %}
7385
7386 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7387 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7388 match(Set dst (MinF a b));
7389 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7390
7391 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7392 ins_encode %{
7393 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7394 true /*min*/, true /*single*/);
7395 %}
7396 ins_pipe( pipe_slow );
7397 %}
7398
// min = java.lang.Math.min(double a, double b)
7400 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7401 predicate(VM_Version::supports_avx10_2());
7402 match(Set dst (MinD a b));
7403 format %{ "minD $dst, $a, $b" %}
7404 ins_encode %{
7405 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7406 %}
7407 ins_pipe( pipe_slow );
7408 %}
7409
7410 // min = java.lang.Math.min(double a, double b)
7411 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7412 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7413 match(Set dst (MinD a b));
7414 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7415 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7416 ins_encode %{
7417 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7418 %}
7419 ins_pipe( pipe_slow );
7420 %}
7421
7422 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7423 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7424 match(Set dst (MinD a b));
7425 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7426
7427 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7428 ins_encode %{
7429 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7430 true /*min*/, false /*single*/);
7431 %}
7432 ins_pipe( pipe_slow );
7433 %}
7434
7435 // Load Effective Address
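// These rules match a bare addressing-mode operand as the value being set,
// letting the matcher materialize an address computation with a single leaq.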
7436 instruct leaP8(rRegP dst, indOffset8 mem)
7437 %{
7438 match(Set dst mem);
7439
7440 ins_cost(110); // XXX
7441 format %{ "leaq $dst, $mem\t# ptr 8" %}
7442 ins_encode %{
7443 __ leaq($dst$$Register, $mem$$Address);
7444 %}
7445 ins_pipe(ialu_reg_reg_fat);
7446 %}
7447
7448 instruct leaP32(rRegP dst, indOffset32 mem)
7449 %{
7450 match(Set dst mem);
7451
7452 ins_cost(110);
7453 format %{ "leaq $dst, $mem\t# ptr 32" %}
7454 ins_encode %{
7455 __ leaq($dst$$Register, $mem$$Address);
7456 %}
7457 ins_pipe(ialu_reg_reg_fat);
7458 %}
7459
7460 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7461 %{
7462 match(Set dst mem);
7463
7464 ins_cost(110);
7465 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7466 ins_encode %{
7467 __ leaq($dst$$Register, $mem$$Address);
7468 %}
7469 ins_pipe(ialu_reg_reg_fat);
7470 %}
7471
7472 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7473 %{
7474 match(Set dst mem);
7475
7476 ins_cost(110);
7477 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7478 ins_encode %{
7479 __ leaq($dst$$Register, $mem$$Address);
7480 %}
7481 ins_pipe(ialu_reg_reg_fat);
7482 %}
7483
7484 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7485 %{
7486 match(Set dst mem);
7487
7488 ins_cost(110);
7489 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7490 ins_encode %{
7491 __ leaq($dst$$Register, $mem$$Address);
7492 %}
7493 ins_pipe(ialu_reg_reg_fat);
7494 %}
7495
7496 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7497 %{
7498 match(Set dst mem);
7499
7500 ins_cost(110);
7501 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7502 ins_encode %{
7503 __ leaq($dst$$Register, $mem$$Address);
7504 %}
7505 ins_pipe(ialu_reg_reg_fat);
7506 %}
7507
7508 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7509 %{
7510 match(Set dst mem);
7511
7512 ins_cost(110);
7513 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7514 ins_encode %{
7515 __ leaq($dst$$Register, $mem$$Address);
7516 %}
7517 ins_pipe(ialu_reg_reg_fat);
7518 %}
7519
7520 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7521 %{
7522 match(Set dst mem);
7523
7524 ins_cost(110);
7525 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7526 ins_encode %{
7527 __ leaq($dst$$Register, $mem$$Address);
7528 %}
7529 ins_pipe(ialu_reg_reg_fat);
7530 %}
7531
7532 // Load Effective Address which uses Narrow (32-bits) oop
7533 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7534 %{
7535 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7536 match(Set dst mem);
7537
7538 ins_cost(110);
7539 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7540 ins_encode %{
7541 __ leaq($dst$$Register, $mem$$Address);
7542 %}
7543 ins_pipe(ialu_reg_reg_fat);
7544 %}
7545
7546 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7547 %{
7548 predicate(CompressedOops::shift() == 0);
7549 match(Set dst mem);
7550
7551 ins_cost(110); // XXX
7552 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7553 ins_encode %{
7554 __ leaq($dst$$Register, $mem$$Address);
7555 %}
7556 ins_pipe(ialu_reg_reg_fat);
7557 %}
7558
7559 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7560 %{
7561 predicate(CompressedOops::shift() == 0);
7562 match(Set dst mem);
7563
7564 ins_cost(110);
7565 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7566 ins_encode %{
7567 __ leaq($dst$$Register, $mem$$Address);
7568 %}
7569 ins_pipe(ialu_reg_reg_fat);
7570 %}
7571
7572 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7573 %{
7574 predicate(CompressedOops::shift() == 0);
7575 match(Set dst mem);
7576
7577 ins_cost(110);
7578 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7579 ins_encode %{
7580 __ leaq($dst$$Register, $mem$$Address);
7581 %}
7582 ins_pipe(ialu_reg_reg_fat);
7583 %}
7584
7585 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7586 %{
7587 predicate(CompressedOops::shift() == 0);
7588 match(Set dst mem);
7589
7590 ins_cost(110);
7591 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7592 ins_encode %{
7593 __ leaq($dst$$Register, $mem$$Address);
7594 %}
7595 ins_pipe(ialu_reg_reg_fat);
7596 %}
7597
7598 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7599 %{
7600 predicate(CompressedOops::shift() == 0);
7601 match(Set dst mem);
7602
7603 ins_cost(110);
7604 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7605 ins_encode %{
7606 __ leaq($dst$$Register, $mem$$Address);
7607 %}
7608 ins_pipe(ialu_reg_reg_fat);
7609 %}
7610
7611 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7612 %{
7613 predicate(CompressedOops::shift() == 0);
7614 match(Set dst mem);
7615
7616 ins_cost(110);
7617 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7618 ins_encode %{
7619 __ leaq($dst$$Register, $mem$$Address);
7620 %}
7621 ins_pipe(ialu_reg_reg_fat);
7622 %}
7623
7624 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7625 %{
7626 predicate(CompressedOops::shift() == 0);
7627 match(Set dst mem);
7628
7629 ins_cost(110);
7630 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7631 ins_encode %{
7632 __ leaq($dst$$Register, $mem$$Address);
7633 %}
7634 ins_pipe(ialu_reg_reg_fat);
7635 %}
7636
7637 instruct loadConI(rRegI dst, immI src)
7638 %{
7639 match(Set dst src);
7640
7641 format %{ "movl $dst, $src\t# int" %}
7642 ins_encode %{
7643 __ movl($dst$$Register, $src$$constant);
7644 %}
7645 ins_pipe(ialu_reg_fat); // XXX
7646 %}
7647
7648 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7649 %{
7650 match(Set dst src);
7651 effect(KILL cr);
7652
7653 ins_cost(50);
7654 format %{ "xorl $dst, $dst\t# int" %}
7655 ins_encode %{
7656 __ xorl($dst$$Register, $dst$$Register);
7657 %}
7658 ins_pipe(ialu_reg);
7659 %}
7660
7661 instruct loadConL(rRegL dst, immL src)
7662 %{
7663 match(Set dst src);
7664
7665 ins_cost(150);
7666 format %{ "movq $dst, $src\t# long" %}
7667 ins_encode %{
7668 __ mov64($dst$$Register, $src$$constant);
7669 %}
7670 ins_pipe(ialu_reg);
7671 %}
7672
7673 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7674 %{
7675 match(Set dst src);
7676 effect(KILL cr);
7677
7678 ins_cost(50);
7679 format %{ "xorl $dst, $dst\t# long" %}
7680 ins_encode %{
7681 __ xorl($dst$$Register, $dst$$Register);
7682 %}
7683 ins_pipe(ialu_reg); // XXX
7684 %}
7685
7686 instruct loadConUL32(rRegL dst, immUL32 src)
7687 %{
7688 match(Set dst src);
7689
7690 ins_cost(60);
7691 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7692 ins_encode %{
7693 __ movl($dst$$Register, $src$$constant);
7694 %}
7695 ins_pipe(ialu_reg);
7696 %}
7697
7698 instruct loadConL32(rRegL dst, immL32 src)
7699 %{
7700 match(Set dst src);
7701
7702 ins_cost(70);
7703 format %{ "movq $dst, $src\t# long (32-bit)" %}
7704 ins_encode %{
7705 __ movq($dst$$Register, $src$$constant);
7706 %}
7707 ins_pipe(ialu_reg);
7708 %}
7709
7710 instruct loadConP(rRegP dst, immP con) %{
7711 match(Set dst con);
7712
7713 format %{ "movq $dst, $con\t# ptr" %}
7714 ins_encode %{
7715 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7716 %}
7717 ins_pipe(ialu_reg_fat); // XXX
7718 %}
7719
7720 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7721 %{
7722 match(Set dst src);
7723 effect(KILL cr);
7724
7725 ins_cost(50);
7726 format %{ "xorl $dst, $dst\t# ptr" %}
7727 ins_encode %{
7728 __ xorl($dst$$Register, $dst$$Register);
7729 %}
7730 ins_pipe(ialu_reg);
7731 %}
7732
7733 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7734 %{
7735 match(Set dst src);
7736 effect(KILL cr);
7737
7738 ins_cost(60);
7739 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7740 ins_encode %{
7741 __ movl($dst$$Register, $src$$constant);
7742 %}
7743 ins_pipe(ialu_reg);
7744 %}
7745
7746 instruct loadConF(regF dst, immF con) %{
7747 match(Set dst con);
7748 ins_cost(125);
7749 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7750 ins_encode %{
7751 __ movflt($dst$$XMMRegister, $constantaddress($con));
7752 %}
7753 ins_pipe(pipe_slow);
7754 %}
7755
7756 instruct loadConH(regF dst, immH con) %{
7757 match(Set dst con);
7758 ins_cost(125);
7759 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7760 ins_encode %{
7761 __ movflt($dst$$XMMRegister, $constantaddress($con));
7762 %}
7763 ins_pipe(pipe_slow);
7764 %}
7765
7766 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7767 match(Set dst src);
7768 effect(KILL cr);
7769 format %{ "xorq $dst, $src\t# compressed null pointer" %}
7770 ins_encode %{
7771 __ xorq($dst$$Register, $dst$$Register);
7772 %}
7773 ins_pipe(ialu_reg);
7774 %}
7775
7776 instruct loadConN(rRegN dst, immN src) %{
7777 match(Set dst src);
7778
7779 ins_cost(125);
7780 format %{ "movl $dst, $src\t# compressed ptr" %}
7781 ins_encode %{
7782 address con = (address)$src$$constant;
7783 if (con == nullptr) {
7784 ShouldNotReachHere();
7785 } else {
7786 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7787 }
7788 %}
7789 ins_pipe(ialu_reg_fat); // XXX
7790 %}
7791
7792 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7793 match(Set dst src);
7794
7795 ins_cost(125);
7796 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7797 ins_encode %{
7798 address con = (address)$src$$constant;
7799 if (con == nullptr) {
7800 ShouldNotReachHere();
7801 } else {
7802 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7803 }
7804 %}
7805 ins_pipe(ialu_reg_fat); // XXX
7806 %}
7807
7808 instruct loadConF0(regF dst, immF0 src)
7809 %{
7810 match(Set dst src);
7811 ins_cost(100);
7812
7813 format %{ "xorps $dst, $dst\t# float 0.0" %}
7814 ins_encode %{
7815 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7816 %}
7817 ins_pipe(pipe_slow);
7818 %}
7819
// Use the same format since predicate() cannot be used here.
7821 instruct loadConD(regD dst, immD con) %{
7822 match(Set dst con);
7823 ins_cost(125);
7824 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7825 ins_encode %{
7826 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7827 %}
7828 ins_pipe(pipe_slow);
7829 %}
7830
7831 instruct loadConD0(regD dst, immD0 src)
7832 %{
7833 match(Set dst src);
7834 ins_cost(100);
7835
7836 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7837 ins_encode %{
7838 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7839 %}
7840 ins_pipe(pipe_slow);
7841 %}
7842
7843 instruct loadSSI(rRegI dst, stackSlotI src)
7844 %{
7845 match(Set dst src);
7846
7847 ins_cost(125);
7848 format %{ "movl $dst, $src\t# int stk" %}
7849 ins_encode %{
7850 __ movl($dst$$Register, $src$$Address);
7851 %}
7852 ins_pipe(ialu_reg_mem);
7853 %}
7854
7855 instruct loadSSL(rRegL dst, stackSlotL src)
7856 %{
7857 match(Set dst src);
7858
7859 ins_cost(125);
7860 format %{ "movq $dst, $src\t# long stk" %}
7861 ins_encode %{
7862 __ movq($dst$$Register, $src$$Address);
7863 %}
7864 ins_pipe(ialu_reg_mem);
7865 %}
7866
7867 instruct loadSSP(rRegP dst, stackSlotP src)
7868 %{
7869 match(Set dst src);
7870
7871 ins_cost(125);
7872 format %{ "movq $dst, $src\t# ptr stk" %}
7873 ins_encode %{
7874 __ movq($dst$$Register, $src$$Address);
7875 %}
7876 ins_pipe(ialu_reg_mem);
7877 %}
7878
7879 instruct loadSSF(regF dst, stackSlotF src)
7880 %{
7881 match(Set dst src);
7882
7883 ins_cost(125);
7884 format %{ "movss $dst, $src\t# float stk" %}
7885 ins_encode %{
7886 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7887 %}
7888 ins_pipe(pipe_slow); // XXX
7889 %}
7890
// Use the same format since predicate() cannot be used here.
7892 instruct loadSSD(regD dst, stackSlotD src)
7893 %{
7894 match(Set dst src);
7895
7896 ins_cost(125);
7897 format %{ "movsd $dst, $src\t# double stk" %}
7898 ins_encode %{
7899 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7900 %}
7901 ins_pipe(pipe_slow); // XXX
7902 %}
7903
7904 // Prefetch instructions for allocation.
7905 // Must be safe to execute with invalid address (cannot fault).
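// AllocatePrefetchInstr selects the variant: 0 = prefetchnta, 1 = prefetcht0,
// 2 = prefetcht2, 3 = prefetchw.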
7906
7907 instruct prefetchAlloc( memory mem ) %{
7908 predicate(AllocatePrefetchInstr==3);
7909 match(PrefetchAllocation mem);
7910 ins_cost(125);
7911
7912 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7913 ins_encode %{
7914 __ prefetchw($mem$$Address);
7915 %}
7916 ins_pipe(ialu_mem);
7917 %}
7918
7919 instruct prefetchAllocNTA( memory mem ) %{
7920 predicate(AllocatePrefetchInstr==0);
7921 match(PrefetchAllocation mem);
7922 ins_cost(125);
7923
7924 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7925 ins_encode %{
7926 __ prefetchnta($mem$$Address);
7927 %}
7928 ins_pipe(ialu_mem);
7929 %}
7930
7931 instruct prefetchAllocT0( memory mem ) %{
7932 predicate(AllocatePrefetchInstr==1);
7933 match(PrefetchAllocation mem);
7934 ins_cost(125);
7935
7936 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7937 ins_encode %{
7938 __ prefetcht0($mem$$Address);
7939 %}
7940 ins_pipe(ialu_mem);
7941 %}
7942
7943 instruct prefetchAllocT2( memory mem ) %{
7944 predicate(AllocatePrefetchInstr==2);
7945 match(PrefetchAllocation mem);
7946 ins_cost(125);
7947
7948 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
7949 ins_encode %{
7950 __ prefetcht2($mem$$Address);
7951 %}
7952 ins_pipe(ialu_mem);
7953 %}
7954
7955 //----------Store Instructions-------------------------------------------------
7956
7957 // Store Byte
7958 instruct storeB(memory mem, rRegI src)
7959 %{
7960 match(Set mem (StoreB mem src));
7961
7962 ins_cost(125); // XXX
7963 format %{ "movb $mem, $src\t# byte" %}
7964 ins_encode %{
7965 __ movb($mem$$Address, $src$$Register);
7966 %}
7967 ins_pipe(ialu_mem_reg);
7968 %}
7969
7970 // Store Char/Short
7971 instruct storeC(memory mem, rRegI src)
7972 %{
7973 match(Set mem (StoreC mem src));
7974
7975 ins_cost(125); // XXX
7976 format %{ "movw $mem, $src\t# char/short" %}
7977 ins_encode %{
7978 __ movw($mem$$Address, $src$$Register);
7979 %}
7980 ins_pipe(ialu_mem_reg);
7981 %}
7982
7983 // Store Integer
7984 instruct storeI(memory mem, rRegI src)
7985 %{
7986 match(Set mem (StoreI mem src));
7987
7988 ins_cost(125); // XXX
7989 format %{ "movl $mem, $src\t# int" %}
7990 ins_encode %{
7991 __ movl($mem$$Address, $src$$Register);
7992 %}
7993 ins_pipe(ialu_mem_reg);
7994 %}
7995
7996 // Store Long
7997 instruct storeL(memory mem, rRegL src)
7998 %{
7999 match(Set mem (StoreL mem src));
8000
8001 ins_cost(125); // XXX
8002 format %{ "movq $mem, $src\t# long" %}
8003 ins_encode %{
8004 __ movq($mem$$Address, $src$$Register);
8005 %}
8006 ins_pipe(ialu_mem_reg); // XXX
8007 %}
8008
8009 // Store Pointer
8010 instruct storeP(memory mem, any_RegP src)
8011 %{
8012 predicate(n->as_Store()->barrier_data() == 0);
8013 match(Set mem (StoreP mem src));
8014
8015 ins_cost(125); // XXX
8016 format %{ "movq $mem, $src\t# ptr" %}
8017 ins_encode %{
8018 __ movq($mem$$Address, $src$$Register);
8019 %}
8020 ins_pipe(ialu_mem_reg);
8021 %}
8022
8023 instruct storeImmP0(memory mem, immP0 zero)
8024 %{
8025 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8026 match(Set mem (StoreP mem zero));
8027
8028 ins_cost(125); // XXX
8029 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8030 ins_encode %{
8031 __ movq($mem$$Address, r12);
8032 %}
8033 ins_pipe(ialu_mem_reg);
8034 %}
8035
8036 // Store Null Pointer, mark word, or other simple pointer constant.
8037 instruct storeImmP(memory mem, immP31 src)
8038 %{
8039 predicate(n->as_Store()->barrier_data() == 0);
8040 match(Set mem (StoreP mem src));
8041
8042 ins_cost(150); // XXX
8043 format %{ "movq $mem, $src\t# ptr" %}
8044 ins_encode %{
8045 __ movq($mem$$Address, $src$$constant);
8046 %}
8047 ins_pipe(ialu_mem_imm);
8048 %}
8049
8050 // Store Compressed Pointer
8051 instruct storeN(memory mem, rRegN src)
8052 %{
8053 predicate(n->as_Store()->barrier_data() == 0);
8054 match(Set mem (StoreN mem src));
8055
8056 ins_cost(125); // XXX
8057 format %{ "movl $mem, $src\t# compressed ptr" %}
8058 ins_encode %{
8059 __ movl($mem$$Address, $src$$Register);
8060 %}
8061 ins_pipe(ialu_mem_reg);
8062 %}
8063
8064 instruct storeNKlass(memory mem, rRegN src)
8065 %{
8066 match(Set mem (StoreNKlass mem src));
8067
8068 ins_cost(125); // XXX
8069 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8070 ins_encode %{
8071 __ movl($mem$$Address, $src$$Register);
8072 %}
8073 ins_pipe(ialu_mem_reg);
8074 %}
8075
8076 instruct storeImmN0(memory mem, immN0 zero)
8077 %{
8078 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8079 match(Set mem (StoreN mem zero));
8080
8081 ins_cost(125); // XXX
8082 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8083 ins_encode %{
8084 __ movl($mem$$Address, r12);
8085 %}
8086 ins_pipe(ialu_mem_reg);
8087 %}
8088
8089 instruct storeImmN(memory mem, immN src)
8090 %{
8091 predicate(n->as_Store()->barrier_data() == 0);
8092 match(Set mem (StoreN mem src));
8093
8094 ins_cost(150); // XXX
8095 format %{ "movl $mem, $src\t# compressed ptr" %}
8096 ins_encode %{
8097 address con = (address)$src$$constant;
8098 if (con == nullptr) {
8099 __ movl($mem$$Address, 0);
8100 } else {
8101 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8102 }
8103 %}
8104 ins_pipe(ialu_mem_imm);
8105 %}
8106
8107 instruct storeImmNKlass(memory mem, immNKlass src)
8108 %{
8109 match(Set mem (StoreNKlass mem src));
8110
8111 ins_cost(150); // XXX
8112 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8113 ins_encode %{
8114 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8115 %}
8116 ins_pipe(ialu_mem_imm);
8117 %}
8118
8119 // Store Integer Immediate
8120 instruct storeImmI0(memory mem, immI_0 zero)
8121 %{
8122 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8123 match(Set mem (StoreI mem zero));
8124
8125 ins_cost(125); // XXX
8126 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8127 ins_encode %{
8128 __ movl($mem$$Address, r12);
8129 %}
8130 ins_pipe(ialu_mem_reg);
8131 %}
8132
8133 instruct storeImmI(memory mem, immI src)
8134 %{
8135 match(Set mem (StoreI mem src));
8136
8137 ins_cost(150);
8138 format %{ "movl $mem, $src\t# int" %}
8139 ins_encode %{
8140 __ movl($mem$$Address, $src$$constant);
8141 %}
8142 ins_pipe(ialu_mem_imm);
8143 %}
8144
8145 // Store Long Immediate
8146 instruct storeImmL0(memory mem, immL0 zero)
8147 %{
8148 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8149 match(Set mem (StoreL mem zero));
8150
8151 ins_cost(125); // XXX
8152 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8153 ins_encode %{
8154 __ movq($mem$$Address, r12);
8155 %}
8156 ins_pipe(ialu_mem_reg);
8157 %}
8158
8159 instruct storeImmL(memory mem, immL32 src)
8160 %{
8161 match(Set mem (StoreL mem src));
8162
8163 ins_cost(150);
8164 format %{ "movq $mem, $src\t# long" %}
8165 ins_encode %{
8166 __ movq($mem$$Address, $src$$constant);
8167 %}
8168 ins_pipe(ialu_mem_imm);
8169 %}
8170
8171 // Store Short/Char Immediate
8172 instruct storeImmC0(memory mem, immI_0 zero)
8173 %{
8174 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8175 match(Set mem (StoreC mem zero));
8176
8177 ins_cost(125); // XXX
8178 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8179 ins_encode %{
8180 __ movw($mem$$Address, r12);
8181 %}
8182 ins_pipe(ialu_mem_reg);
8183 %}
8184
8185 instruct storeImmI16(memory mem, immI16 src)
8186 %{
8187 predicate(UseStoreImmI16);
8188 match(Set mem (StoreC mem src));
8189
8190 ins_cost(150);
8191 format %{ "movw $mem, $src\t# short/char" %}
8192 ins_encode %{
8193 __ movw($mem$$Address, $src$$constant);
8194 %}
8195 ins_pipe(ialu_mem_imm);
8196 %}
8197
8198 // Store Byte Immediate
8199 instruct storeImmB0(memory mem, immI_0 zero)
8200 %{
8201 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8202 match(Set mem (StoreB mem zero));
8203
8204 ins_cost(125); // XXX
8205 format %{ "movb $mem, R12\t# short/char (R12_heapbase==0)" %}
8206 ins_encode %{
8207 __ movb($mem$$Address, r12);
8208 %}
8209 ins_pipe(ialu_mem_reg);
8210 %}
8211
8212 instruct storeImmB(memory mem, immI8 src)
8213 %{
8214 match(Set mem (StoreB mem src));
8215
8216 ins_cost(150); // XXX
8217 format %{ "movb $mem, $src\t# byte" %}
8218 ins_encode %{
8219 __ movb($mem$$Address, $src$$constant);
8220 %}
8221 ins_pipe(ialu_mem_imm);
8222 %}
8223
8224 // Store Float
8225 instruct storeF(memory mem, regF src)
8226 %{
8227 match(Set mem (StoreF mem src));
8228
8229 ins_cost(95); // XXX
8230 format %{ "movss $mem, $src\t# float" %}
8231 ins_encode %{
8232 __ movflt($mem$$Address, $src$$XMMRegister);
8233 %}
8234 ins_pipe(pipe_slow); // XXX
8235 %}
8236
8237 // Store immediate Float value (it is faster than store from XMM register)
8238 instruct storeF0(memory mem, immF0 zero)
8239 %{
8240 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8241 match(Set mem (StoreF mem zero));
8242
8243 ins_cost(25); // XXX
8244 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8245 ins_encode %{
8246 __ movl($mem$$Address, r12);
8247 %}
8248 ins_pipe(ialu_mem_reg);
8249 %}
8250
8251 instruct storeF_imm(memory mem, immF src)
8252 %{
8253 match(Set mem (StoreF mem src));
8254
8255 ins_cost(50);
8256 format %{ "movl $mem, $src\t# float" %}
8257 ins_encode %{
8258 __ movl($mem$$Address, jint_cast($src$$constant));
8259 %}
8260 ins_pipe(ialu_mem_imm);
8261 %}
8262
8263 // Store Double
8264 instruct storeD(memory mem, regD src)
8265 %{
8266 match(Set mem (StoreD mem src));
8267
8268 ins_cost(95); // XXX
8269 format %{ "movsd $mem, $src\t# double" %}
8270 ins_encode %{
8271 __ movdbl($mem$$Address, $src$$XMMRegister);
8272 %}
8273 ins_pipe(pipe_slow); // XXX
8274 %}
8275
8276 // Store immediate double 0.0 (it is faster than store from XMM register)
8277 instruct storeD0_imm(memory mem, immD0 src)
8278 %{
8279 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8280 match(Set mem (StoreD mem src));
8281
8282 ins_cost(50);
8283 format %{ "movq $mem, $src\t# double 0." %}
8284 ins_encode %{
8285 __ movq($mem$$Address, $src$$constant);
8286 %}
8287 ins_pipe(ialu_mem_imm);
8288 %}
8289
8290 instruct storeD0(memory mem, immD0 zero)
8291 %{
8292 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8293 match(Set mem (StoreD mem zero));
8294
8295 ins_cost(25); // XXX
8296 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8297 ins_encode %{
8298 __ movq($mem$$Address, r12);
8299 %}
8300 ins_pipe(ialu_mem_reg);
8301 %}
8302
8303 instruct storeSSI(stackSlotI dst, rRegI src)
8304 %{
8305 match(Set dst src);
8306
8307 ins_cost(100);
8308 format %{ "movl $dst, $src\t# int stk" %}
8309 ins_encode %{
8310 __ movl($dst$$Address, $src$$Register);
8311 %}
8312 ins_pipe( ialu_mem_reg );
8313 %}
8314
8315 instruct storeSSL(stackSlotL dst, rRegL src)
8316 %{
8317 match(Set dst src);
8318
8319 ins_cost(100);
8320 format %{ "movq $dst, $src\t# long stk" %}
8321 ins_encode %{
8322 __ movq($dst$$Address, $src$$Register);
8323 %}
8324 ins_pipe(ialu_mem_reg);
8325 %}
8326
8327 instruct storeSSP(stackSlotP dst, rRegP src)
8328 %{
8329 match(Set dst src);
8330
8331 ins_cost(100);
8332 format %{ "movq $dst, $src\t# ptr stk" %}
8333 ins_encode %{
8334 __ movq($dst$$Address, $src$$Register);
8335 %}
8336 ins_pipe(ialu_mem_reg);
8337 %}
8338
8339 instruct storeSSF(stackSlotF dst, regF src)
8340 %{
8341 match(Set dst src);
8342
8343 ins_cost(95); // XXX
8344 format %{ "movss $dst, $src\t# float stk" %}
8345 ins_encode %{
8346 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8347 %}
8348 ins_pipe(pipe_slow); // XXX
8349 %}
8350
8351 instruct storeSSD(stackSlotD dst, regD src)
8352 %{
8353 match(Set dst src);
8354
8355 ins_cost(95); // XXX
8356 format %{ "movsd $dst, $src\t# double stk" %}
8357 ins_encode %{
8358 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8359 %}
8360 ins_pipe(pipe_slow); // XXX
8361 %}
8362
8363 instruct cacheWB(indirect addr)
8364 %{
8365 predicate(VM_Version::supports_data_cache_line_flush());
8366 match(CacheWB addr);
8367
8368 ins_cost(100);
8369 format %{"cache wb $addr" %}
8370 ins_encode %{
8371 assert($addr->index_position() < 0, "should be");
8372 assert($addr$$disp == 0, "should be");
8373 __ cache_wb(Address($addr$$base$$Register, 0));
8374 %}
8375 ins_pipe(pipe_slow); // XXX
8376 %}
8377
8378 instruct cacheWBPreSync()
8379 %{
8380 predicate(VM_Version::supports_data_cache_line_flush());
8381 match(CacheWBPreSync);
8382
8383 ins_cost(100);
8384 format %{"cache wb presync" %}
8385 ins_encode %{
8386 __ cache_wbsync(true);
8387 %}
8388 ins_pipe(pipe_slow); // XXX
8389 %}
8390
8391 instruct cacheWBPostSync()
8392 %{
8393 predicate(VM_Version::supports_data_cache_line_flush());
8394 match(CacheWBPostSync);
8395
8396 ins_cost(100);
8397 format %{"cache wb postsync" %}
8398 ins_encode %{
8399 __ cache_wbsync(false);
8400 %}
8401 ins_pipe(pipe_slow); // XXX
8402 %}
8403
8404 //----------BSWAP Instructions-------------------------------------------------
8405 instruct bytes_reverse_int(rRegI dst) %{
8406 match(Set dst (ReverseBytesI dst));
8407
8408 format %{ "bswapl $dst" %}
8409 ins_encode %{
8410 __ bswapl($dst$$Register);
8411 %}
8412 ins_pipe( ialu_reg );
8413 %}
8414
8415 instruct bytes_reverse_long(rRegL dst) %{
8416 match(Set dst (ReverseBytesL dst));
8417
8418 format %{ "bswapq $dst" %}
8419 ins_encode %{
8420 __ bswapq($dst$$Register);
8421 %}
8422 ins_pipe( ialu_reg);
8423 %}
8424
8425 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8426 match(Set dst (ReverseBytesUS dst));
8427 effect(KILL cr);
8428
8429 format %{ "bswapl $dst\n\t"
8430 "shrl $dst,16\n\t" %}
8431 ins_encode %{
8432 __ bswapl($dst$$Register);
8433 __ shrl($dst$$Register, 16);
8434 %}
8435 ins_pipe( ialu_reg );
8436 %}
8437
8438 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8439 match(Set dst (ReverseBytesS dst));
8440 effect(KILL cr);
8441
8442 format %{ "bswapl $dst\n\t"
8443 "sar $dst,16\n\t" %}
8444 ins_encode %{
8445 __ bswapl($dst$$Register);
8446 __ sarl($dst$$Register, 16);
8447 %}
8448 ins_pipe( ialu_reg );
8449 %}
8450
8451 //---------- Zeros Count Instructions ------------------------------------------
8452
8453 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8454 predicate(UseCountLeadingZerosInstruction);
8455 match(Set dst (CountLeadingZerosI src));
8456 effect(KILL cr);
8457
8458 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8459 ins_encode %{
8460 __ lzcntl($dst$$Register, $src$$Register);
8461 %}
8462 ins_pipe(ialu_reg);
8463 %}
8464
8465 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8466 predicate(UseCountLeadingZerosInstruction);
8467 match(Set dst (CountLeadingZerosI (LoadI src)));
8468 effect(KILL cr);
8469 ins_cost(175);
8470 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8471 ins_encode %{
8472 __ lzcntl($dst$$Register, $src$$Address);
8473 %}
8474 ins_pipe(ialu_reg_mem);
8475 %}
8476
8477 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8478 predicate(!UseCountLeadingZerosInstruction);
8479 match(Set dst (CountLeadingZerosI src));
8480 effect(KILL cr);
8481
8482 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8483 "jnz skip\n\t"
8484 "movl $dst, -1\n"
8485 "skip:\n\t"
8486 "negl $dst\n\t"
8487 "addl $dst, 31" %}
8488 ins_encode %{
8489 Register Rdst = $dst$$Register;
8490 Register Rsrc = $src$$Register;
8491 Label skip;
8492 __ bsrl(Rdst, Rsrc);
8493 __ jccb(Assembler::notZero, skip);
8494 __ movl(Rdst, -1);
8495 __ bind(skip);
8496 __ negl(Rdst);
8497 __ addl(Rdst, BitsPerInt - 1);
8498 %}
8499 ins_pipe(ialu_reg);
8500 %}
8501
8502 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8503 predicate(UseCountLeadingZerosInstruction);
8504 match(Set dst (CountLeadingZerosL src));
8505 effect(KILL cr);
8506
8507 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8508 ins_encode %{
8509 __ lzcntq($dst$$Register, $src$$Register);
8510 %}
8511 ins_pipe(ialu_reg);
8512 %}
8513
8514 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8515 predicate(UseCountLeadingZerosInstruction);
8516 match(Set dst (CountLeadingZerosL (LoadL src)));
8517 effect(KILL cr);
8518 ins_cost(175);
8519 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8520 ins_encode %{
8521 __ lzcntq($dst$$Register, $src$$Address);
8522 %}
8523 ins_pipe(ialu_reg_mem);
8524 %}
8525
8526 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8527 predicate(!UseCountLeadingZerosInstruction);
8528 match(Set dst (CountLeadingZerosL src));
8529 effect(KILL cr);
8530
8531 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8532 "jnz skip\n\t"
8533 "movl $dst, -1\n"
8534 "skip:\n\t"
8535 "negl $dst\n\t"
8536 "addl $dst, 63" %}
8537 ins_encode %{
8538 Register Rdst = $dst$$Register;
8539 Register Rsrc = $src$$Register;
8540 Label skip;
8541 __ bsrq(Rdst, Rsrc);
8542 __ jccb(Assembler::notZero, skip);
8543 __ movl(Rdst, -1);
8544 __ bind(skip);
8545 __ negl(Rdst);
8546 __ addl(Rdst, BitsPerLong - 1);
8547 %}
8548 ins_pipe(ialu_reg);
8549 %}
8550
8551 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8552 predicate(UseCountTrailingZerosInstruction);
8553 match(Set dst (CountTrailingZerosI src));
8554 effect(KILL cr);
8555
8556 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8557 ins_encode %{
8558 __ tzcntl($dst$$Register, $src$$Register);
8559 %}
8560 ins_pipe(ialu_reg);
8561 %}
8562
8563 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8564 predicate(UseCountTrailingZerosInstruction);
8565 match(Set dst (CountTrailingZerosI (LoadI src)));
8566 effect(KILL cr);
8567 ins_cost(175);
8568 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8569 ins_encode %{
8570 __ tzcntl($dst$$Register, $src$$Address);
8571 %}
8572 ins_pipe(ialu_reg_mem);
8573 %}
8574
8575 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8576 predicate(!UseCountTrailingZerosInstruction);
8577 match(Set dst (CountTrailingZerosI src));
8578 effect(KILL cr);
8579
8580 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8581 "jnz done\n\t"
8582 "movl $dst, 32\n"
8583 "done:" %}
8584 ins_encode %{
8585 Register Rdst = $dst$$Register;
8586 Label done;
8587 __ bsfl(Rdst, $src$$Register);
8588 __ jccb(Assembler::notZero, done);
8589 __ movl(Rdst, BitsPerInt);
8590 __ bind(done);
8591 %}
8592 ins_pipe(ialu_reg);
8593 %}
8594
8595 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8596 predicate(UseCountTrailingZerosInstruction);
8597 match(Set dst (CountTrailingZerosL src));
8598 effect(KILL cr);
8599
8600 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8601 ins_encode %{
8602 __ tzcntq($dst$$Register, $src$$Register);
8603 %}
8604 ins_pipe(ialu_reg);
8605 %}
8606
8607 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8608 predicate(UseCountTrailingZerosInstruction);
8609 match(Set dst (CountTrailingZerosL (LoadL src)));
8610 effect(KILL cr);
8611 ins_cost(175);
8612 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8613 ins_encode %{
8614 __ tzcntq($dst$$Register, $src$$Address);
8615 %}
8616 ins_pipe(ialu_reg_mem);
8617 %}
8618
8619 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8620 predicate(!UseCountTrailingZerosInstruction);
8621 match(Set dst (CountTrailingZerosL src));
8622 effect(KILL cr);
8623
8624 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8625 "jnz done\n\t"
8626 "movl $dst, 64\n"
8627 "done:" %}
8628 ins_encode %{
8629 Register Rdst = $dst$$Register;
8630 Label done;
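// Same scheme as the int flavor: bsf(src) == CTZ(src), with a zero src
// forced to the defined result of 64.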
8631 __ bsfq(Rdst, $src$$Register);
8632 __ jccb(Assembler::notZero, done);
8633 __ movl(Rdst, BitsPerLong);
8634 __ bind(done);
8635 %}
8636 ins_pipe(ialu_reg);
8637 %}
8638
8639 //--------------- Reverse Operation Instructions ----------------
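// ReverseI/ReverseL reverse the bit order of their operand. Without GFNI
// this is done with shift-and-mask arithmetic in general registers; with
// GFNI the bits within each byte are reversed by a Galois-field affine
// transform in an XMM temporary before a byte swap (see
// MacroAssembler::reverseI/reverseL).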
8640 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8641 predicate(!VM_Version::supports_gfni());
8642 match(Set dst (ReverseI src));
8643 effect(TEMP dst, TEMP rtmp, KILL cr);
8644 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8645 ins_encode %{
8646 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8647 %}
8648 ins_pipe( ialu_reg );
8649 %}
8650
8651 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8652 predicate(VM_Version::supports_gfni());
8653 match(Set dst (ReverseI src));
8654 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8655 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8656 ins_encode %{
8657 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8658 %}
8659 ins_pipe( ialu_reg );
8660 %}
8661
8662 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8663 predicate(!VM_Version::supports_gfni());
8664 match(Set dst (ReverseL src));
8665 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8666 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8667 ins_encode %{
8668 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8669 %}
8670 ins_pipe( ialu_reg );
8671 %}
8672
8673 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8674 predicate(VM_Version::supports_gfni());
8675 match(Set dst (ReverseL src));
8676 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8677 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8678 ins_encode %{
8679 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8680 %}
8681 ins_pipe( ialu_reg );
8682 %}
8683
8684 //---------- Population Count Instructions -------------------------------------
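// POPCNT counts the set bits of its source. UsePopCountInstruction is
// enabled only when the CPU advertises the POPCNT feature.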
8685
8686 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8687 predicate(UsePopCountInstruction);
8688 match(Set dst (PopCountI src));
8689 effect(KILL cr);
8690
8691 format %{ "popcnt $dst, $src" %}
8692 ins_encode %{
8693 __ popcntl($dst$$Register, $src$$Register);
8694 %}
8695 ins_pipe(ialu_reg);
8696 %}
8697
8698 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8699 predicate(UsePopCountInstruction);
8700 match(Set dst (PopCountI (LoadI mem)));
8701 effect(KILL cr);
8702
8703 format %{ "popcnt $dst, $mem" %}
8704 ins_encode %{
8705 __ popcntl($dst$$Register, $mem$$Address);
8706 %}
8707 ins_pipe(ialu_reg);
8708 %}
8709
8710 // Note: Long.bitCount(long) returns an int.
8711 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8712 predicate(UsePopCountInstruction);
8713 match(Set dst (PopCountL src));
8714 effect(KILL cr);
8715
8716 format %{ "popcnt $dst, $src" %}
8717 ins_encode %{
8718 __ popcntq($dst$$Register, $src$$Register);
8719 %}
8720 ins_pipe(ialu_reg);
8721 %}
8722
8723 // Note: Long.bitCount(long) returns an int.
8724 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8725 predicate(UsePopCountInstruction);
8726 match(Set dst (PopCountL (LoadL mem)));
8727 effect(KILL cr);
8728
8729 format %{ "popcnt $dst, $mem" %}
8730 ins_encode %{
8731 __ popcntq($dst$$Register, $mem$$Address);
8732 %}
8733 ins_pipe(ialu_reg);
8734 %}
8735
8736
8737 //----------MemBar Instructions-----------------------------------------------
8738 // Memory barrier flavors
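// x86 implements a TSO memory model: loads are not reordered with older
// loads and stores are not reordered with older stores, so acquire and
// release barriers need no code. Only StoreLoad (a volatile store followed
// by a volatile load) requires a real fence, emitted below as a locked
// read-modify-write of the top of stack.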
8739
8740 instruct membar_acquire()
8741 %{
8742 match(MemBarAcquire);
8743 match(LoadFence);
8744 ins_cost(0);
8745
8746 size(0);
8747 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8748 ins_encode();
8749 ins_pipe(empty);
8750 %}
8751
8752 instruct membar_acquire_lock()
8753 %{
8754 match(MemBarAcquireLock);
8755 ins_cost(0);
8756
8757 size(0);
8758 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8759 ins_encode();
8760 ins_pipe(empty);
8761 %}
8762
8763 instruct membar_release()
8764 %{
8765 match(MemBarRelease);
8766 match(StoreFence);
8767 ins_cost(0);
8768
8769 size(0);
8770 format %{ "MEMBAR-release ! (empty encoding)" %}
8771 ins_encode();
8772 ins_pipe(empty);
8773 %}
8774
8775 instruct membar_release_lock()
8776 %{
8777 match(MemBarReleaseLock);
8778 ins_cost(0);
8779
8780 size(0);
8781 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8782 ins_encode();
8783 ins_pipe(empty);
8784 %}
8785
8786 instruct membar_volatile(rFlagsReg cr) %{
8787 match(MemBarVolatile);
8788 effect(KILL cr);
8789 ins_cost(400);
8790
8791 format %{
8792 $$template
8793 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8794 %}
8795 ins_encode %{
8796 __ membar(Assembler::StoreLoad);
8797 %}
8798 ins_pipe(pipe_slow);
8799 %}
8800
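// Matcher::post_store_load_barrier() recognizes shapes where the required
// StoreLoad ordering is already enforced, e.g. by an adjacent locked
// instruction, so the barrier can be elided entirely.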
8801 instruct unnecessary_membar_volatile()
8802 %{
8803 match(MemBarVolatile);
8804 predicate(Matcher::post_store_load_barrier(n));
8805 ins_cost(0);
8806
8807 size(0);
8808 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8809 ins_encode();
8810 ins_pipe(empty);
8811 %}
8812
8813 instruct membar_storestore() %{
8814 match(MemBarStoreStore);
8815 match(StoreStoreFence);
8816 ins_cost(0);
8817
8818 size(0);
8819 format %{ "MEMBAR-storestore (empty encoding)" %}
ins_encode();
8821 ins_pipe(empty);
8822 %}
8823
8824 //----------Move Instructions--------------------------------------------------
8825
8826 instruct castX2P(rRegP dst, rRegL src)
8827 %{
8828 match(Set dst (CastX2P src));
8829
8830 format %{ "movq $dst, $src\t# long->ptr" %}
8831 ins_encode %{
8832 if ($dst$$reg != $src$$reg) {
8833 __ movptr($dst$$Register, $src$$Register);
8834 }
8835 %}
8836 ins_pipe(ialu_reg_reg); // XXX
8837 %}
8838
8839 instruct castP2X(rRegL dst, rRegP src)
8840 %{
8841 match(Set dst (CastP2X src));
8842
8843 format %{ "movq $dst, $src\t# ptr -> long" %}
8844 ins_encode %{
8845 if ($dst$$reg != $src$$reg) {
8846 __ movptr($dst$$Register, $src$$Register);
8847 }
8848 %}
8849 ins_pipe(ialu_reg_reg); // XXX
8850 %}
8851
// Convert oop into int for vector alignment masking
8853 instruct convP2I(rRegI dst, rRegP src)
8854 %{
8855 match(Set dst (ConvL2I (CastP2X src)));
8856
8857 format %{ "movl $dst, $src\t# ptr -> int" %}
8858 ins_encode %{
8859 __ movl($dst$$Register, $src$$Register);
8860 %}
8861 ins_pipe(ialu_reg_reg); // XXX
8862 %}
8863
// Convert compressed oop into int for vector alignment masking
// in the case of 32-bit oops (heap < 4GB).
8866 instruct convN2I(rRegI dst, rRegN src)
8867 %{
8868 predicate(CompressedOops::shift() == 0);
8869 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8870
8871 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8872 ins_encode %{
8873 __ movl($dst$$Register, $src$$Register);
8874 %}
8875 ins_pipe(ialu_reg_reg); // XXX
8876 %}
8877
8878 // Convert oop pointer into compressed form
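// A compressed oop is the 32-bit, optionally right-shifted offset of the
// oop from the heap base: narrow = (oop - base) >> shift, where base and
// shift may both be zero for small, low heaps. encode_heap_oop handles a
// possibly-null oop; the _not_null variant below skips that check.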
8879 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8880 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8881 match(Set dst (EncodeP src));
8882 effect(KILL cr);
8883 format %{ "encode_heap_oop $dst,$src" %}
8884 ins_encode %{
8885 Register s = $src$$Register;
8886 Register d = $dst$$Register;
8887 if (s != d) {
8888 __ movq(d, s);
8889 }
8890 __ encode_heap_oop(d);
8891 %}
8892 ins_pipe(ialu_reg_long);
8893 %}
8894
8895 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8896 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8897 match(Set dst (EncodeP src));
8898 effect(KILL cr);
8899 format %{ "encode_heap_oop_not_null $dst,$src" %}
8900 ins_encode %{
8901 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8902 %}
8903 ins_pipe(ialu_reg_long);
8904 %}
8905
8906 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8907 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8908 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8909 match(Set dst (DecodeN src));
8910 effect(KILL cr);
8911 format %{ "decode_heap_oop $dst,$src" %}
8912 ins_encode %{
8913 Register s = $src$$Register;
8914 Register d = $dst$$Register;
8915 if (s != d) {
8916 __ movq(d, s);
8917 }
8918 __ decode_heap_oop(d);
8919 %}
8920 ins_pipe(ialu_reg_long);
8921 %}
8922
8923 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8924 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8925 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8926 match(Set dst (DecodeN src));
8927 effect(KILL cr);
8928 format %{ "decode_heap_oop_not_null $dst,$src" %}
8929 ins_encode %{
8930 Register s = $src$$Register;
8931 Register d = $dst$$Register;
8932 if (s != d) {
8933 __ decode_heap_oop_not_null(d, s);
8934 } else {
8935 __ decode_heap_oop_not_null(d);
8936 }
8937 %}
8938 ins_pipe(ialu_reg_long);
8939 %}
8940
8941 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8942 match(Set dst (EncodePKlass src));
8943 effect(TEMP dst, KILL cr);
8944 format %{ "encode_and_move_klass_not_null $dst,$src" %}
8945 ins_encode %{
8946 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
8947 %}
8948 ins_pipe(ialu_reg_long);
8949 %}
8950
8951 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8952 match(Set dst (DecodeNKlass src));
8953 effect(TEMP dst, KILL cr);
8954 format %{ "decode_and_move_klass_not_null $dst,$src" %}
8955 ins_encode %{
8956 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
8957 %}
8958 ins_pipe(ialu_reg_long);
8959 %}
8960
8961 //----------Conditional Move---------------------------------------------------
8962 // Jump
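// The switch targets live in the constant table; each flavor below loads
// the table base with leaq from $constantaddress and dispatches with an
// indirect jump indexed by the (possibly scaled and offset) switch value.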
8963 // dummy instruction for generating temp registers
8964 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
8965 match(Jump (LShiftL switch_val shift));
8966 ins_cost(350);
8967 predicate(false);
8968 effect(TEMP dest);
8969
8970 format %{ "leaq $dest, [$constantaddress]\n\t"
8971 "jmp [$dest + $switch_val << $shift]\n\t" %}
8972 ins_encode %{
// We could use jump(ArrayAddress) except that the macro assembler needs r10
// to do that, and the compiler treats r10 as an allocatable register.
// So we build the dispatch address by hand.
8976 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
8977 // ArrayAddress dispatch(table, index);
8978 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
8979 __ lea($dest$$Register, $constantaddress);
8980 __ jmp(dispatch);
8981 %}
8982 ins_pipe(pipe_jmp);
8983 %}
8984
8985 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
8986 match(Jump (AddL (LShiftL switch_val shift) offset));
8987 ins_cost(350);
8988 effect(TEMP dest);
8989
8990 format %{ "leaq $dest, [$constantaddress]\n\t"
8991 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
8992 ins_encode %{
// We could use jump(ArrayAddress) except that the macro assembler needs r10
// to do that, and the compiler treats r10 as an allocatable register.
// So we build the dispatch address by hand.
8996 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
8997 // ArrayAddress dispatch(table, index);
8998 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
8999 __ lea($dest$$Register, $constantaddress);
9000 __ jmp(dispatch);
9001 %}
9002 ins_pipe(pipe_jmp);
9003 %}
9004
9005 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9006 match(Jump switch_val);
9007 ins_cost(350);
9008 effect(TEMP dest);
9009
9010 format %{ "leaq $dest, [$constantaddress]\n\t"
9011 "jmp [$dest + $switch_val]\n\t" %}
9012 ins_encode %{
// We could use jump(ArrayAddress) except that the macro assembler needs r10
// to do that, and the compiler treats r10 as an allocatable register.
// So we build the dispatch address by hand.
9016 // Address index(noreg, switch_reg, Address::times_1);
9017 // ArrayAddress dispatch(table, index);
9018 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9019 __ lea($dest$$Register, $constantaddress);
9020 __ jmp(dispatch);
9021 %}
9022 ins_pipe(pipe_jmp);
9023 %}
9024
9025 // Conditional move
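// A CMove between the constants 1 and 0 reduces to a setb of the negated
// condition: no cmov and no materialization of either constant is needed.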
9026 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9027 %{
9028 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9029 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9030
9031 ins_cost(100); // XXX
9032 format %{ "setbn$cop $dst\t# signed, int" %}
9033 ins_encode %{
9034 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9035 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9036 %}
9037 ins_pipe(ialu_reg);
9038 %}
9039
9040 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9041 %{
9042 predicate(!UseAPX);
9043 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9044
9045 ins_cost(200); // XXX
9046 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9047 ins_encode %{
9048 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9049 %}
9050 ins_pipe(pipe_cmov_reg);
9051 %}
9052
9053 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9054 %{
9055 predicate(UseAPX);
9056 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9057
9058 ins_cost(200);
9059 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9060 ins_encode %{
9061 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9062 %}
9063 ins_pipe(pipe_cmov_reg);
9064 %}
9065
9066 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9067 %{
9068 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9069 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9070
9071 ins_cost(100); // XXX
9072 format %{ "setbn$cop $dst\t# unsigned, int" %}
9073 ins_encode %{
9074 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9075 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9076 %}
9077 ins_pipe(ialu_reg);
9078 %}
9079
9080 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9081 predicate(!UseAPX);
9082 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9083
9084 ins_cost(200); // XXX
9085 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9086 ins_encode %{
9087 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9088 %}
9089 ins_pipe(pipe_cmov_reg);
9090 %}
9091
9092 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9093 predicate(UseAPX);
9094 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9095
9096 ins_cost(200);
9097 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9098 ins_encode %{
9099 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9100 %}
9101 ins_pipe(pipe_cmov_reg);
9102 %}
9103
9104 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9105 %{
9106 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9107 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9108
9109 ins_cost(100); // XXX
9110 format %{ "setbn$cop $dst\t# unsigned, int" %}
9111 ins_encode %{
9112 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9113 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9114 %}
9115 ins_pipe(ialu_reg);
9116 %}
9117
9118 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9119 predicate(!UseAPX);
9120 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9121 ins_cost(200);
9122 expand %{
9123 cmovI_regU(cop, cr, dst, src);
9124 %}
9125 %}
9126
9127 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
9128 predicate(UseAPX);
9129 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9130 ins_cost(200);
9131 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9132 ins_encode %{
9133 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9134 %}
9135 ins_pipe(pipe_cmov_reg);
9136 %}
9137
9138 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9139 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9140 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9141
9142 ins_cost(200); // XXX
9143 format %{ "cmovpl $dst, $src\n\t"
9144 "cmovnel $dst, $src" %}
9145 ins_encode %{
9146 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9147 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9148 %}
9149 ins_pipe(pipe_cmov_reg);
9150 %}
9151
9152 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9153 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9154 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9155 effect(TEMP dst);
9156
9157 ins_cost(200);
9158 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9159 "cmovnel $dst, $src2" %}
9160 ins_encode %{
9161 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9162 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9163 %}
9164 ins_pipe(pipe_cmov_reg);
9165 %}
9166
9167 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9168 // inputs of the CMove
9169 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9170 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9171 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9172 effect(TEMP dst);
9173
9174 ins_cost(200); // XXX
9175 format %{ "cmovpl $dst, $src\n\t"
9176 "cmovnel $dst, $src" %}
9177 ins_encode %{
9178 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9179 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9180 %}
9181 ins_pipe(pipe_cmov_reg);
9182 %}
9183
// This special handling is needed only for eq/neq comparisons: NaN == NaN is
// false, and the parity flag is set if either operand is a NaN.
9186 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9187 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9188 match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
9189 effect(TEMP dst);
9190
9191 ins_cost(200);
9192 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9193 "cmovnel $dst, $src2" %}
9194 ins_encode %{
9195 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9196 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9197 %}
9198 ins_pipe(pipe_cmov_reg);
9199 %}
9200
9201 // Conditional move
9202 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9203 predicate(!UseAPX);
9204 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9205
9206 ins_cost(250); // XXX
9207 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9208 ins_encode %{
9209 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9210 %}
9211 ins_pipe(pipe_cmov_mem);
9212 %}
9213
9214 // Conditional move
9215 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9216 %{
9217 predicate(UseAPX);
9218 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9219
9220 ins_cost(250);
9221 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9222 ins_encode %{
9223 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9224 %}
9225 ins_pipe(pipe_cmov_mem);
9226 %}
9227
9228 // Conditional move
9229 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9230 %{
9231 predicate(!UseAPX);
9232 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9233
9234 ins_cost(250); // XXX
9235 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9236 ins_encode %{
9237 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9238 %}
9239 ins_pipe(pipe_cmov_mem);
9240 %}
9241
9242 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9243 predicate(!UseAPX);
9244 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9245 ins_cost(250);
9246 expand %{
9247 cmovI_memU(cop, cr, dst, src);
9248 %}
9249 %}
9250
9251 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9252 %{
9253 predicate(UseAPX);
9254 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9255
9256 ins_cost(250);
9257 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9258 ins_encode %{
9259 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9260 %}
9261 ins_pipe(pipe_cmov_mem);
9262 %}
9263
9264 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
9265 %{
9266 predicate(UseAPX);
9267 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9268 ins_cost(250);
9269 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9270 ins_encode %{
9271 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9272 %}
9273 ins_pipe(pipe_cmov_mem);
9274 %}
9275
9276 // Conditional move
9277 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9278 %{
9279 predicate(!UseAPX);
9280 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9281
9282 ins_cost(200); // XXX
9283 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9284 ins_encode %{
9285 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9286 %}
9287 ins_pipe(pipe_cmov_reg);
9288 %}
9289
9290 // Conditional move ndd
9291 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9292 %{
9293 predicate(UseAPX);
9294 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9295
9296 ins_cost(200);
9297 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9298 ins_encode %{
9299 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9300 %}
9301 ins_pipe(pipe_cmov_reg);
9302 %}
9303
9304 // Conditional move
9305 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9306 %{
9307 predicate(!UseAPX);
9308 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9309
9310 ins_cost(200); // XXX
9311 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9312 ins_encode %{
9313 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9314 %}
9315 ins_pipe(pipe_cmov_reg);
9316 %}
9317
9318 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9319 predicate(!UseAPX);
9320 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9321 ins_cost(200);
9322 expand %{
9323 cmovN_regU(cop, cr, dst, src);
9324 %}
9325 %}
9326
9327 // Conditional move ndd
9328 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9329 %{
9330 predicate(UseAPX);
9331 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9332
9333 ins_cost(200);
9334 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9335 ins_encode %{
9336 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9337 %}
9338 ins_pipe(pipe_cmov_reg);
9339 %}
9340
9341 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
9342 predicate(UseAPX);
9343 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9344 ins_cost(200);
9345 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9346 ins_encode %{
9347 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9348 %}
9349 ins_pipe(pipe_cmov_reg);
9350 %}
9351
9352 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9353 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9354 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9355
9356 ins_cost(200); // XXX
9357 format %{ "cmovpl $dst, $src\n\t"
9358 "cmovnel $dst, $src" %}
9359 ins_encode %{
9360 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9361 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9362 %}
9363 ins_pipe(pipe_cmov_reg);
9364 %}
9365
9366 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9367 // inputs of the CMove
9368 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9369 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9370 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9371
9372 ins_cost(200); // XXX
9373 format %{ "cmovpl $dst, $src\n\t"
9374 "cmovnel $dst, $src" %}
9375 ins_encode %{
9376 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9377 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9378 %}
9379 ins_pipe(pipe_cmov_reg);
9380 %}
9381
9382 // Conditional move
9383 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9384 %{
9385 predicate(!UseAPX);
9386 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9387
9388 ins_cost(200); // XXX
9389 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9390 ins_encode %{
9391 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9392 %}
9393 ins_pipe(pipe_cmov_reg); // XXX
9394 %}
9395
9396 // Conditional move ndd
9397 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9398 %{
9399 predicate(UseAPX);
9400 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9401
9402 ins_cost(200);
9403 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9404 ins_encode %{
9405 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9406 %}
9407 ins_pipe(pipe_cmov_reg);
9408 %}
9409
9410 // Conditional move
9411 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9412 %{
9413 predicate(!UseAPX);
9414 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9415
9416 ins_cost(200); // XXX
9417 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9418 ins_encode %{
9419 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9420 %}
9421 ins_pipe(pipe_cmov_reg); // XXX
9422 %}
9423
9424 // Conditional move ndd
9425 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9426 %{
9427 predicate(UseAPX);
9428 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9429
9430 ins_cost(200);
9431 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9432 ins_encode %{
9433 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9434 %}
9435 ins_pipe(pipe_cmov_reg);
9436 %}
9437
9438 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9439 predicate(!UseAPX);
9440 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9441 ins_cost(200);
9442 expand %{
9443 cmovP_regU(cop, cr, dst, src);
9444 %}
9445 %}
9446
9447 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
9448 predicate(UseAPX);
9449 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9450 ins_cost(200);
9451 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9452 ins_encode %{
9453 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9454 %}
9455 ins_pipe(pipe_cmov_reg);
9456 %}
9457
9458 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9459 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9460 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9461
9462 ins_cost(200); // XXX
9463 format %{ "cmovpq $dst, $src\n\t"
9464 "cmovneq $dst, $src" %}
9465 ins_encode %{
9466 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9467 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9468 %}
9469 ins_pipe(pipe_cmov_reg);
9470 %}
9471
9472 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9473 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9474 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9475 effect(TEMP dst);
9476
9477 ins_cost(200);
9478 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9479 "cmovneq $dst, $src2" %}
9480 ins_encode %{
9481 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9482 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9483 %}
9484 ins_pipe(pipe_cmov_reg);
9485 %}
9486
9487 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9488 // inputs of the CMove
9489 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9490 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9491 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9492
9493 ins_cost(200); // XXX
9494 format %{ "cmovpq $dst, $src\n\t"
9495 "cmovneq $dst, $src" %}
9496 ins_encode %{
9497 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9498 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9499 %}
9500 ins_pipe(pipe_cmov_reg);
9501 %}
9502
9503 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9504 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9505 match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
9506 effect(TEMP dst);
9507
9508 ins_cost(200);
9509 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9510 "cmovneq $dst, $src2" %}
9511 ins_encode %{
9512 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9513 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9514 %}
9515 ins_pipe(pipe_cmov_reg);
9516 %}
9517
9518 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9519 %{
9520 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9521 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9522
9523 ins_cost(100); // XXX
9524 format %{ "setbn$cop $dst\t# signed, long" %}
9525 ins_encode %{
9526 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9527 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9528 %}
9529 ins_pipe(ialu_reg);
9530 %}
9531
9532 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9533 %{
9534 predicate(!UseAPX);
9535 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9536
9537 ins_cost(200); // XXX
9538 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9539 ins_encode %{
9540 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9541 %}
9542 ins_pipe(pipe_cmov_reg); // XXX
9543 %}
9544
9545 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9546 %{
9547 predicate(UseAPX);
9548 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9549
9550 ins_cost(200);
9551 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9552 ins_encode %{
9553 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9554 %}
9555 ins_pipe(pipe_cmov_reg);
9556 %}
9557
9558 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9559 %{
9560 predicate(!UseAPX);
9561 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9562
9563 ins_cost(200); // XXX
9564 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9565 ins_encode %{
9566 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9567 %}
9568 ins_pipe(pipe_cmov_mem); // XXX
9569 %}
9570
9571 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9572 %{
9573 predicate(UseAPX);
9574 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9575
9576 ins_cost(200);
9577 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9578 ins_encode %{
9579 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9580 %}
9581 ins_pipe(pipe_cmov_mem);
9582 %}
9583
9584 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9585 %{
9586 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9587 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9588
9589 ins_cost(100); // XXX
9590 format %{ "setbn$cop $dst\t# unsigned, long" %}
9591 ins_encode %{
9592 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9593 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9594 %}
9595 ins_pipe(ialu_reg);
9596 %}
9597
9598 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9599 %{
9600 predicate(!UseAPX);
9601 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9602
9603 ins_cost(200); // XXX
9604 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9605 ins_encode %{
9606 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9607 %}
9608 ins_pipe(pipe_cmov_reg); // XXX
9609 %}
9610
9611 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9612 %{
9613 predicate(UseAPX);
9614 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9615
9616 ins_cost(200);
9617 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9618 ins_encode %{
9619 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9620 %}
9621 ins_pipe(pipe_cmov_reg);
9622 %}
9623
9624 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9625 %{
9626 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9627 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9628
9629 ins_cost(100); // XXX
9630 format %{ "setbn$cop $dst\t# unsigned, long" %}
9631 ins_encode %{
9632 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9633 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9634 %}
9635 ins_pipe(ialu_reg);
9636 %}
9637
9638 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9639 predicate(!UseAPX);
9640 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9641 ins_cost(200);
9642 expand %{
9643 cmovL_regU(cop, cr, dst, src);
9644 %}
9645 %}
9646
9647 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
9648 %{
9649 predicate(UseAPX);
9650 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9651 ins_cost(200);
9652 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9653 ins_encode %{
9654 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9655 %}
9656 ins_pipe(pipe_cmov_reg);
9657 %}
9658
9659 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9660 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9661 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9662
9663 ins_cost(200); // XXX
9664 format %{ "cmovpq $dst, $src\n\t"
9665 "cmovneq $dst, $src" %}
9666 ins_encode %{
9667 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9668 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9669 %}
9670 ins_pipe(pipe_cmov_reg);
9671 %}
9672
9673 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9674 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9675 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9676 effect(TEMP dst);
9677
9678 ins_cost(200);
9679 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9680 "cmovneq $dst, $src2" %}
9681 ins_encode %{
9682 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9683 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9684 %}
9685 ins_pipe(pipe_cmov_reg);
9686 %}
9687
9688 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9689 // inputs of the CMove
9690 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9691 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9692 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9693
9694 ins_cost(200); // XXX
9695 format %{ "cmovpq $dst, $src\n\t"
9696 "cmovneq $dst, $src" %}
9697 ins_encode %{
9698 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9699 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9700 %}
9701 ins_pipe(pipe_cmov_reg);
9702 %}
9703
9704 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9705 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9706 match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
9707 effect(TEMP dst);
9708
9709 ins_cost(200);
9710 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9711 "cmovneq $dst, $src2" %}
9712 ins_encode %{
9713 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9714 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9715 %}
9716 ins_pipe(pipe_cmov_reg);
9717 %}
9718
9719 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9720 %{
9721 predicate(!UseAPX);
9722 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9723
9724 ins_cost(200); // XXX
9725 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9726 ins_encode %{
9727 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9728 %}
9729 ins_pipe(pipe_cmov_mem); // XXX
9730 %}
9731
9732 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9733 predicate(!UseAPX);
9734 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9735 ins_cost(200);
9736 expand %{
9737 cmovL_memU(cop, cr, dst, src);
9738 %}
9739 %}
9740
9741 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9742 %{
9743 predicate(UseAPX);
9744 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9745
9746 ins_cost(200);
9747 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9748 ins_encode %{
9749 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9750 %}
9751 ins_pipe(pipe_cmov_mem);
9752 %}
9753
9754 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
9755 %{
9756 predicate(UseAPX);
9757 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9758 ins_cost(200);
9759 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9760 ins_encode %{
9761 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9762 %}
9763 ins_pipe(pipe_cmov_mem);
9764 %}
9765
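// There is no SSE conditional move, so float and double CMoves are a short
// branch on the inverted condition around a register-to-register move.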
9766 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9767 %{
9768 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9769
9770 ins_cost(200); // XXX
9771 format %{ "jn$cop skip\t# signed cmove float\n\t"
9772 "movss $dst, $src\n"
9773 "skip:" %}
9774 ins_encode %{
9775 Label Lskip;
9776 // Invert sense of branch from sense of CMOV
9777 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9778 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9779 __ bind(Lskip);
9780 %}
9781 ins_pipe(pipe_slow);
9782 %}
9783
9784 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9785 %{
9786 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9787
9788 ins_cost(200); // XXX
9789 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9790 "movss $dst, $src\n"
9791 "skip:" %}
9792 ins_encode %{
9793 Label Lskip;
9794 // Invert sense of branch from sense of CMOV
9795 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9796 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9797 __ bind(Lskip);
9798 %}
9799 ins_pipe(pipe_slow);
9800 %}
9801
9802 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9803 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9804 ins_cost(200);
9805 expand %{
9806 cmovF_regU(cop, cr, dst, src);
9807 %}
9808 %}
9809
9810 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9811 %{
9812 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9813
9814 ins_cost(200); // XXX
9815 format %{ "jn$cop skip\t# signed cmove double\n\t"
9816 "movsd $dst, $src\n"
9817 "skip:" %}
9818 ins_encode %{
9819 Label Lskip;
9820 // Invert sense of branch from sense of CMOV
9821 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9822 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9823 __ bind(Lskip);
9824 %}
9825 ins_pipe(pipe_slow);
9826 %}
9827
9828 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9829 %{
9830 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9831
9832 ins_cost(200); // XXX
9833 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9834 "movsd $dst, $src\n"
9835 "skip:" %}
9836 ins_encode %{
9837 Label Lskip;
9838 // Invert sense of branch from sense of CMOV
9839 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9840 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9841 __ bind(Lskip);
9842 %}
9843 ins_pipe(pipe_slow);
9844 %}
9845
9846 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9847 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9848 ins_cost(200);
9849 expand %{
9850 cmovD_regU(cop, cr, dst, src);
9851 %}
9852 %}
9853
9854 //----------Arithmetic Instructions--------------------------------------------
9855 //----------Addition Instructions----------------------------------------------
9856
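// With APX (UseAPX) the *_ndd flavors use the EVEX "new data destination"
// encoding: the result is written to a third register, so the allocator
// does not have to copy src1 into dst first. The trailing boolean of the
// e*-assembler calls selects the EVEX no-flags (NF) form and is false here
// because these patterns publish their flag effects (flag(...)/KILL cr).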
9857 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9858 %{
9859 predicate(!UseAPX);
9860 match(Set dst (AddI dst src));
9861 effect(KILL cr);
9862 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9863 format %{ "addl $dst, $src\t# int" %}
9864 ins_encode %{
9865 __ addl($dst$$Register, $src$$Register);
9866 %}
9867 ins_pipe(ialu_reg_reg);
9868 %}
9869
9870 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9871 %{
9872 predicate(UseAPX);
9873 match(Set dst (AddI src1 src2));
9874 effect(KILL cr);
9875 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
9876
9877 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9878 ins_encode %{
9879 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9880 %}
9881 ins_pipe(ialu_reg_reg);
9882 %}
9883
9884 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9885 %{
9886 predicate(!UseAPX);
9887 match(Set dst (AddI dst src));
9888 effect(KILL cr);
9889 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9890
9891 format %{ "addl $dst, $src\t# int" %}
9892 ins_encode %{
9893 __ addl($dst$$Register, $src$$constant);
9894 %}
9895 ins_pipe( ialu_reg );
9896 %}
9897
9898 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9899 %{
9900 predicate(UseAPX);
9901 match(Set dst (AddI src1 src2));
9902 effect(KILL cr);
9903 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
9904
9905 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9906 ins_encode %{
9907 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9908 %}
9909 ins_pipe( ialu_reg );
9910 %}
9911
9912 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9913 %{
9914 predicate(UseAPX);
9915 match(Set dst (AddI (LoadI src1) src2));
9916 effect(KILL cr);
9917 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9918
9919 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9920 ins_encode %{
9921 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9922 %}
9923 ins_pipe( ialu_reg );
9924 %}
9925
9926 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9927 %{
9928 predicate(!UseAPX);
9929 match(Set dst (AddI dst (LoadI src)));
9930 effect(KILL cr);
9931 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9932
9933 ins_cost(150); // XXX
9934 format %{ "addl $dst, $src\t# int" %}
9935 ins_encode %{
9936 __ addl($dst$$Register, $src$$Address);
9937 %}
9938 ins_pipe(ialu_reg_mem);
9939 %}
9940
9941 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
9942 %{
9943 predicate(UseAPX);
9944 match(Set dst (AddI src1 (LoadI src2)));
9945 effect(KILL cr);
9946 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
9947
9948 ins_cost(150);
9949 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9950 ins_encode %{
9951 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
9952 %}
9953 ins_pipe(ialu_reg_mem);
9954 %}
9955
9956 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9957 %{
9958 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9959 effect(KILL cr);
9960 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9961
9962 ins_cost(150); // XXX
9963 format %{ "addl $dst, $src\t# int" %}
9964 ins_encode %{
9965 __ addl($dst$$Address, $src$$Register);
9966 %}
9967 ins_pipe(ialu_mem_reg);
9968 %}
9969
9970 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
9971 %{
9972 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9973 effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

ins_cost(125); // XXX
9978 format %{ "addl $dst, $src\t# int" %}
9979 ins_encode %{
9980 __ addl($dst$$Address, $src$$constant);
9981 %}
9982 ins_pipe(ialu_mem_imm);
9983 %}
9984
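// With UseIncDec, adding the constant 1 or -1 matches to incl/decl, a
// shorter encoding that (unlike addl) leaves CF untouched; the patterns
// still KILL cr since the other flags are written.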
9985 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
9986 %{
9987 predicate(!UseAPX && UseIncDec);
9988 match(Set dst (AddI dst src));
9989 effect(KILL cr);
9990
9991 format %{ "incl $dst\t# int" %}
9992 ins_encode %{
9993 __ incrementl($dst$$Register);
9994 %}
9995 ins_pipe(ialu_reg);
9996 %}
9997
9998 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
9999 %{
10000 predicate(UseAPX && UseIncDec);
10001 match(Set dst (AddI src val));
10002 effect(KILL cr);
10003 flag(PD::Flag_ndd_demotable);
10004
10005 format %{ "eincl $dst, $src\t# int ndd" %}
10006 ins_encode %{
10007 __ eincl($dst$$Register, $src$$Register, false);
10008 %}
10009 ins_pipe(ialu_reg);
10010 %}
10011
10012 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10013 %{
10014 predicate(UseAPX && UseIncDec);
10015 match(Set dst (AddI (LoadI src) val));
10016 effect(KILL cr);
10017
10018 format %{ "eincl $dst, $src\t# int ndd" %}
10019 ins_encode %{
10020 __ eincl($dst$$Register, $src$$Address, false);
10021 %}
10022 ins_pipe(ialu_reg);
10023 %}
10024
10025 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10026 %{
10027 predicate(UseIncDec);
10028 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10029 effect(KILL cr);
10030
10031 ins_cost(125); // XXX
10032 format %{ "incl $dst\t# int" %}
10033 ins_encode %{
10034 __ incrementl($dst$$Address);
10035 %}
10036 ins_pipe(ialu_mem_imm);
10037 %}
10038
10039 // XXX why does that use AddI
10040 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10041 %{
10042 predicate(!UseAPX && UseIncDec);
10043 match(Set dst (AddI dst src));
10044 effect(KILL cr);
10045
10046 format %{ "decl $dst\t# int" %}
10047 ins_encode %{
10048 __ decrementl($dst$$Register);
10049 %}
10050 ins_pipe(ialu_reg);
10051 %}
10052
10053 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10054 %{
10055 predicate(UseAPX && UseIncDec);
10056 match(Set dst (AddI src val));
10057 effect(KILL cr);
10058 flag(PD::Flag_ndd_demotable);
10059
10060 format %{ "edecl $dst, $src\t# int ndd" %}
10061 ins_encode %{
10062 __ edecl($dst$$Register, $src$$Register, false);
10063 %}
10064 ins_pipe(ialu_reg);
10065 %}
10066
10067 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10068 %{
10069 predicate(UseAPX && UseIncDec);
10070 match(Set dst (AddI (LoadI src) val));
10071 effect(KILL cr);
10072
10073 format %{ "edecl $dst, $src\t# int ndd" %}
10074 ins_encode %{
10075 __ edecl($dst$$Register, $src$$Address, false);
10076 %}
10077 ins_pipe(ialu_reg);
10078 %}
10079
10080 // XXX why does that use AddI
10081 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10082 %{
10083 predicate(UseIncDec);
10084 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10085 effect(KILL cr);
10086
10087 ins_cost(125); // XXX
10088 format %{ "decl $dst\t# int" %}
10089 ins_encode %{
10090 __ decrementl($dst$$Address);
10091 %}
10092 ins_pipe(ialu_mem_imm);
10093 %}
10094
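// On CPUs where complex LEA forms are fast (supports_fast_2op_lea /
// supports_fast_3op_lea), shift-add and add-add shapes fold into a single
// leal instead of separate shift and add instructions.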
10095 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10096 %{
10097 predicate(VM_Version::supports_fast_2op_lea());
10098 match(Set dst (AddI (LShiftI index scale) disp));
10099
10100 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10101 ins_encode %{
10102 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10103 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10104 %}
10105 ins_pipe(ialu_reg_reg);
10106 %}
10107
10108 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10109 %{
10110 predicate(VM_Version::supports_fast_3op_lea());
10111 match(Set dst (AddI (AddI base index) disp));
10112
10113 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10114 ins_encode %{
10115 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10116 %}
10117 ins_pipe(ialu_reg_reg);
10118 %}
10119
10120 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10121 %{
10122 predicate(VM_Version::supports_fast_2op_lea());
10123 match(Set dst (AddI base (LShiftI index scale)));
10124
10125 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10126 ins_encode %{
10127 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10128 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10129 %}
10130 ins_pipe(ialu_reg_reg);
10131 %}
10132
10133 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10134 %{
10135 predicate(VM_Version::supports_fast_3op_lea());
10136 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10137
10138 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10139 ins_encode %{
10140 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10141 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10142 %}
10143 ins_pipe(ialu_reg_reg);
10144 %}
10145
10146 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10147 %{
10148 predicate(!UseAPX);
10149 match(Set dst (AddL dst src));
10150 effect(KILL cr);
10151 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10152
10153 format %{ "addq $dst, $src\t# long" %}
10154 ins_encode %{
10155 __ addq($dst$$Register, $src$$Register);
10156 %}
10157 ins_pipe(ialu_reg_reg);
10158 %}
10159
10160 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10161 %{
10162 predicate(UseAPX);
10163 match(Set dst (AddL src1 src2));
10164 effect(KILL cr);
10165 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10166
10167 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10168 ins_encode %{
10169 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10170 %}
10171 ins_pipe(ialu_reg_reg);
10172 %}
10173
10174 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10175 %{
10176 predicate(!UseAPX);
10177 match(Set dst (AddL dst src));
10178 effect(KILL cr);
10179 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10180
10181 format %{ "addq $dst, $src\t# long" %}
10182 ins_encode %{
10183 __ addq($dst$$Register, $src$$constant);
10184 %}
10185 ins_pipe( ialu_reg );
10186 %}
10187
10188 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10189 %{
10190 predicate(UseAPX);
10191 match(Set dst (AddL src1 src2));
10192 effect(KILL cr);
10193 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
10194
10195 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10196 ins_encode %{
10197 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10198 %}
10199 ins_pipe( ialu_reg );
10200 %}
10201
10202 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10203 %{
10204 predicate(UseAPX);
10205 match(Set dst (AddL (LoadL src1) src2));
10206 effect(KILL cr);
10207 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10208
10209 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10210 ins_encode %{
10211 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10212 %}
10213 ins_pipe( ialu_reg );
10214 %}
10215
10216 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10217 %{
10218 predicate(!UseAPX);
10219 match(Set dst (AddL dst (LoadL src)));
10220 effect(KILL cr);
10221 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10222
10223 ins_cost(150); // XXX
10224 format %{ "addq $dst, $src\t# long" %}
10225 ins_encode %{
10226 __ addq($dst$$Register, $src$$Address);
10227 %}
10228 ins_pipe(ialu_reg_mem);
10229 %}
10230
10231 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10232 %{
10233 predicate(UseAPX);
10234 match(Set dst (AddL src1 (LoadL src2)));
10235 effect(KILL cr);
10236 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10237
10238 ins_cost(150);
10239 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10240 ins_encode %{
10241 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10242 %}
10243 ins_pipe(ialu_reg_mem);
10244 %}
10245
10246 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10247 %{
10248 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10249 effect(KILL cr);
10250 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10251
10252 ins_cost(150); // XXX
10253 format %{ "addq $dst, $src\t# long" %}
10254 ins_encode %{
10255 __ addq($dst$$Address, $src$$Register);
10256 %}
10257 ins_pipe(ialu_mem_reg);
10258 %}
10259
10260 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10261 %{
10262 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10263 effect(KILL cr);
10264 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10265
10266 ins_cost(125); // XXX
10267 format %{ "addq $dst, $src\t# long" %}
10268 ins_encode %{
10269 __ addq($dst$$Address, $src$$constant);
10270 %}
10271 ins_pipe(ialu_mem_imm);
10272 %}
10273
10274 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10275 %{
10276 predicate(!UseAPX && UseIncDec);
10277 match(Set dst (AddL dst src));
10278 effect(KILL cr);
10279
10280 format %{ "incq $dst\t# long" %}
10281 ins_encode %{
10282 __ incrementq($dst$$Register);
10283 %}
10284 ins_pipe(ialu_reg);
10285 %}
10286
10287 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10288 %{
10289 predicate(UseAPX && UseIncDec);
10290 match(Set dst (AddL src val));
10291 effect(KILL cr);
10292 flag(PD::Flag_ndd_demotable);
10293
10294 format %{ "eincq $dst, $src\t# long ndd" %}
10295 ins_encode %{
10296 __ eincq($dst$$Register, $src$$Register, false);
10297 %}
10298 ins_pipe(ialu_reg);
10299 %}
10300
10301 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10302 %{
10303 predicate(UseAPX && UseIncDec);
10304 match(Set dst (AddL (LoadL src) val));
10305 effect(KILL cr);
10306
10307 format %{ "eincq $dst, $src\t# long ndd" %}
10308 ins_encode %{
10309 __ eincq($dst$$Register, $src$$Address, false);
10310 %}
10311 ins_pipe(ialu_reg);
10312 %}
10313
10314 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10315 %{
10316 predicate(UseIncDec);
10317 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10318 effect(KILL cr);
10319
10320 ins_cost(125); // XXX
10321 format %{ "incq $dst\t# long" %}
10322 ins_encode %{
10323 __ incrementq($dst$$Address);
10324 %}
10325 ins_pipe(ialu_mem_imm);
10326 %}
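
// Note: incq/decq leave CF unchanged (unlike addq/subq with +/-1), and
// UseIncDec lets platforms that pay for partial-flags merges prefer the
// add/sub forms instead.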
10327
10328 // XXX why does that use AddL
10329 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10330 %{
10331 predicate(!UseAPX && UseIncDec);
10332 match(Set dst (AddL dst src));
10333 effect(KILL cr);
10334
10335 format %{ "decq $dst\t# long" %}
10336 ins_encode %{
10337 __ decrementq($dst$$Register);
10338 %}
10339 ins_pipe(ialu_reg);
10340 %}
10341
10342 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10343 %{
10344 predicate(UseAPX && UseIncDec);
10345 match(Set dst (AddL src val));
10346 effect(KILL cr);
10347 flag(PD::Flag_ndd_demotable);
10348
10349 format %{ "edecq $dst, $src\t# long ndd" %}
10350 ins_encode %{
10351 __ edecq($dst$$Register, $src$$Register, false);
10352 %}
10353 ins_pipe(ialu_reg);
10354 %}
10355
10356 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10357 %{
10358 predicate(UseAPX && UseIncDec);
10359 match(Set dst (AddL (LoadL src) val));
10360 effect(KILL cr);
10361
10362 format %{ "edecq $dst, $src\t# long ndd" %}
10363 ins_encode %{
10364 __ edecq($dst$$Register, $src$$Address, false);
10365 %}
10366 ins_pipe(ialu_reg);
10367 %}
10368
10369 // XXX why does that use AddL
10370 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10371 %{
10372 predicate(UseIncDec);
10373 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10374 effect(KILL cr);
10375
10376 ins_cost(125); // XXX
10377 format %{ "decq $dst\t# long" %}
10378 ins_encode %{
10379 __ decrementq($dst$$Address);
10380 %}
10381 ins_pipe(ialu_mem_imm);
10382 %}
10383
10384 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10385 %{
10386 predicate(VM_Version::supports_fast_2op_lea());
10387 match(Set dst (AddL (LShiftL index scale) disp));
10388
10389 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10390 ins_encode %{
10391 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10392 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10393 %}
10394 ins_pipe(ialu_reg_reg);
10395 %}
10396
10397 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10398 %{
10399 predicate(VM_Version::supports_fast_3op_lea());
10400 match(Set dst (AddL (AddL base index) disp));
10401
10402 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10403 ins_encode %{
10404 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10405 %}
10406 ins_pipe(ialu_reg_reg);
10407 %}
10408
10409 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10410 %{
10411 predicate(VM_Version::supports_fast_2op_lea());
10412 match(Set dst (AddL base (LShiftL index scale)));
10413
10414 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10415 ins_encode %{
10416 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10417 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10418 %}
10419 ins_pipe(ialu_reg_reg);
10420 %}
10421
10422 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10423 %{
10424 predicate(VM_Version::supports_fast_3op_lea());
10425 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10426
10427 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10428 ins_encode %{
10429 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10430 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10431 %}
10432 ins_pipe(ialu_reg_reg);
10433 %}
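
// The leaq rules above fold shifted-index address arithmetic into a single
// instruction, e.g. (AddL (LShiftL index scale) disp) becomes
// leaq $dst, [$index << $scale + $disp]. The supports_fast_2op_lea/
// supports_fast_3op_lea predicates restrict them to CPUs where these
// multi-component lea forms are cheap.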
10434
10435 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10436 %{
10437 match(Set dst (AddP dst src));
10438 effect(KILL cr);
10439 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10440
10441 format %{ "addq $dst, $src\t# ptr" %}
10442 ins_encode %{
10443 __ addq($dst$$Register, $src$$Register);
10444 %}
10445 ins_pipe(ialu_reg_reg);
10446 %}
10447
10448 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10449 %{
10450 match(Set dst (AddP dst src));
10451 effect(KILL cr);
10452 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10453
10454 format %{ "addq $dst, $src\t# ptr" %}
10455 ins_encode %{
10456 __ addq($dst$$Register, $src$$constant);
10457 %}
10458 ins_pipe( ialu_reg );
10459 %}
10460
10461 // XXX addP mem ops ????
10462
10463 instruct checkCastPP(rRegP dst)
10464 %{
10465 match(Set dst (CheckCastPP dst));
10466
10467 size(0);
10468 format %{ "# checkcastPP of $dst" %}
10469 ins_encode(/* empty encoding */);
10470 ins_pipe(empty);
10471 %}
10472
10473 instruct castPP(rRegP dst)
10474 %{
10475 match(Set dst (CastPP dst));
10476
10477 size(0);
10478 format %{ "# castPP of $dst" %}
10479 ins_encode(/* empty encoding */);
10480 ins_pipe(empty);
10481 %}
10482
10483 instruct castII(rRegI dst)
10484 %{
10485 predicate(VerifyConstraintCasts == 0);
10486 match(Set dst (CastII dst));
10487
10488 size(0);
10489 format %{ "# castII of $dst" %}
10490 ins_encode(/* empty encoding */);
10491 ins_cost(0);
10492 ins_pipe(empty);
10493 %}
10494
10495 instruct castII_checked(rRegI dst, rFlagsReg cr)
10496 %{
10497 predicate(VerifyConstraintCasts > 0);
10498 match(Set dst (CastII dst));
10499
10500 effect(KILL cr);
10501 format %{ "# cast_checked_II $dst" %}
10502 ins_encode %{
10503 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10504 %}
10505 ins_pipe(pipe_slow);
10506 %}
10507
10508 instruct castLL(rRegL dst)
10509 %{
10510 predicate(VerifyConstraintCasts == 0);
10511 match(Set dst (CastLL dst));
10512
10513 size(0);
10514 format %{ "# castLL of $dst" %}
10515 ins_encode(/* empty encoding */);
10516 ins_cost(0);
10517 ins_pipe(empty);
10518 %}
10519
10520 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10521 %{
10522 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10523 match(Set dst (CastLL dst));
10524
10525 effect(KILL cr);
10526 format %{ "# cast_checked_LL $dst" %}
10527 ins_encode %{
10528 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10529 %}
10530 ins_pipe(pipe_slow);
10531 %}
10532
10533 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10534 %{
10535 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10536 match(Set dst (CastLL dst));
10537
10538 effect(KILL cr, TEMP tmp);
10539 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10540 ins_encode %{
10541 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10542 %}
10543 ins_pipe(pipe_slow);
10544 %}
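
// With VerifyConstraintCasts > 0, the normally zero-size cast nodes above
// become runtime checks (verify_int_in_range / verify_long_in_range) that
// assert the value still lies within the type bounds the compiler derived
// for it.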
10545
10546 instruct castFF(regF dst)
10547 %{
10548 match(Set dst (CastFF dst));
10549
10550 size(0);
10551 format %{ "# castFF of $dst" %}
10552 ins_encode(/* empty encoding */);
10553 ins_cost(0);
10554 ins_pipe(empty);
10555 %}
10556
10557 instruct castHH(regF dst)
10558 %{
10559 match(Set dst (CastHH dst));
10560
10561 size(0);
10562 format %{ "# castHH of $dst" %}
10563 ins_encode(/* empty encoding */);
10564 ins_cost(0);
10565 ins_pipe(empty);
10566 %}
10567
10568 instruct castDD(regD dst)
10569 %{
10570 match(Set dst (CastDD dst));
10571
10572 size(0);
10573 format %{ "# castDD of $dst" %}
10574 ins_encode(/* empty encoding */);
10575 ins_cost(0);
10576 ins_pipe(empty);
10577 %}
10578
10579 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
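// cmpxchg semantics, for reference: lock; cmpxchg [mem], newval compares
// rax with [mem]; if they are equal it stores newval and sets ZF,
// otherwise it loads [mem] into rax and clears ZF. The setcc below
// materializes ZF into $res as 0/1.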
10580 instruct compareAndSwapP(rRegI res,
10581 memory mem_ptr,
10582 rax_RegP oldval, rRegP newval,
10583 rFlagsReg cr)
10584 %{
10585 predicate(n->as_LoadStore()->barrier_data() == 0);
10586 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10587 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10588 effect(KILL cr, KILL oldval);
10589
10590 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10591 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10592 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10593 ins_encode %{
10594 __ lock();
10595 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10596 __ setcc(Assembler::equal, $res$$Register);
10597 %}
10598 ins_pipe( pipe_cmpxchg );
10599 %}
10600
10601 instruct compareAndSwapL(rRegI res,
10602 memory mem_ptr,
10603 rax_RegL oldval, rRegL newval,
10604 rFlagsReg cr)
10605 %{
10606 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10607 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10608 effect(KILL cr, KILL oldval);
10609
10610 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10611 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10612 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10613 ins_encode %{
10614 __ lock();
10615 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10616 __ setcc(Assembler::equal, $res$$Register);
10617 %}
10618 ins_pipe( pipe_cmpxchg );
10619 %}
10620
10621 instruct compareAndSwapI(rRegI res,
10622 memory mem_ptr,
10623 rax_RegI oldval, rRegI newval,
10624 rFlagsReg cr)
10625 %{
10626 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10627 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10628 effect(KILL cr, KILL oldval);
10629
10630 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10631 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10632 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10633 ins_encode %{
10634 __ lock();
10635 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10636 __ setcc(Assembler::equal, $res$$Register);
10637 %}
10638 ins_pipe( pipe_cmpxchg );
10639 %}
10640
10641 instruct compareAndSwapB(rRegI res,
10642 memory mem_ptr,
10643 rax_RegI oldval, rRegI newval,
10644 rFlagsReg cr)
10645 %{
10646 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10647 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10648 effect(KILL cr, KILL oldval);
10649
10650 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10651 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10652 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10653 ins_encode %{
10654 __ lock();
10655 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10656 __ setcc(Assembler::equal, $res$$Register);
10657 %}
10658 ins_pipe( pipe_cmpxchg );
10659 %}
10660
10661 instruct compareAndSwapS(rRegI res,
10662 memory mem_ptr,
10663 rax_RegI oldval, rRegI newval,
10664 rFlagsReg cr)
10665 %{
10666 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10667 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10668 effect(KILL cr, KILL oldval);
10669
10670 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10671 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10672 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10673 ins_encode %{
10674 __ lock();
10675 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10676 __ setcc(Assembler::equal, $res$$Register);
10677 %}
10678 ins_pipe( pipe_cmpxchg );
10679 %}
10680
10681 instruct compareAndSwapN(rRegI res,
10682 memory mem_ptr,
10683 rax_RegN oldval, rRegN newval,
10684 rFlagsReg cr) %{
10685 predicate(n->as_LoadStore()->barrier_data() == 0);
10686 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10687 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10688 effect(KILL cr, KILL oldval);
10689
10690 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10691 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10692 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10693 ins_encode %{
10694 __ lock();
10695 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10696 __ setcc(Assembler::equal, $res$$Register);
10697 %}
10698 ins_pipe( pipe_cmpxchg );
10699 %}
10700
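// CompareAndExchange differs from CompareAndSwap only in its result: it
// yields the value witnessed in memory (which cmpxchg leaves in rax)
// rather than a 0/1 success flag, so no setcc is needed.
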
10701 instruct compareAndExchangeB(
10702 memory mem_ptr,
10703 rax_RegI oldval, rRegI newval,
10704 rFlagsReg cr)
10705 %{
10706 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10707 effect(KILL cr);
10708
10709 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10710 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10711 ins_encode %{
10712 __ lock();
10713 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10714 %}
10715 ins_pipe( pipe_cmpxchg );
10716 %}
10717
10718 instruct compareAndExchangeS(
10719 memory mem_ptr,
10720 rax_RegI oldval, rRegI newval,
10721 rFlagsReg cr)
10722 %{
10723 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10724 effect(KILL cr);
10725
10726 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10727 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10728 ins_encode %{
10729 __ lock();
10730 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10731 %}
10732 ins_pipe( pipe_cmpxchg );
10733 %}
10734
10735 instruct compareAndExchangeI(
10736 memory mem_ptr,
10737 rax_RegI oldval, rRegI newval,
10738 rFlagsReg cr)
10739 %{
10740 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10741 effect(KILL cr);
10742
10743 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10744 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10745 ins_encode %{
10746 __ lock();
10747 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10748 %}
10749 ins_pipe( pipe_cmpxchg );
10750 %}
10751
10752 instruct compareAndExchangeL(
10753 memory mem_ptr,
10754 rax_RegL oldval, rRegL newval,
10755 rFlagsReg cr)
10756 %{
10757 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10758 effect(KILL cr);
10759
10760 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10761 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10762 ins_encode %{
10763 __ lock();
10764 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10765 %}
10766 ins_pipe( pipe_cmpxchg );
10767 %}
10768
10769 instruct compareAndExchangeN(
10770 memory mem_ptr,
10771 rax_RegN oldval, rRegN newval,
10772 rFlagsReg cr) %{
10773 predicate(n->as_LoadStore()->barrier_data() == 0);
10774 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10775 effect(KILL cr);
10776
10777 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10778 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10779 ins_encode %{
10780 __ lock();
10781 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10782 %}
10783 ins_pipe( pipe_cmpxchg );
10784 %}
10785
10786 instruct compareAndExchangeP(
10787 memory mem_ptr,
10788 rax_RegP oldval, rRegP newval,
10789 rFlagsReg cr)
10790 %{
10791 predicate(n->as_LoadStore()->barrier_data() == 0);
10792 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10793 effect(KILL cr);
10794
10795 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10796 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10797 ins_encode %{
10798 __ lock();
10799 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10800 %}
10801 ins_pipe( pipe_cmpxchg );
10802 %}
10803
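// GetAndAdd: when the fetched value is unused (result_not_used()), a plain
// locked add suffices; otherwise lock xadd returns the previous memory
// value in the register operand.
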
10804 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10805 predicate(n->as_LoadStore()->result_not_used());
10806 match(Set dummy (GetAndAddB mem add));
10807 effect(KILL cr);
10808 format %{ "addb_lock $mem, $add" %}
10809 ins_encode %{
10810 __ lock();
10811 __ addb($mem$$Address, $add$$Register);
10812 %}
10813 ins_pipe(pipe_cmpxchg);
10814 %}
10815
10816 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10817 predicate(n->as_LoadStore()->result_not_used());
10818 match(Set dummy (GetAndAddB mem add));
10819 effect(KILL cr);
10820 format %{ "addb_lock $mem, $add" %}
10821 ins_encode %{
10822 __ lock();
10823 __ addb($mem$$Address, $add$$constant);
10824 %}
10825 ins_pipe(pipe_cmpxchg);
10826 %}
10827
10828 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10829 predicate(!n->as_LoadStore()->result_not_used());
10830 match(Set newval (GetAndAddB mem newval));
10831 effect(KILL cr);
10832 format %{ "xaddb_lock $mem, $newval" %}
10833 ins_encode %{
10834 __ lock();
10835 __ xaddb($mem$$Address, $newval$$Register);
10836 %}
10837 ins_pipe(pipe_cmpxchg);
10838 %}
10839
10840 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10841 predicate(n->as_LoadStore()->result_not_used());
10842 match(Set dummy (GetAndAddS mem add));
10843 effect(KILL cr);
10844 format %{ "addw_lock $mem, $add" %}
10845 ins_encode %{
10846 __ lock();
10847 __ addw($mem$$Address, $add$$Register);
10848 %}
10849 ins_pipe(pipe_cmpxchg);
10850 %}
10851
10852 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10853 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10854 match(Set dummy (GetAndAddS mem add));
10855 effect(KILL cr);
10856 format %{ "addw_lock $mem, $add" %}
10857 ins_encode %{
10858 __ lock();
10859 __ addw($mem$$Address, $add$$constant);
10860 %}
10861 ins_pipe(pipe_cmpxchg);
10862 %}
10863
10864 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10865 predicate(!n->as_LoadStore()->result_not_used());
10866 match(Set newval (GetAndAddS mem newval));
10867 effect(KILL cr);
10868 format %{ "xaddw_lock $mem, $newval" %}
10869 ins_encode %{
10870 __ lock();
10871 __ xaddw($mem$$Address, $newval$$Register);
10872 %}
10873 ins_pipe(pipe_cmpxchg);
10874 %}
10875
10876 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10877 predicate(n->as_LoadStore()->result_not_used());
10878 match(Set dummy (GetAndAddI mem add));
10879 effect(KILL cr);
10880 format %{ "addl_lock $mem, $add" %}
10881 ins_encode %{
10882 __ lock();
10883 __ addl($mem$$Address, $add$$Register);
10884 %}
10885 ins_pipe(pipe_cmpxchg);
10886 %}
10887
10888 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10889 predicate(n->as_LoadStore()->result_not_used());
10890 match(Set dummy (GetAndAddI mem add));
10891 effect(KILL cr);
10892 format %{ "addl_lock $mem, $add" %}
10893 ins_encode %{
10894 __ lock();
10895 __ addl($mem$$Address, $add$$constant);
10896 %}
10897 ins_pipe(pipe_cmpxchg);
10898 %}
10899
10900 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10901 predicate(!n->as_LoadStore()->result_not_used());
10902 match(Set newval (GetAndAddI mem newval));
10903 effect(KILL cr);
10904 format %{ "xaddl_lock $mem, $newval" %}
10905 ins_encode %{
10906 __ lock();
10907 __ xaddl($mem$$Address, $newval$$Register);
10908 %}
10909 ins_pipe(pipe_cmpxchg);
10910 %}
10911
10912 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10913 predicate(n->as_LoadStore()->result_not_used());
10914 match(Set dummy (GetAndAddL mem add));
10915 effect(KILL cr);
10916 format %{ "addq_lock $mem, $add" %}
10917 ins_encode %{
10918 __ lock();
10919 __ addq($mem$$Address, $add$$Register);
10920 %}
10921 ins_pipe(pipe_cmpxchg);
10922 %}
10923
10924 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10925 predicate(n->as_LoadStore()->result_not_used());
10926 match(Set dummy (GetAndAddL mem add));
10927 effect(KILL cr);
10928 format %{ "addq_lock $mem, $add" %}
10929 ins_encode %{
10930 __ lock();
10931 __ addq($mem$$Address, $add$$constant);
10932 %}
10933 ins_pipe(pipe_cmpxchg);
10934 %}
10935
10936 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10937 predicate(!n->as_LoadStore()->result_not_used());
10938 match(Set newval (GetAndAddL mem newval));
10939 effect(KILL cr);
10940 format %{ "xaddq_lock $mem, $newval" %}
10941 ins_encode %{
10942 __ lock();
10943 __ xaddq($mem$$Address, $newval$$Register);
10944 %}
10945 ins_pipe(pipe_cmpxchg);
10946 %}
10947
10948 instruct xchgB( memory mem, rRegI newval) %{
10949 match(Set newval (GetAndSetB mem newval));
10950 format %{ "XCHGB $newval,[$mem]" %}
10951 ins_encode %{
10952 __ xchgb($newval$$Register, $mem$$Address);
10953 %}
10954 ins_pipe( pipe_cmpxchg );
10955 %}
10956
10957 instruct xchgS( memory mem, rRegI newval) %{
10958 match(Set newval (GetAndSetS mem newval));
10959 format %{ "XCHGW $newval,[$mem]" %}
10960 ins_encode %{
10961 __ xchgw($newval$$Register, $mem$$Address);
10962 %}
10963 ins_pipe( pipe_cmpxchg );
10964 %}
10965
10966 instruct xchgI( memory mem, rRegI newval) %{
10967 match(Set newval (GetAndSetI mem newval));
10968 format %{ "XCHGL $newval,[$mem]" %}
10969 ins_encode %{
10970 __ xchgl($newval$$Register, $mem$$Address);
10971 %}
10972 ins_pipe( pipe_cmpxchg );
10973 %}
10974
10975 instruct xchgL( memory mem, rRegL newval) %{
10976 match(Set newval (GetAndSetL mem newval));
10977   format %{ "XCHGQ $newval,[$mem]" %}
10978 ins_encode %{
10979 __ xchgq($newval$$Register, $mem$$Address);
10980 %}
10981 ins_pipe( pipe_cmpxchg );
10982 %}
10983
10984 instruct xchgP( memory mem, rRegP newval) %{
10985 match(Set newval (GetAndSetP mem newval));
10986 predicate(n->as_LoadStore()->barrier_data() == 0);
10987 format %{ "XCHGQ $newval,[$mem]" %}
10988 ins_encode %{
10989 __ xchgq($newval$$Register, $mem$$Address);
10990 %}
10991 ins_pipe( pipe_cmpxchg );
10992 %}
10993
10994 instruct xchgN( memory mem, rRegN newval) %{
10995 predicate(n->as_LoadStore()->barrier_data() == 0);
10996 match(Set newval (GetAndSetN mem newval));
10997   format %{ "XCHGL $newval,[$mem]" %}
10998 ins_encode %{
10999 __ xchgl($newval$$Register, $mem$$Address);
11000 %}
11001 ins_pipe( pipe_cmpxchg );
11002 %}
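
// xchg with a memory operand is implicitly locked on x86, so the GetAndSet
// rules above need no explicit lock() prefix.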
11003
11004 //----------Abs Instructions-------------------------------------------
11005
11006 // Integer Absolute Instructions
11007 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11008 %{
11009 match(Set dst (AbsI src));
11010 effect(TEMP dst, KILL cr);
11011 format %{ "xorl $dst, $dst\t# abs int\n\t"
11012 "subl $dst, $src\n\t"
11013 "cmovll $dst, $src" %}
11014 ins_encode %{
11015 __ xorl($dst$$Register, $dst$$Register);
11016 __ subl($dst$$Register, $src$$Register);
11017 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11018 %}
11019
11020 ins_pipe(ialu_reg_reg);
11021 %}
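
// The xor/sub/cmov sequence above computes abs without a branch; as a
// rough C sketch (illustrative only; as usual, min_int maps to itself):
//   int abs_branchless(int x) { int d = 0 - x; return d < 0 ? x : d; }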
11022
11023 // Long Absolute Instructions
11024 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11025 %{
11026 match(Set dst (AbsL src));
11027 effect(TEMP dst, KILL cr);
11028 format %{ "xorl $dst, $dst\t# abs long\n\t"
11029 "subq $dst, $src\n\t"
11030 "cmovlq $dst, $src" %}
11031 ins_encode %{
11032 __ xorl($dst$$Register, $dst$$Register);
11033 __ subq($dst$$Register, $src$$Register);
11034 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11035 %}
11036
11037 ins_pipe(ialu_reg_reg);
11038 %}
11039
11040 //----------Subtraction Instructions-------------------------------------------
11041
11042 // Integer Subtraction Instructions
11043 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11044 %{
11045 predicate(!UseAPX);
11046 match(Set dst (SubI dst src));
11047 effect(KILL cr);
11048 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11049
11050 format %{ "subl $dst, $src\t# int" %}
11051 ins_encode %{
11052 __ subl($dst$$Register, $src$$Register);
11053 %}
11054 ins_pipe(ialu_reg_reg);
11055 %}
11056
11057 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11058 %{
11059 predicate(UseAPX);
11060 match(Set dst (SubI src1 src2));
11061 effect(KILL cr);
11062 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11063
11064 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11065 ins_encode %{
11066 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11067 %}
11068 ins_pipe(ialu_reg_reg);
11069 %}
11070
11071 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11072 %{
11073 predicate(UseAPX);
11074 match(Set dst (SubI src1 src2));
11075 effect(KILL cr);
11076 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11077
11078 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11079 ins_encode %{
11080 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11081 %}
11082 ins_pipe(ialu_reg_reg);
11083 %}
11084
11085 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11086 %{
11087 predicate(UseAPX);
11088 match(Set dst (SubI (LoadI src1) src2));
11089 effect(KILL cr);
11090 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11091
11092 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11093 ins_encode %{
11094 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11095 %}
11096 ins_pipe(ialu_reg_reg);
11097 %}
11098
11099 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11100 %{
11101 predicate(!UseAPX);
11102 match(Set dst (SubI dst (LoadI src)));
11103 effect(KILL cr);
11104 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11105
11106 ins_cost(150);
11107 format %{ "subl $dst, $src\t# int" %}
11108 ins_encode %{
11109 __ subl($dst$$Register, $src$$Address);
11110 %}
11111 ins_pipe(ialu_reg_mem);
11112 %}
11113
11114 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11115 %{
11116 predicate(UseAPX);
11117 match(Set dst (SubI src1 (LoadI src2)));
11118 effect(KILL cr);
11119 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11120
11121 ins_cost(150);
11122 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11123 ins_encode %{
11124 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11125 %}
11126 ins_pipe(ialu_reg_mem);
11127 %}
11128
11129 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11130 %{
11131 predicate(UseAPX);
11132 match(Set dst (SubI (LoadI src1) src2));
11133 effect(KILL cr);
11134 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11135
11136 ins_cost(150);
11137 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11138 ins_encode %{
11139 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11140 %}
11141 ins_pipe(ialu_reg_mem);
11142 %}
11143
11144 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11145 %{
11146 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11147 effect(KILL cr);
11148 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11149
11150 ins_cost(150);
11151 format %{ "subl $dst, $src\t# int" %}
11152 ins_encode %{
11153 __ subl($dst$$Address, $src$$Register);
11154 %}
11155 ins_pipe(ialu_mem_reg);
11156 %}
11157
11158 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11159 %{
11160 predicate(!UseAPX);
11161 match(Set dst (SubL dst src));
11162 effect(KILL cr);
11163 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11164
11165 format %{ "subq $dst, $src\t# long" %}
11166 ins_encode %{
11167 __ subq($dst$$Register, $src$$Register);
11168 %}
11169 ins_pipe(ialu_reg_reg);
11170 %}
11171
11172 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11173 %{
11174 predicate(UseAPX);
11175 match(Set dst (SubL src1 src2));
11176 effect(KILL cr);
11177 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11178
11179 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11180 ins_encode %{
11181 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11182 %}
11183 ins_pipe(ialu_reg_reg);
11184 %}
11185
11186 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11187 %{
11188 predicate(UseAPX);
11189 match(Set dst (SubL src1 src2));
11190 effect(KILL cr);
11191 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11192
11193 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11194 ins_encode %{
11195 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11196 %}
11197 ins_pipe(ialu_reg_reg);
11198 %}
11199
11200 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11201 %{
11202 predicate(UseAPX);
11203 match(Set dst (SubL (LoadL src1) src2));
11204 effect(KILL cr);
11205 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11206
11207 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11208 ins_encode %{
11209 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11210 %}
11211 ins_pipe(ialu_reg_reg);
11212 %}
11213
11214 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11215 %{
11216 predicate(!UseAPX);
11217 match(Set dst (SubL dst (LoadL src)));
11218 effect(KILL cr);
11219 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11220
11221 ins_cost(150);
11222 format %{ "subq $dst, $src\t# long" %}
11223 ins_encode %{
11224 __ subq($dst$$Register, $src$$Address);
11225 %}
11226 ins_pipe(ialu_reg_mem);
11227 %}
11228
11229 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11230 %{
11231 predicate(UseAPX);
11232 match(Set dst (SubL src1 (LoadL src2)));
11233 effect(KILL cr);
11234 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11235
11236 ins_cost(150);
11237 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11238 ins_encode %{
11239 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11240 %}
11241 ins_pipe(ialu_reg_mem);
11242 %}
11243
11244 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11245 %{
11246 predicate(UseAPX);
11247 match(Set dst (SubL (LoadL src1) src2));
11248 effect(KILL cr);
11249 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11250
11251 ins_cost(150);
11252 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11253 ins_encode %{
11254 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11255 %}
11256 ins_pipe(ialu_reg_mem);
11257 %}
11258
11259 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11260 %{
11261 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11262 effect(KILL cr);
11263 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11264
11265 ins_cost(150);
11266 format %{ "subq $dst, $src\t# long" %}
11267 ins_encode %{
11268 __ subq($dst$$Address, $src$$Register);
11269 %}
11270 ins_pipe(ialu_mem_reg);
11271 %}
11272
11273 // Subtract from a pointer
11274 // XXX hmpf???
11275 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11276 %{
11277 match(Set dst (AddP dst (SubI zero src)));
11278 effect(KILL cr);
11279
11280 format %{ "subq $dst, $src\t# ptr - int" %}
11281 ins_encode %{
11282 __ subq($dst$$Register, $src$$Register);
11283 %}
11284 ins_pipe(ialu_reg_reg);
11285 %}
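
// There is no SubP ideal node; "pointer - int" reaches the matcher as
// (AddP dst (SubI 0 src)), which the rule above collapses into one subq.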
11286
11287 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11288 %{
11289 predicate(!UseAPX);
11290 match(Set dst (SubI zero dst));
11291 effect(KILL cr);
11292 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11293
11294 format %{ "negl $dst\t# int" %}
11295 ins_encode %{
11296 __ negl($dst$$Register);
11297 %}
11298 ins_pipe(ialu_reg);
11299 %}
11300
11301 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11302 %{
11303 predicate(UseAPX);
11304 match(Set dst (SubI zero src));
11305 effect(KILL cr);
11306 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11307
11308 format %{ "enegl $dst, $src\t# int ndd" %}
11309 ins_encode %{
11310 __ enegl($dst$$Register, $src$$Register, false);
11311 %}
11312 ins_pipe(ialu_reg);
11313 %}
11314
11315 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11316 %{
11317 predicate(!UseAPX);
11318 match(Set dst (NegI dst));
11319 effect(KILL cr);
11320 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11321
11322 format %{ "negl $dst\t# int" %}
11323 ins_encode %{
11324 __ negl($dst$$Register);
11325 %}
11326 ins_pipe(ialu_reg);
11327 %}
11328
11329 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11330 %{
11331 predicate(UseAPX);
11332 match(Set dst (NegI src));
11333 effect(KILL cr);
11334 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11335
11336 format %{ "enegl $dst, $src\t# int ndd" %}
11337 ins_encode %{
11338 __ enegl($dst$$Register, $src$$Register, false);
11339 %}
11340 ins_pipe(ialu_reg);
11341 %}
11342
11343 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11344 %{
11345 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11346 effect(KILL cr);
11347 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11348
11349 format %{ "negl $dst\t# int" %}
11350 ins_encode %{
11351 __ negl($dst$$Address);
11352 %}
11353 ins_pipe(ialu_reg);
11354 %}
11355
11356 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11357 %{
11358 predicate(!UseAPX);
11359 match(Set dst (SubL zero dst));
11360 effect(KILL cr);
11361 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11362
11363 format %{ "negq $dst\t# long" %}
11364 ins_encode %{
11365 __ negq($dst$$Register);
11366 %}
11367 ins_pipe(ialu_reg);
11368 %}
11369
11370 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11371 %{
11372 predicate(UseAPX);
11373 match(Set dst (SubL zero src));
11374 effect(KILL cr);
11375 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11376
11377 format %{ "enegq $dst, $src\t# long ndd" %}
11378 ins_encode %{
11379 __ enegq($dst$$Register, $src$$Register, false);
11380 %}
11381 ins_pipe(ialu_reg);
11382 %}
11383
11384 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11385 %{
11386 predicate(!UseAPX);
11387 match(Set dst (NegL dst));
11388 effect(KILL cr);
11389 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11390
11391   format %{ "negq $dst\t# long" %}
11392 ins_encode %{
11393 __ negq($dst$$Register);
11394 %}
11395 ins_pipe(ialu_reg);
11396 %}
11397
11398 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11399 %{
11400 predicate(UseAPX);
11401 match(Set dst (NegL src));
11402 effect(KILL cr);
11403 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11404
11405 format %{ "enegq $dst, $src\t# long ndd" %}
11406 ins_encode %{
11407 __ enegq($dst$$Register, $src$$Register, false);
11408 %}
11409 ins_pipe(ialu_reg);
11410 %}
11411
11412 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11413 %{
11414 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11415 effect(KILL cr);
11416 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11417
11418 format %{ "negq $dst\t# long" %}
11419 ins_encode %{
11420 __ negq($dst$$Address);
11421 %}
11422 ins_pipe(ialu_reg);
11423 %}
11424
11425 //----------Multiplication/Division Instructions-------------------------------
11426 // Integer Multiplication Instructions
11427 // Multiply Register
11428
11429 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11430 %{
11431 predicate(!UseAPX);
11432 match(Set dst (MulI dst src));
11433 effect(KILL cr);
11434
11435 ins_cost(300);
11436 format %{ "imull $dst, $src\t# int" %}
11437 ins_encode %{
11438 __ imull($dst$$Register, $src$$Register);
11439 %}
11440 ins_pipe(ialu_reg_reg_alu0);
11441 %}
11442
11443 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11444 %{
11445 predicate(UseAPX);
11446 match(Set dst (MulI src1 src2));
11447 effect(KILL cr);
11448 flag(PD::Flag_ndd_demotable_commutative);
11449
11450 ins_cost(300);
11451 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11452 ins_encode %{
11453 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11454 %}
11455 ins_pipe(ialu_reg_reg_alu0);
11456 %}
11457
11458 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11459 %{
11460 match(Set dst (MulI src imm));
11461 effect(KILL cr);
11462
11463 ins_cost(300);
11464 format %{ "imull $dst, $src, $imm\t# int" %}
11465 ins_encode %{
11466 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11467 %}
11468 ins_pipe(ialu_reg_reg_alu0);
11469 %}
11470
11471 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11472 %{
11473 predicate(!UseAPX);
11474 match(Set dst (MulI dst (LoadI src)));
11475 effect(KILL cr);
11476
11477 ins_cost(350);
11478 format %{ "imull $dst, $src\t# int" %}
11479 ins_encode %{
11480 __ imull($dst$$Register, $src$$Address);
11481 %}
11482 ins_pipe(ialu_reg_mem_alu0);
11483 %}
11484
11485 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11486 %{
11487 predicate(UseAPX);
11488 match(Set dst (MulI src1 (LoadI src2)));
11489 effect(KILL cr);
11490 flag(PD::Flag_ndd_demotable);
11491
11492 ins_cost(350);
11493 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11494 ins_encode %{
11495 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11496 %}
11497 ins_pipe(ialu_reg_mem_alu0);
11498 %}
11499
11500 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11501 %{
11502 match(Set dst (MulI (LoadI src) imm));
11503 effect(KILL cr);
11504
11505 ins_cost(300);
11506 format %{ "imull $dst, $src, $imm\t# int" %}
11507 ins_encode %{
11508 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11509 %}
11510 ins_pipe(ialu_reg_mem_alu0);
11511 %}
11512
11513 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11514 %{
11515 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11516 effect(KILL cr, KILL src2);
11517
11518 expand %{ mulI_rReg(dst, src1, cr);
11519 mulI_rReg(src2, src3, cr);
11520 addI_rReg(dst, src2, cr); %}
11521 %}
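
// MulAddS2I computes dst = dst*src1 + src2*src3 by expanding into the
// plain imull/addl rules above, using src2 as scratch for the second
// product (hence KILL src2).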
11522
11523 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11524 %{
11525 predicate(!UseAPX);
11526 match(Set dst (MulL dst src));
11527 effect(KILL cr);
11528
11529 ins_cost(300);
11530 format %{ "imulq $dst, $src\t# long" %}
11531 ins_encode %{
11532 __ imulq($dst$$Register, $src$$Register);
11533 %}
11534 ins_pipe(ialu_reg_reg_alu0);
11535 %}
11536
11537 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11538 %{
11539 predicate(UseAPX);
11540 match(Set dst (MulL src1 src2));
11541 effect(KILL cr);
11542 flag(PD::Flag_ndd_demotable_commutative);
11543
11544 ins_cost(300);
11545 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11546 ins_encode %{
11547 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11548 %}
11549 ins_pipe(ialu_reg_reg_alu0);
11550 %}
11551
11552 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11553 %{
11554 match(Set dst (MulL src imm));
11555 effect(KILL cr);
11556
11557 ins_cost(300);
11558 format %{ "imulq $dst, $src, $imm\t# long" %}
11559 ins_encode %{
11560 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11561 %}
11562 ins_pipe(ialu_reg_reg_alu0);
11563 %}
11564
11565 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11566 %{
11567 predicate(!UseAPX);
11568 match(Set dst (MulL dst (LoadL src)));
11569 effect(KILL cr);
11570
11571 ins_cost(350);
11572 format %{ "imulq $dst, $src\t# long" %}
11573 ins_encode %{
11574 __ imulq($dst$$Register, $src$$Address);
11575 %}
11576 ins_pipe(ialu_reg_mem_alu0);
11577 %}
11578
11579 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11580 %{
11581 predicate(UseAPX);
11582 match(Set dst (MulL src1 (LoadL src2)));
11583 effect(KILL cr);
11584 flag(PD::Flag_ndd_demotable_commutative);
11585
11586 ins_cost(350);
11587   format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11588 ins_encode %{
11589 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11590 %}
11591 ins_pipe(ialu_reg_mem_alu0);
11592 %}
11593
11594 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11595 %{
11596 match(Set dst (MulL (LoadL src) imm));
11597 effect(KILL cr);
11598
11599 ins_cost(300);
11600 format %{ "imulq $dst, $src, $imm\t# long" %}
11601 ins_encode %{
11602 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11603 %}
11604 ins_pipe(ialu_reg_mem_alu0);
11605 %}
11606
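// High-half multiplies: the one-operand imulq/mulq computes the 128-bit
// product RDX:RAX = RAX * src; MulHiL/UMulHiL consume the upper 64 bits
// left in RDX.
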
11607 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11608 %{
11609 match(Set dst (MulHiL src rax));
11610 effect(USE_KILL rax, KILL cr);
11611
11612 ins_cost(300);
11613 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11614 ins_encode %{
11615 __ imulq($src$$Register);
11616 %}
11617 ins_pipe(ialu_reg_reg_alu0);
11618 %}
11619
11620 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11621 %{
11622 match(Set dst (UMulHiL src rax));
11623 effect(USE_KILL rax, KILL cr);
11624
11625 ins_cost(300);
11626 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11627 ins_encode %{
11628 __ mulq($src$$Register);
11629 %}
11630 ins_pipe(ialu_reg_reg_alu0);
11631 %}
11632
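// The compare/branch preambles below guard the one case where the hardware
// divide itself would fault: dividing min_jint (min_jlong) by -1 overflows
// and raises #DE. The guard instead yields quotient = dividend and
// remainder = 0, as Java semantics require.
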
11633 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11634 rFlagsReg cr)
11635 %{
11636 match(Set rax (DivI rax div));
11637 effect(KILL rdx, KILL cr);
11638
11639 ins_cost(30*100+10*100); // XXX
11640 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11641 "jne,s normal\n\t"
11642 "xorl rdx, rdx\n\t"
11643 "cmpl $div, -1\n\t"
11644 "je,s done\n"
11645 "normal: cdql\n\t"
11646 "idivl $div\n"
11647 "done:" %}
11648 ins_encode(cdql_enc(div));
11649 ins_pipe(ialu_reg_reg_alu0);
11650 %}
11651
11652 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11653 rFlagsReg cr)
11654 %{
11655 match(Set rax (DivL rax div));
11656 effect(KILL rdx, KILL cr);
11657
11658 ins_cost(30*100+10*100); // XXX
11659 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11660 "cmpq rax, rdx\n\t"
11661 "jne,s normal\n\t"
11662 "xorl rdx, rdx\n\t"
11663 "cmpq $div, -1\n\t"
11664 "je,s done\n"
11665 "normal: cdqq\n\t"
11666 "idivq $div\n"
11667 "done:" %}
11668 ins_encode(cdqq_enc(div));
11669 ins_pipe(ialu_reg_reg_alu0);
11670 %}
11671
11672 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11673 %{
11674 match(Set rax (UDivI rax div));
11675 effect(KILL rdx, KILL cr);
11676
11677 ins_cost(300);
11678 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11679 ins_encode %{
11680 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11681 %}
11682 ins_pipe(ialu_reg_reg_alu0);
11683 %}
11684
11685 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11686 %{
11687 match(Set rax (UDivL rax div));
11688 effect(KILL rdx, KILL cr);
11689
11690 ins_cost(300);
11691 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11692 ins_encode %{
11693 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11694 %}
11695 ins_pipe(ialu_reg_reg_alu0);
11696 %}
11697
11698 // Integer DIVMOD with Register, both quotient and mod results
11699 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11700 rFlagsReg cr)
11701 %{
11702 match(DivModI rax div);
11703 effect(KILL cr);
11704
11705 ins_cost(30*100+10*100); // XXX
11706 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11707 "jne,s normal\n\t"
11708 "xorl rdx, rdx\n\t"
11709 "cmpl $div, -1\n\t"
11710 "je,s done\n"
11711 "normal: cdql\n\t"
11712 "idivl $div\n"
11713 "done:" %}
11714 ins_encode(cdql_enc(div));
11715 ins_pipe(pipe_slow);
11716 %}
11717
11718 // Long DIVMOD with Register, both quotient and mod results
11719 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11720 rFlagsReg cr)
11721 %{
11722 match(DivModL rax div);
11723 effect(KILL cr);
11724
11725 ins_cost(30*100+10*100); // XXX
11726 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11727 "cmpq rax, rdx\n\t"
11728 "jne,s normal\n\t"
11729 "xorl rdx, rdx\n\t"
11730 "cmpq $div, -1\n\t"
11731 "je,s done\n"
11732 "normal: cdqq\n\t"
11733 "idivq $div\n"
11734 "done:" %}
11735 ins_encode(cdqq_enc(div));
11736 ins_pipe(pipe_slow);
11737 %}
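
// DivModI/DivModL deliver both results of a single idiv: the quotient is
// left in rax and the remainder in rdx, so a Div/Mod pair over the same
// operands costs only one division.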
11738
11739 // Unsigned integer DIVMOD with Register, both quotient and mod results
11740 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11741 no_rax_rdx_RegI div, rFlagsReg cr)
11742 %{
11743 match(UDivModI rax div);
11744 effect(TEMP tmp, KILL cr);
11745
11746 ins_cost(300);
11747 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11748 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11749 %}
11750 ins_encode %{
11751 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11752 %}
11753 ins_pipe(pipe_slow);
11754 %}
11755
11756 // Unsigned long DIVMOD with Register, both quotient and mod results
11757 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11758 no_rax_rdx_RegL div, rFlagsReg cr)
11759 %{
11760 match(UDivModL rax div);
11761 effect(TEMP tmp, KILL cr);
11762
11763 ins_cost(300);
11764 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11765 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11766 %}
11767 ins_encode %{
11768 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11769 %}
11770 ins_pipe(pipe_slow);
11771 %}
11772
11773 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11774 rFlagsReg cr)
11775 %{
11776 match(Set rdx (ModI rax div));
11777 effect(KILL rax, KILL cr);
11778
11779 ins_cost(300); // XXX
11780 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11781 "jne,s normal\n\t"
11782 "xorl rdx, rdx\n\t"
11783 "cmpl $div, -1\n\t"
11784 "je,s done\n"
11785 "normal: cdql\n\t"
11786 "idivl $div\n"
11787 "done:" %}
11788 ins_encode(cdql_enc(div));
11789 ins_pipe(ialu_reg_reg_alu0);
11790 %}
11791
11792 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11793 rFlagsReg cr)
11794 %{
11795 match(Set rdx (ModL rax div));
11796 effect(KILL rax, KILL cr);
11797
11798 ins_cost(300); // XXX
11799 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11800 "cmpq rax, rdx\n\t"
11801 "jne,s normal\n\t"
11802 "xorl rdx, rdx\n\t"
11803 "cmpq $div, -1\n\t"
11804 "je,s done\n"
11805 "normal: cdqq\n\t"
11806 "idivq $div\n"
11807 "done:" %}
11808 ins_encode(cdqq_enc(div));
11809 ins_pipe(ialu_reg_reg_alu0);
11810 %}
11811
11812 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11813 %{
11814 match(Set rdx (UModI rax div));
11815 effect(KILL rax, KILL cr);
11816
11817 ins_cost(300);
11818 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11819 ins_encode %{
11820 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11821 %}
11822 ins_pipe(ialu_reg_reg_alu0);
11823 %}
11824
11825 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11826 %{
11827 match(Set rdx (UModL rax div));
11828 effect(KILL rax, KILL cr);
11829
11830 ins_cost(300);
11831 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11832 ins_encode %{
11833 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11834 %}
11835 ins_pipe(ialu_reg_reg_alu0);
11836 %}
11837
11838 // Integer Shift Instructions
11839 // Shift Left by one, two, three
11840 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11841 %{
11842 predicate(!UseAPX);
11843 match(Set dst (LShiftI dst shift));
11844 effect(KILL cr);
11845
11846 format %{ "sall $dst, $shift" %}
11847 ins_encode %{
11848 __ sall($dst$$Register, $shift$$constant);
11849 %}
11850 ins_pipe(ialu_reg);
11851 %}
11852
11853 // Shift Left by one, two, three
11854 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11855 %{
11856 predicate(UseAPX);
11857 match(Set dst (LShiftI src shift));
11858 effect(KILL cr);
11859 flag(PD::Flag_ndd_demotable);
11860
11861   format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11862 ins_encode %{
11863 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11864 %}
11865 ins_pipe(ialu_reg);
11866 %}
11867
11868 // Shift Left by 8-bit immediate
11869 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11870 %{
11871 predicate(!UseAPX);
11872 match(Set dst (LShiftI dst shift));
11873 effect(KILL cr);
11874
11875 format %{ "sall $dst, $shift" %}
11876 ins_encode %{
11877 __ sall($dst$$Register, $shift$$constant);
11878 %}
11879 ins_pipe(ialu_reg);
11880 %}
11881
11882 // Shift Left by 8-bit immediate
11883 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11884 %{
11885 predicate(UseAPX);
11886 match(Set dst (LShiftI src shift));
11887 effect(KILL cr);
11888 flag(PD::Flag_ndd_demotable);
11889
11890 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11891 ins_encode %{
11892 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11893 %}
11894 ins_pipe(ialu_reg);
11895 %}
11896
11897 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11898 %{
11899 predicate(UseAPX);
11900 match(Set dst (LShiftI (LoadI src) shift));
11901 effect(KILL cr);
11902
11903 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11904 ins_encode %{
11905 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11906 %}
11907 ins_pipe(ialu_reg);
11908 %}
11909
11910 // Shift Left by 8-bit immediate
11911 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11912 %{
11913 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11914 effect(KILL cr);
11915
11916 format %{ "sall $dst, $shift" %}
11917 ins_encode %{
11918 __ sall($dst$$Address, $shift$$constant);
11919 %}
11920 ins_pipe(ialu_mem_imm);
11921 %}
11922
11923 // Shift Left by variable
11924 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11925 %{
11926 predicate(!VM_Version::supports_bmi2());
11927 match(Set dst (LShiftI dst shift));
11928 effect(KILL cr);
11929
11930 format %{ "sall $dst, $shift" %}
11931 ins_encode %{
11932 __ sall($dst$$Register);
11933 %}
11934 ins_pipe(ialu_reg_reg);
11935 %}
11936
11937 // Shift Left by variable
11938 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11939 %{
11940 predicate(!VM_Version::supports_bmi2());
11941 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11942 effect(KILL cr);
11943
11944 format %{ "sall $dst, $shift" %}
11945 ins_encode %{
11946 __ sall($dst$$Address);
11947 %}
11948 ins_pipe(ialu_mem_reg);
11949 %}
11950
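// With BMI2, shlxl/sarxl/shrxl take the shift count in any register and
// leave RFLAGS untouched, so the rules below need neither rcx nor a KILL
// of cr.
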
11951 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11952 %{
11953 predicate(VM_Version::supports_bmi2());
11954 match(Set dst (LShiftI src shift));
11955
11956 format %{ "shlxl $dst, $src, $shift" %}
11957 ins_encode %{
11958 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11959 %}
11960 ins_pipe(ialu_reg_reg);
11961 %}
11962
11963 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11964 %{
11965 predicate(VM_Version::supports_bmi2());
11966 match(Set dst (LShiftI (LoadI src) shift));
11967 ins_cost(175);
11968 format %{ "shlxl $dst, $src, $shift" %}
11969 ins_encode %{
11970 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11971 %}
11972 ins_pipe(ialu_reg_mem);
11973 %}
11974
11975 // Arithmetic Shift Right by 8-bit immediate
11976 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11977 %{
11978 predicate(!UseAPX);
11979 match(Set dst (RShiftI dst shift));
11980 effect(KILL cr);
11981
11982 format %{ "sarl $dst, $shift" %}
11983 ins_encode %{
11984 __ sarl($dst$$Register, $shift$$constant);
11985 %}
11986 ins_pipe(ialu_mem_imm);
11987 %}
11988
11989 // Arithmetic Shift Right by 8-bit immediate
11990 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11991 %{
11992 predicate(UseAPX);
11993 match(Set dst (RShiftI src shift));
11994 effect(KILL cr);
11995 flag(PD::Flag_ndd_demotable);
11996
11997 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11998 ins_encode %{
11999 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12000 %}
12001 ins_pipe(ialu_mem_imm);
12002 %}
12003
12004 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12005 %{
12006 predicate(UseAPX);
12007 match(Set dst (RShiftI (LoadI src) shift));
12008 effect(KILL cr);
12009
12010 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12011 ins_encode %{
12012 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12013 %}
12014 ins_pipe(ialu_mem_imm);
12015 %}
12016
12017 // Arithmetic Shift Right by 8-bit immediate
12018 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12019 %{
12020 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12021 effect(KILL cr);
12022
12023 format %{ "sarl $dst, $shift" %}
12024 ins_encode %{
12025 __ sarl($dst$$Address, $shift$$constant);
12026 %}
12027 ins_pipe(ialu_mem_imm);
12028 %}
12029
12030 // Arithmetic Shift Right by variable
12031 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12032 %{
12033 predicate(!VM_Version::supports_bmi2());
12034 match(Set dst (RShiftI dst shift));
12035 effect(KILL cr);
12036
12037 format %{ "sarl $dst, $shift" %}
12038 ins_encode %{
12039 __ sarl($dst$$Register);
12040 %}
12041 ins_pipe(ialu_reg_reg);
12042 %}
12043
12044 // Arithmetic Shift Right by variable
12045 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12046 %{
12047 predicate(!VM_Version::supports_bmi2());
12048 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12049 effect(KILL cr);
12050
12051 format %{ "sarl $dst, $shift" %}
12052 ins_encode %{
12053 __ sarl($dst$$Address);
12054 %}
12055 ins_pipe(ialu_mem_reg);
12056 %}
12057
12058 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12059 %{
12060 predicate(VM_Version::supports_bmi2());
12061 match(Set dst (RShiftI src shift));
12062
12063 format %{ "sarxl $dst, $src, $shift" %}
12064 ins_encode %{
12065 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12066 %}
12067 ins_pipe(ialu_reg_reg);
12068 %}
12069
12070 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12071 %{
12072 predicate(VM_Version::supports_bmi2());
12073 match(Set dst (RShiftI (LoadI src) shift));
12074 ins_cost(175);
12075 format %{ "sarxl $dst, $src, $shift" %}
12076 ins_encode %{
12077 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12078 %}
12079 ins_pipe(ialu_reg_mem);
12080 %}
12081
12082 // Logical Shift Right by 8-bit immediate
12083 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12084 %{
12085 predicate(!UseAPX);
12086 match(Set dst (URShiftI dst shift));
12087 effect(KILL cr);
12088
12089 format %{ "shrl $dst, $shift" %}
12090 ins_encode %{
12091 __ shrl($dst$$Register, $shift$$constant);
12092 %}
12093 ins_pipe(ialu_reg);
12094 %}
12095
12096 // Logical Shift Right by 8-bit immediate
12097 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12098 %{
12099 predicate(UseAPX);
12100 match(Set dst (URShiftI src shift));
12101 effect(KILL cr);
12102 flag(PD::Flag_ndd_demotable);
12103
12104 format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12105 ins_encode %{
12106 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12107 %}
12108 ins_pipe(ialu_reg);
12109 %}
12110
12111 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12112 %{
12113 predicate(UseAPX);
12114 match(Set dst (URShiftI (LoadI src) shift));
12115 effect(KILL cr);
12116
12117 format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12118 ins_encode %{
12119 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12120 %}
12121 ins_pipe(ialu_reg);
12122 %}
12123
12124 // Logical Shift Right by 8-bit immediate
12125 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12126 %{
12127 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12128 effect(KILL cr);
12129
12130 format %{ "shrl $dst, $shift" %}
12131 ins_encode %{
12132 __ shrl($dst$$Address, $shift$$constant);
12133 %}
12134 ins_pipe(ialu_mem_imm);
12135 %}
12136
12137 // Logical Shift Right by variable
12138 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12139 %{
12140 predicate(!VM_Version::supports_bmi2());
12141 match(Set dst (URShiftI dst shift));
12142 effect(KILL cr);
12143
12144 format %{ "shrl $dst, $shift" %}
12145 ins_encode %{
12146 __ shrl($dst$$Register);
12147 %}
12148 ins_pipe(ialu_reg_reg);
12149 %}
12150
12151 // Logical Shift Right by variable
12152 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12153 %{
12154 predicate(!VM_Version::supports_bmi2());
12155 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12156 effect(KILL cr);
12157
12158 format %{ "shrl $dst, $shift" %}
12159 ins_encode %{
12160 __ shrl($dst$$Address);
12161 %}
12162 ins_pipe(ialu_mem_reg);
12163 %}
12164
12165 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12166 %{
12167 predicate(VM_Version::supports_bmi2());
12168 match(Set dst (URShiftI src shift));
12169
12170 format %{ "shrxl $dst, $src, $shift" %}
12171 ins_encode %{
12172 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12173 %}
12174 ins_pipe(ialu_reg_reg);
12175 %}
12176
12177 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12178 %{
12179 predicate(VM_Version::supports_bmi2());
12180 match(Set dst (URShiftI (LoadI src) shift));
12181 ins_cost(175);
12182 format %{ "shrxl $dst, $src, $shift" %}
12183 ins_encode %{
12184 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12185 %}
12186 ins_pipe(ialu_reg_mem);
12187 %}
12188
12189 // Long Shift Instructions
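// Note: in 64-bit mode the hardware masks shift counts to 6 bits (count mod 64),
// matching Java semantics for long shifts; hence the explicit "& 0x3F" on the
// immediate forms below.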
12190 // Shift Left by one, two, three
12191 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12192 %{
12193 predicate(!UseAPX);
12194 match(Set dst (LShiftL dst shift));
12195 effect(KILL cr);
12196
12197 format %{ "salq $dst, $shift" %}
12198 ins_encode %{
12199 __ salq($dst$$Register, $shift$$constant);
12200 %}
12201 ins_pipe(ialu_reg);
12202 %}
12203
12204 // Shift Left by one, two, three
12205 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12206 %{
12207 predicate(UseAPX);
12208 match(Set dst (LShiftL src shift));
12209 effect(KILL cr);
12210 flag(PD::Flag_ndd_demotable);
12211
12212 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12213 ins_encode %{
12214 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12215 %}
12216 ins_pipe(ialu_reg);
12217 %}
12218
12219 // Shift Left by 8-bit immediate
12220 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12221 %{
12222 predicate(!UseAPX);
12223 match(Set dst (LShiftL dst shift));
12224 effect(KILL cr);
12225
12226 format %{ "salq $dst, $shift" %}
12227 ins_encode %{
12228 __ salq($dst$$Register, $shift$$constant);
12229 %}
12230 ins_pipe(ialu_reg);
12231 %}
12232
12233 // Shift Left by 8-bit immediate
12234 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12235 %{
12236 predicate(UseAPX);
12237 match(Set dst (LShiftL src shift));
12238 effect(KILL cr);
12239 flag(PD::Flag_ndd_demotable);
12240
12241 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12242 ins_encode %{
12243 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12244 %}
12245 ins_pipe(ialu_reg);
12246 %}
12247
12248 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12249 %{
12250 predicate(UseAPX);
12251 match(Set dst (LShiftL (LoadL src) shift));
12252 effect(KILL cr);
12253
12254 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12255 ins_encode %{
12256 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12257 %}
12258 ins_pipe(ialu_reg);
12259 %}
12260
12261 // Shift Left by 8-bit immediate
12262 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12263 %{
12264 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12265 effect(KILL cr);
12266
12267 format %{ "salq $dst, $shift" %}
12268 ins_encode %{
12269 __ salq($dst$$Address, $shift$$constant);
12270 %}
12271 ins_pipe(ialu_mem_imm);
12272 %}
12273
12274 // Shift Left by variable
12275 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12276 %{
12277 predicate(!VM_Version::supports_bmi2());
12278 match(Set dst (LShiftL dst shift));
12279 effect(KILL cr);
12280
12281 format %{ "salq $dst, $shift" %}
12282 ins_encode %{
12283 __ salq($dst$$Register);
12284 %}
12285 ins_pipe(ialu_reg_reg);
12286 %}
12287
12288 // Shift Left by variable
12289 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12290 %{
12291 predicate(!VM_Version::supports_bmi2());
12292 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12293 effect(KILL cr);
12294
12295 format %{ "salq $dst, $shift" %}
12296 ins_encode %{
12297 __ salq($dst$$Address);
12298 %}
12299 ins_pipe(ialu_mem_reg);
12300 %}
12301
12302 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12303 %{
12304 predicate(VM_Version::supports_bmi2());
12305 match(Set dst (LShiftL src shift));
12306
12307 format %{ "shlxq $dst, $src, $shift" %}
12308 ins_encode %{
12309 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12310 %}
12311 ins_pipe(ialu_reg_reg);
12312 %}
12313
12314 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12315 %{
12316 predicate(VM_Version::supports_bmi2());
12317 match(Set dst (LShiftL (LoadL src) shift));
12318 ins_cost(175);
12319 format %{ "shlxq $dst, $src, $shift" %}
12320 ins_encode %{
12321 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12322 %}
12323 ins_pipe(ialu_reg_mem);
12324 %}
12325
12326 // Arithmetic Shift Right by immediate (count is masked to 0..63)
12327 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12328 %{
12329 predicate(!UseAPX);
12330 match(Set dst (RShiftL dst shift));
12331 effect(KILL cr);
12332
12333 format %{ "sarq $dst, $shift" %}
12334 ins_encode %{
12335 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12336 %}
12337 ins_pipe(ialu_mem_imm);
12338 %}
12339
12340 // Arithmetic Shift Right by immediate (count is masked to 0..63)
12341 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12342 %{
12343 predicate(UseAPX);
12344 match(Set dst (RShiftL src shift));
12345 effect(KILL cr);
12346 flag(PD::Flag_ndd_demotable);
12347
12348 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12349 ins_encode %{
12350 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12351 %}
12352 ins_pipe(ialu_mem_imm);
12353 %}
12354
12355 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12356 %{
12357 predicate(UseAPX);
12358 match(Set dst (RShiftL (LoadL src) shift));
12359 effect(KILL cr);
12360
12361 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12362 ins_encode %{
12363 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12364 %}
12365 ins_pipe(ialu_mem_imm);
12366 %}
12367
12368 // Arithmetic Shift Right by immediate (count is masked to 0..63)
12369 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12370 %{
12371 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12372 effect(KILL cr);
12373
12374 format %{ "sarq $dst, $shift" %}
12375 ins_encode %{
12376 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12377 %}
12378 ins_pipe(ialu_mem_imm);
12379 %}
12380
12381 // Arithmetic Shift Right by variable
12382 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12383 %{
12384 predicate(!VM_Version::supports_bmi2());
12385 match(Set dst (RShiftL dst shift));
12386 effect(KILL cr);
12387
12388 format %{ "sarq $dst, $shift" %}
12389 ins_encode %{
12390 __ sarq($dst$$Register);
12391 %}
12392 ins_pipe(ialu_reg_reg);
12393 %}
12394
12395 // Arithmetic Shift Right by variable
12396 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12397 %{
12398 predicate(!VM_Version::supports_bmi2());
12399 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12400 effect(KILL cr);
12401
12402 format %{ "sarq $dst, $shift" %}
12403 ins_encode %{
12404 __ sarq($dst$$Address);
12405 %}
12406 ins_pipe(ialu_mem_reg);
12407 %}
12408
12409 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12410 %{
12411 predicate(VM_Version::supports_bmi2());
12412 match(Set dst (RShiftL src shift));
12413
12414 format %{ "sarxq $dst, $src, $shift" %}
12415 ins_encode %{
12416 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12417 %}
12418 ins_pipe(ialu_reg_reg);
12419 %}
12420
12421 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12422 %{
12423 predicate(VM_Version::supports_bmi2());
12424 match(Set dst (RShiftL (LoadL src) shift));
12425 ins_cost(175);
12426 format %{ "sarxq $dst, $src, $shift" %}
12427 ins_encode %{
12428 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12429 %}
12430 ins_pipe(ialu_reg_mem);
12431 %}
12432
12433 // Logical Shift Right by 8-bit immediate
12434 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12435 %{
12436 predicate(!UseAPX);
12437 match(Set dst (URShiftL dst shift));
12438 effect(KILL cr);
12439
12440 format %{ "shrq $dst, $shift" %}
12441 ins_encode %{
12442 __ shrq($dst$$Register, $shift$$constant);
12443 %}
12444 ins_pipe(ialu_reg);
12445 %}
12446
12447 // Logical Shift Right by 8-bit immediate
12448 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12449 %{
12450 predicate(UseAPX);
12451 match(Set dst (URShiftL src shift));
12452 effect(KILL cr);
12453 flag(PD::Flag_ndd_demotable);
12454
12455 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12456 ins_encode %{
12457 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12458 %}
12459 ins_pipe(ialu_reg);
12460 %}
12461
12462 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12463 %{
12464 predicate(UseAPX);
12465 match(Set dst (URShiftL (LoadL src) shift));
12466 effect(KILL cr);
12467
12468 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12469 ins_encode %{
12470 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12471 %}
12472 ins_pipe(ialu_reg);
12473 %}
12474
12475 // Logical Shift Right by 8-bit immediate
12476 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12477 %{
12478 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12479 effect(KILL cr);
12480
12481 format %{ "shrq $dst, $shift" %}
12482 ins_encode %{
12483 __ shrq($dst$$Address, $shift$$constant);
12484 %}
12485 ins_pipe(ialu_mem_imm);
12486 %}
12487
12488 // Logical Shift Right by variable
12489 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12490 %{
12491 predicate(!VM_Version::supports_bmi2());
12492 match(Set dst (URShiftL dst shift));
12493 effect(KILL cr);
12494
12495 format %{ "shrq $dst, $shift" %}
12496 ins_encode %{
12497 __ shrq($dst$$Register);
12498 %}
12499 ins_pipe(ialu_reg_reg);
12500 %}
12501
12502 // Logical Shift Right by variable
12503 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12504 %{
12505 predicate(!VM_Version::supports_bmi2());
12506 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12507 effect(KILL cr);
12508
12509 format %{ "shrq $dst, $shift" %}
12510 ins_encode %{
12511 __ shrq($dst$$Address);
12512 %}
12513 ins_pipe(ialu_mem_reg);
12514 %}
12515
12516 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12517 %{
12518 predicate(VM_Version::supports_bmi2());
12519 match(Set dst (URShiftL src shift));
12520
12521 format %{ "shrxq $dst, $src, $shift" %}
12522 ins_encode %{
12523 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12524 %}
12525 ins_pipe(ialu_reg_reg);
12526 %}
12527
12528 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12529 %{
12530 predicate(VM_Version::supports_bmi2());
12531 match(Set dst (URShiftL (LoadL src) shift));
12532 ins_cost(175);
12533 format %{ "shrxq $dst, $src, $shift" %}
12534 ins_encode %{
12535 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12536 %}
12537 ins_pipe(ialu_reg_mem);
12538 %}
12539
12540 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12541 // This idiom is used by the compiler for the i2b bytecode.
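// For example, "byte b = (byte) i;" compiles to the i2b bytecode, which C2
// expands to (i << 24) >> 24; this rule collapses the pair into one movsbl.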
12542 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12543 %{
12544 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12545
12546 format %{ "movsbl $dst, $src\t# i2b" %}
12547 ins_encode %{
12548 __ movsbl($dst$$Register, $src$$Register);
12549 %}
12550 ins_pipe(ialu_reg_reg);
12551 %}
12552
12553 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12554 // This idiom is used by the compiler for the i2s bytecode.
12555 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12556 %{
12557 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12558
12559 format %{ "movswl $dst, $src\t# i2s" %}
12560 ins_encode %{
12561 __ movswl($dst$$Register, $src$$Register);
12562 %}
12563 ins_pipe(ialu_reg_reg);
12564 %}
12565
12566 // ROL/ROR instructions
12567
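// Rotate nodes come from the Integer/Long.rotateLeft/rotateRight intrinsics
// and from shift-or idioms such as (x << s) | (x >>> -s). BMI2 provides only
// rorx (rotate right, flags untouched), so a rotate-left by k is emitted as
// rorx by (32 - k) or (64 - k) in the rules below.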
12568 // Rotate left by constant.
12569 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12570 %{
12571 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12572 match(Set dst (RotateLeft dst shift));
12573 effect(KILL cr);
12574 format %{ "roll $dst, $shift" %}
12575 ins_encode %{
12576 __ roll($dst$$Register, $shift$$constant);
12577 %}
12578 ins_pipe(ialu_reg);
12579 %}
12580
12581 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12582 %{
12583 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12584 match(Set dst (RotateLeft src shift));
12585 format %{ "rolxl $dst, $src, $shift" %}
12586 ins_encode %{
12587 int shift = 32 - ($shift$$constant & 31);
12588 __ rorxl($dst$$Register, $src$$Register, shift);
12589 %}
12590 ins_pipe(ialu_reg_reg);
12591 %}
12592
12593 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12594 %{
12595 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12596 match(Set dst (RotateLeft (LoadI src) shift));
12597 ins_cost(175);
12598 format %{ "rolxl $dst, $src, $shift" %}
12599 ins_encode %{
12600 int shift = 32 - ($shift$$constant & 31);
12601 __ rorxl($dst$$Register, $src$$Address, shift);
12602 %}
12603 ins_pipe(ialu_reg_mem);
12604 %}
12605
12606 // Rotate Left by variable
12607 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12608 %{
12609 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12610 match(Set dst (RotateLeft dst shift));
12611 effect(KILL cr);
12612 format %{ "roll $dst, $shift" %}
12613 ins_encode %{
12614 __ roll($dst$$Register);
12615 %}
12616 ins_pipe(ialu_reg_reg);
12617 %}
12618
12619 // Rotate Left by variable
12620 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12621 %{
12622 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12623 match(Set dst (RotateLeft src shift));
12624 effect(KILL cr);
12625 flag(PD::Flag_ndd_demotable);
12626
12627 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12628 ins_encode %{
12629 __ eroll($dst$$Register, $src$$Register, false);
12630 %}
12631 ins_pipe(ialu_reg_reg);
12632 %}
12633
12634 // Rotate Right by constant.
12635 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12636 %{
12637 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12638 match(Set dst (RotateRight dst shift));
12639 effect(KILL cr);
12640 format %{ "rorl $dst, $shift" %}
12641 ins_encode %{
12642 __ rorl($dst$$Register, $shift$$constant);
12643 %}
12644 ins_pipe(ialu_reg);
12645 %}
12646
12647 // Rotate Right by constant.
12648 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12649 %{
12650 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12651 match(Set dst (RotateRight src shift));
12652 format %{ "rorxl $dst, $src, $shift" %}
12653 ins_encode %{
12654 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12655 %}
12656 ins_pipe(ialu_reg_reg);
12657 %}
12658
12659 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12660 %{
12661 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12662 match(Set dst (RotateRight (LoadI src) shift));
12663 ins_cost(175);
12664 format %{ "rorxl $dst, $src, $shift" %}
12665 ins_encode %{
12666 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12667 %}
12668 ins_pipe(ialu_reg_mem);
12669 %}
12670
12671 // Rotate Right by variable
12672 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12673 %{
12674 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12675 match(Set dst (RotateRight dst shift));
12676 effect(KILL cr);
12677 format %{ "rorl $dst, $shift" %}
12678 ins_encode %{
12679 __ rorl($dst$$Register);
12680 %}
12681 ins_pipe(ialu_reg_reg);
12682 %}
12683
12684 // Rotate Right by variable
12685 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12686 %{
12687 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12688 match(Set dst (RotateRight src shift));
12689 effect(KILL cr);
12690 flag(PD::Flag_ndd_demotable);
12691
12692 format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12693 ins_encode %{
12694 __ erorl($dst$$Register, $src$$Register, false);
12695 %}
12696 ins_pipe(ialu_reg_reg);
12697 %}
12698
12699 // Rotate Left by constant.
12700 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12701 %{
12702 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12703 match(Set dst (RotateLeft dst shift));
12704 effect(KILL cr);
12705 format %{ "rolq $dst, $shift" %}
12706 ins_encode %{
12707 __ rolq($dst$$Register, $shift$$constant);
12708 %}
12709 ins_pipe(ialu_reg);
12710 %}
12711
12712 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12713 %{
12714 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12715 match(Set dst (RotateLeft src shift));
12716 format %{ "rolxq $dst, $src, $shift" %}
12717 ins_encode %{
12718 int shift = 64 - ($shift$$constant & 63);
12719 __ rorxq($dst$$Register, $src$$Register, shift);
12720 %}
12721 ins_pipe(ialu_reg_reg);
12722 %}
12723
12724 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12725 %{
12726 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12727 match(Set dst (RotateLeft (LoadL src) shift));
12728 ins_cost(175);
12729 format %{ "rolxq $dst, $src, $shift" %}
12730 ins_encode %{
12731 int shift = 64 - ($shift$$constant & 63);
12732 __ rorxq($dst$$Register, $src$$Address, shift);
12733 %}
12734 ins_pipe(ialu_reg_mem);
12735 %}
12736
12737 // Rotate Left by variable
12738 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12739 %{
12740 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12741 match(Set dst (RotateLeft dst shift));
12742 effect(KILL cr);
12743
12744 format %{ "rolq $dst, $shift" %}
12745 ins_encode %{
12746 __ rolq($dst$$Register);
12747 %}
12748 ins_pipe(ialu_reg_reg);
12749 %}
12750
12751 // Rotate Left by variable
12752 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12753 %{
12754 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12755 match(Set dst (RotateLeft src shift));
12756 effect(KILL cr);
12757 flag(PD::Flag_ndd_demotable);
12758
12759 format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12760 ins_encode %{
12761 __ erolq($dst$$Register, $src$$Register, false);
12762 %}
12763 ins_pipe(ialu_reg_reg);
12764 %}
12765
12766 // Rotate Right by constant.
12767 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12768 %{
12769 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12770 match(Set dst (RotateRight dst shift));
12771 effect(KILL cr);
12772 format %{ "rorq $dst, $shift" %}
12773 ins_encode %{
12774 __ rorq($dst$$Register, $shift$$constant);
12775 %}
12776 ins_pipe(ialu_reg);
12777 %}
12778
12779 // Rotate Right by constant
12780 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12781 %{
12782 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12783 match(Set dst (RotateRight src shift));
12784 format %{ "rorxq $dst, $src, $shift" %}
12785 ins_encode %{
12786 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12787 %}
12788 ins_pipe(ialu_reg_reg);
12789 %}
12790
12791 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12792 %{
12793 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12794 match(Set dst (RotateRight (LoadL src) shift));
12795 ins_cost(175);
12796 format %{ "rorxq $dst, $src, $shift" %}
12797 ins_encode %{
12798 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12799 %}
12800 ins_pipe(ialu_reg_mem);
12801 %}
12802
12803 // Rotate Right by variable
12804 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12805 %{
12806 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12807 match(Set dst (RotateRight dst shift));
12808 effect(KILL cr);
12809 format %{ "rorq $dst, $shift" %}
12810 ins_encode %{
12811 __ rorq($dst$$Register);
12812 %}
12813 ins_pipe(ialu_reg_reg);
12814 %}
12815
12816 // Rotate Right by variable
12817 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12818 %{
12819 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12820 match(Set dst (RotateRight src shift));
12821 effect(KILL cr);
12822 flag(PD::Flag_ndd_demotable);
12823
12824 format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12825 ins_encode %{
12826 __ erorq($dst$$Register, $src$$Register, false);
12827 %}
12828 ins_pipe(ialu_reg_reg);
12829 %}
12830
12831 //----------------------------- CompressBits/ExpandBits ------------------------
12832
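// These nodes back the Long.compress and Long.expand library methods.
// pext gathers the src bits selected by mask into the low-order bits of dst;
// pdep is the inverse, scattering the low-order bits of src to the positions
// selected by mask. Illustrative values:
//   Long.compress(0b11010110L, 0b11110000L) == 0b1101L       // pext
//   Long.expand(0b1101L, 0b11110000L)       == 0b11010000L   // pdep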
12833 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12834 predicate(n->bottom_type()->isa_long());
12835 match(Set dst (CompressBits src mask));
12836 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12837 ins_encode %{
12838 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12839 %}
12840 ins_pipe( pipe_slow );
12841 %}
12842
12843 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12844 predicate(n->bottom_type()->isa_long());
12845 match(Set dst (ExpandBits src mask));
12846 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12847 ins_encode %{
12848 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12849 %}
12850 ins_pipe( pipe_slow );
12851 %}
12852
12853 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12854 predicate(n->bottom_type()->isa_long());
12855 match(Set dst (CompressBits src (LoadL mask)));
12856 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12857 ins_encode %{
12858 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12859 %}
12860 ins_pipe( pipe_slow );
12861 %}
12862
12863 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12864 predicate(n->bottom_type()->isa_long());
12865 match(Set dst (ExpandBits src (LoadL mask)));
12866 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12867 ins_encode %{
12868 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12869 %}
12870 ins_pipe( pipe_slow );
12871 %}
12872
12873
12874 // Logical Instructions
12875
12876 // Integer Logical Instructions
12877
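// With UseAPX, most two-operand ALU rules below are paired with an NDD
// (new data destination) variant that writes a third register instead of
// overwriting a source. Flag_ndd_demotable marks rules that the encoder may
// demote to the shorter legacy two-operand form when dst and src1 land in the
// same register (the _commutative variant allows either source); the trailing
// "false" passed to the e-prefixed emitters selects the flag-setting
// (no_flags == false) encoding.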
12878 // And Instructions
12879 // And Register with Register
12880 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12881 %{
12882 predicate(!UseAPX);
12883 match(Set dst (AndI dst src));
12884 effect(KILL cr);
12885 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12886
12887 format %{ "andl $dst, $src\t# int" %}
12888 ins_encode %{
12889 __ andl($dst$$Register, $src$$Register);
12890 %}
12891 ins_pipe(ialu_reg_reg);
12892 %}
12893
12894 // And Register with Register using New Data Destination (NDD)
12895 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12896 %{
12897 predicate(UseAPX);
12898 match(Set dst (AndI src1 src2));
12899 effect(KILL cr);
12900 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
12901
12902 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12903 ins_encode %{
12904 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12906 %}
12907 ins_pipe(ialu_reg_reg);
12908 %}
12909
12910 // And Register with Immediate 255
12911 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12912 %{
12913 match(Set dst (AndI src mask));
12914
12915 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12916 ins_encode %{
12917 __ movzbl($dst$$Register, $src$$Register);
12918 %}
12919 ins_pipe(ialu_reg);
12920 %}
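// Note: masking with 0xFF (or 0xFFFF below) is a pure zero extension, so it
// maps to movzbl/movzwl, which neither reads nor writes the flags; these
// rules therefore need no KILL cr.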
12921
12922 // And Register with Immediate 255 and promote to long
12923 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12924 %{
12925 match(Set dst (ConvI2L (AndI src mask)));
12926
12927 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12928 ins_encode %{
12929 __ movzbl($dst$$Register, $src$$Register);
12930 %}
12931 ins_pipe(ialu_reg);
12932 %}
12933
12934 // And Register with Immediate 65535
12935 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12936 %{
12937 match(Set dst (AndI src mask));
12938
12939 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12940 ins_encode %{
12941 __ movzwl($dst$$Register, $src$$Register);
12942 %}
12943 ins_pipe(ialu_reg);
12944 %}
12945
12946 // And Register with Immediate 65535 and promote to long
12947 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12948 %{
12949 match(Set dst (ConvI2L (AndI src mask)));
12950
12951 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12952 ins_encode %{
12953 __ movzwl($dst$$Register, $src$$Register);
12954 %}
12955 ins_pipe(ialu_reg);
12956 %}
12957
12958 // Can skip the int-to-long conversion after an AND with a small (2^n - 1) bitmask
12959 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12960 %{
12961 predicate(VM_Version::supports_bmi2());
12962 ins_cost(125);
12963 effect(TEMP tmp, KILL cr);
12964 match(Set dst (ConvI2L (AndI src mask)));
12965 format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12966 ins_encode %{
12967 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12968 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12969 %}
12970 ins_pipe(ialu_reg_reg);
12971 %}
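// A sketch of the sequence above for mask == 0x3FF: bzhi keeps the low n bits
// of its source, so the rule emits
//   movl  tmp, 10        // n = exact_log2(mask + 1)
//   bzhiq dst, src, tmp  // dst = src & ((1 << 10) - 1), zero-extended to 64 bits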
12972
12973 // And Register with Immediate
12974 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12975 %{
12976 predicate(!UseAPX);
12977 match(Set dst (AndI dst src));
12978 effect(KILL cr);
12979 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12980
12981 format %{ "andl $dst, $src\t# int" %}
12982 ins_encode %{
12983 __ andl($dst$$Register, $src$$constant);
12984 %}
12985 ins_pipe(ialu_reg);
12986 %}
12987
12988 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12989 %{
12990 predicate(UseAPX);
12991 match(Set dst (AndI src1 src2));
12992 effect(KILL cr);
12993 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
12994
12995 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12996 ins_encode %{
12997 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12998 %}
12999 ins_pipe(ialu_reg);
13000 %}
13001
13002 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13003 %{
13004 predicate(UseAPX);
13005 match(Set dst (AndI (LoadI src1) src2));
13006 effect(KILL cr);
13007 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13008
13009 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13010 ins_encode %{
13011 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13012 %}
13013 ins_pipe(ialu_reg);
13014 %}
13015
13016 // And Register with Memory
13017 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13018 %{
13019 predicate(!UseAPX);
13020 match(Set dst (AndI dst (LoadI src)));
13021 effect(KILL cr);
13022 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13023
13024 ins_cost(150);
13025 format %{ "andl $dst, $src\t# int" %}
13026 ins_encode %{
13027 __ andl($dst$$Register, $src$$Address);
13028 %}
13029 ins_pipe(ialu_reg_mem);
13030 %}
13031
13032 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13033 %{
13034 predicate(UseAPX);
13035 match(Set dst (AndI src1 (LoadI src2)));
13036 effect(KILL cr);
13037 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13038
13039 ins_cost(150);
13040 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13041 ins_encode %{
13042 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13043 %}
13044 ins_pipe(ialu_reg_mem);
13045 %}
13046
13047 // And Memory with Register
13048 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13049 %{
13050 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13051 effect(KILL cr);
13052 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13053
13054 ins_cost(150);
13055 format %{ "andb $dst, $src\t# byte" %}
13056 ins_encode %{
13057 __ andb($dst$$Address, $src$$Register);
13058 %}
13059 ins_pipe(ialu_mem_reg);
13060 %}
13061
13062 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13063 %{
13064 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13065 effect(KILL cr);
13066 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13067
13068 ins_cost(150);
13069 format %{ "andl $dst, $src\t# int" %}
13070 ins_encode %{
13071 __ andl($dst$$Address, $src$$Register);
13072 %}
13073 ins_pipe(ialu_mem_reg);
13074 %}
13075
13076 // And Memory with Immediate
13077 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13078 %{
13079 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13080 effect(KILL cr);
13081 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13082
13083 ins_cost(125);
13084 format %{ "andl $dst, $src\t# int" %}
13085 ins_encode %{
13086 __ andl($dst$$Address, $src$$constant);
13087 %}
13088 ins_pipe(ialu_mem_imm);
13089 %}
13090
13091 // BMI1 instructions
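// These rules match the standard BMI1 identities (shown for the 32-bit forms;
// XorI with -1 is the ideal-graph form of "not"):
//   andnl:   dst = ~src1 & src2
//   blsil:   dst = x & -x        extract lowest set bit
//   blsmskl: dst = x ^ (x - 1)   mask up to and including lowest set bit
//   blsrl:   dst = x & (x - 1)   clear lowest set bit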
13092 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13093 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13094 predicate(UseBMI1Instructions);
13095 effect(KILL cr);
13096 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13097
13098 ins_cost(125);
13099 format %{ "andnl $dst, $src1, $src2" %}
13100
13101 ins_encode %{
13102 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13103 %}
13104 ins_pipe(ialu_reg_mem);
13105 %}
13106
13107 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13108 match(Set dst (AndI (XorI src1 minus_1) src2));
13109 predicate(UseBMI1Instructions);
13110 effect(KILL cr);
13111 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13112
13113 format %{ "andnl $dst, $src1, $src2" %}
13114
13115 ins_encode %{
13116 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13117 %}
13118 ins_pipe(ialu_reg);
13119 %}
13120
13121 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13122 match(Set dst (AndI (SubI imm_zero src) src));
13123 predicate(UseBMI1Instructions);
13124 effect(KILL cr);
13125 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13126
13127 format %{ "blsil $dst, $src" %}
13128
13129 ins_encode %{
13130 __ blsil($dst$$Register, $src$$Register);
13131 %}
13132 ins_pipe(ialu_reg);
13133 %}
13134
13135 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13136 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13137 predicate(UseBMI1Instructions);
13138 effect(KILL cr);
13139 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13140
13141 ins_cost(125);
13142 format %{ "blsil $dst, $src" %}
13143
13144 ins_encode %{
13145 __ blsil($dst$$Register, $src$$Address);
13146 %}
13147 ins_pipe(ialu_reg_mem);
13148 %}
13149
13150 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13151 %{
13152 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13153 predicate(UseBMI1Instructions);
13154 effect(KILL cr);
13155 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13156
13157 ins_cost(125);
13158 format %{ "blsmskl $dst, $src" %}
13159
13160 ins_encode %{
13161 __ blsmskl($dst$$Register, $src$$Address);
13162 %}
13163 ins_pipe(ialu_reg_mem);
13164 %}
13165
13166 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13167 %{
13168 match(Set dst (XorI (AddI src minus_1) src));
13169 predicate(UseBMI1Instructions);
13170 effect(KILL cr);
13171 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13172
13173 format %{ "blsmskl $dst, $src" %}
13174
13175 ins_encode %{
13176 __ blsmskl($dst$$Register, $src$$Register);
13177 %}
13178
13179 ins_pipe(ialu_reg);
13180 %}
13181
13182 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13183 %{
13184 match(Set dst (AndI (AddI src minus_1) src) );
13185 predicate(UseBMI1Instructions);
13186 effect(KILL cr);
13187 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13188
13189 format %{ "blsrl $dst, $src" %}
13190
13191 ins_encode %{
13192 __ blsrl($dst$$Register, $src$$Register);
13193 %}
13194
13195 ins_pipe(ialu_reg);
13196 %}
13197
13198 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13199 %{
13200 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13201 predicate(UseBMI1Instructions);
13202 effect(KILL cr);
13203 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13204
13205 ins_cost(125);
13206 format %{ "blsrl $dst, $src" %}
13207
13208 ins_encode %{
13209 __ blsrl($dst$$Register, $src$$Address);
13210 %}
13211
13212 ins_pipe(ialu_reg_mem);
13213 %}
13214
13215 // Or Instructions
13216 // Or Register with Register
13217 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13218 %{
13219 predicate(!UseAPX);
13220 match(Set dst (OrI dst src));
13221 effect(KILL cr);
13222 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13223
13224 format %{ "orl $dst, $src\t# int" %}
13225 ins_encode %{
13226 __ orl($dst$$Register, $src$$Register);
13227 %}
13228 ins_pipe(ialu_reg_reg);
13229 %}
13230
13231 // Or Register with Register using New Data Destination (NDD)
13232 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13233 %{
13234 predicate(UseAPX);
13235 match(Set dst (OrI src1 src2));
13236 effect(KILL cr);
13237 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13238
13239 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13240 ins_encode %{
13241 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13242 %}
13243 ins_pipe(ialu_reg_reg);
13244 %}
13245
13246 // Or Register with Immediate
13247 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13248 %{
13249 predicate(!UseAPX);
13250 match(Set dst (OrI dst src));
13251 effect(KILL cr);
13252 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13253
13254 format %{ "orl $dst, $src\t# int" %}
13255 ins_encode %{
13256 __ orl($dst$$Register, $src$$constant);
13257 %}
13258 ins_pipe(ialu_reg);
13259 %}
13260
13261 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13262 %{
13263 predicate(UseAPX);
13264 match(Set dst (OrI src1 src2));
13265 effect(KILL cr);
13266 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13267
13268 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13269 ins_encode %{
13270 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13271 %}
13272 ins_pipe(ialu_reg);
13273 %}
13274
13275 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13276 %{
13277 predicate(UseAPX);
13278 match(Set dst (OrI src1 src2));
13279 effect(KILL cr);
13280 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13281
13282 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13283 ins_encode %{
13284 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13285 %}
13286 ins_pipe(ialu_reg);
13287 %}
13288
13289 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13290 %{
13291 predicate(UseAPX);
13292 match(Set dst (OrI (LoadI src1) src2));
13293 effect(KILL cr);
13294 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13295
13296 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13297 ins_encode %{
13298 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13299 %}
13300 ins_pipe(ialu_reg);
13301 %}
13302
13303 // Or Register with Memory
13304 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13305 %{
13306 predicate(!UseAPX);
13307 match(Set dst (OrI dst (LoadI src)));
13308 effect(KILL cr);
13309 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13310
13311 ins_cost(150);
13312 format %{ "orl $dst, $src\t# int" %}
13313 ins_encode %{
13314 __ orl($dst$$Register, $src$$Address);
13315 %}
13316 ins_pipe(ialu_reg_mem);
13317 %}
13318
13319 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13320 %{
13321 predicate(UseAPX);
13322 match(Set dst (OrI src1 (LoadI src2)));
13323 effect(KILL cr);
13324 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13325
13326 ins_cost(150);
13327 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13328 ins_encode %{
13329 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13330 %}
13331 ins_pipe(ialu_reg_mem);
13332 %}
13333
13334 // Or Memory with Register
13335 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13336 %{
13337 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13338 effect(KILL cr);
13339 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13340
13341 ins_cost(150);
13342 format %{ "orb $dst, $src\t# byte" %}
13343 ins_encode %{
13344 __ orb($dst$$Address, $src$$Register);
13345 %}
13346 ins_pipe(ialu_mem_reg);
13347 %}
13348
13349 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13350 %{
13351 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13352 effect(KILL cr);
13353 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13354
13355 ins_cost(150);
13356 format %{ "orl $dst, $src\t# int" %}
13357 ins_encode %{
13358 __ orl($dst$$Address, $src$$Register);
13359 %}
13360 ins_pipe(ialu_mem_reg);
13361 %}
13362
13363 // Or Memory with Immediate
13364 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13365 %{
13366 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13367 effect(KILL cr);
13368 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13369
13370 ins_cost(125);
13371 format %{ "orl $dst, $src\t# int" %}
13372 ins_encode %{
13373 __ orl($dst$$Address, $src$$constant);
13374 %}
13375 ins_pipe(ialu_mem_imm);
13376 %}
13377
13378 // Xor Instructions
13379 // Xor Register with Register
13380 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13381 %{
13382 predicate(!UseAPX);
13383 match(Set dst (XorI dst src));
13384 effect(KILL cr);
13385 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13386
13387 format %{ "xorl $dst, $src\t# int" %}
13388 ins_encode %{
13389 __ xorl($dst$$Register, $src$$Register);
13390 %}
13391 ins_pipe(ialu_reg_reg);
13392 %}
13393
13394 // Xor Register with Register using New Data Destination (NDD)
13395 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13396 %{
13397 predicate(UseAPX);
13398 match(Set dst (XorI src1 src2));
13399 effect(KILL cr);
13400 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13401
13402 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13403 ins_encode %{
13404 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13405 %}
13406 ins_pipe(ialu_reg_reg);
13407 %}
13408
13409 // Xor Register with Immediate -1
13410 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13411 %{
13412 predicate(!UseAPX);
13413 match(Set dst (XorI dst imm));
13414
13415 format %{ "notl $dst" %}
13416 ins_encode %{
13417 __ notl($dst$$Register);
13418 %}
13419 ins_pipe(ialu_reg);
13420 %}
13421
13422 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13423 %{
13424 match(Set dst (XorI src imm));
13425 predicate(UseAPX);
13426 flag(PD::Flag_ndd_demotable);
13427
13428 format %{ "enotl $dst, $src" %}
13429 ins_encode %{
13430 __ enotl($dst$$Register, $src$$Register);
13431 %}
13432 ins_pipe(ialu_reg);
13433 %}
13434
13435 // Xor Register with Immediate
13436 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13437 %{
13438 // Strict predicate check makes selection of xorI_rReg_im1 cost-agnostic when immI src is -1.
13439 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13440 match(Set dst (XorI dst src));
13441 effect(KILL cr);
13442 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13443
13444 format %{ "xorl $dst, $src\t# int" %}
13445 ins_encode %{
13446 __ xorl($dst$$Register, $src$$constant);
13447 %}
13448 ins_pipe(ialu_reg);
13449 %}
13450
13451 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13452 %{
13453 // Strict predicate check makes selection of xorI_rReg_im1_ndd cost-agnostic when immI src2 is -1.
13454 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13455 match(Set dst (XorI src1 src2));
13456 effect(KILL cr);
13457 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13458
13459 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13460 ins_encode %{
13461 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13462 %}
13463 ins_pipe(ialu_reg);
13464 %}
13465
13466 // Xor Memory with Immediate
13467 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13468 %{
13469 predicate(UseAPX);
13470 match(Set dst (XorI (LoadI src1) src2));
13471 effect(KILL cr);
13472 ins_cost(150);
13473 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13474
13475 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13476 ins_encode %{
13477 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13478 %}
13479 ins_pipe(ialu_reg);
13480 %}
13481
13482 // Xor Register with Memory
13483 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13484 %{
13485 predicate(!UseAPX);
13486 match(Set dst (XorI dst (LoadI src)));
13487 effect(KILL cr);
13488 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13489
13490 ins_cost(150);
13491 format %{ "xorl $dst, $src\t# int" %}
13492 ins_encode %{
13493 __ xorl($dst$$Register, $src$$Address);
13494 %}
13495 ins_pipe(ialu_reg_mem);
13496 %}
13497
13498 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13499 %{
13500 predicate(UseAPX);
13501 match(Set dst (XorI src1 (LoadI src2)));
13502 effect(KILL cr);
13503 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13504
13505 ins_cost(150);
13506 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13507 ins_encode %{
13508 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13509 %}
13510 ins_pipe(ialu_reg_mem);
13511 %}
13512
13513 // Xor Memory with Register
13514 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13515 %{
13516 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13517 effect(KILL cr);
13518 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13519
13520 ins_cost(150);
13521 format %{ "xorb $dst, $src\t# byte" %}
13522 ins_encode %{
13523 __ xorb($dst$$Address, $src$$Register);
13524 %}
13525 ins_pipe(ialu_mem_reg);
13526 %}
13527
13528 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13529 %{
13530 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13531 effect(KILL cr);
13532 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13533
13534 ins_cost(150);
13535 format %{ "xorl $dst, $src\t# int" %}
13536 ins_encode %{
13537 __ xorl($dst$$Address, $src$$Register);
13538 %}
13539 ins_pipe(ialu_mem_reg);
13540 %}
13541
13542 // Xor Memory with Immediate
13543 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13544 %{
13545 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13546 effect(KILL cr);
13547 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13548
13549 ins_cost(125);
13550 format %{ "xorl $dst, $src\t# int" %}
13551 ins_encode %{
13552 __ xorl($dst$$Address, $src$$constant);
13553 %}
13554 ins_pipe(ialu_mem_imm);
13555 %}
13556
13557
13558 // Long Logical Instructions
13559
13560 // And Instructions
13561 // And Register with Register
13562 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13563 %{
13564 predicate(!UseAPX);
13565 match(Set dst (AndL dst src));
13566 effect(KILL cr);
13567 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13568
13569 format %{ "andq $dst, $src\t# long" %}
13570 ins_encode %{
13571 __ andq($dst$$Register, $src$$Register);
13572 %}
13573 ins_pipe(ialu_reg_reg);
13574 %}
13575
13576 // And Register with Register using New Data Destination (NDD)
13577 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13578 %{
13579 predicate(UseAPX);
13580 match(Set dst (AndL src1 src2));
13581 effect(KILL cr);
13582 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13583
13584 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13585 ins_encode %{
13586 __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13588 %}
13589 ins_pipe(ialu_reg_reg);
13590 %}
13591
13592 // And Register with Immediate 255
13593 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13594 %{
13595 match(Set dst (AndL src mask));
13596
13597 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13598 ins_encode %{
13599 // movzbl zeroes out the upper 32 bits and does not need REX.W
13600 __ movzbl($dst$$Register, $src$$Register);
13601 %}
13602 ins_pipe(ialu_reg);
13603 %}
13604
13605 // And Register with Immediate 65535
13606 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13607 %{
13608 match(Set dst (AndL src mask));
13609
13610 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13611 ins_encode %{
13612 // movzwl zeroes out the upper 32 bits and does not need REX.W
13613 __ movzwl($dst$$Register, $src$$Register);
13614 %}
13615 ins_pipe(ialu_reg);
13616 %}
13617
13618 // And Register with Immediate
13619 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13620 %{
13621 predicate(!UseAPX);
13622 match(Set dst (AndL dst src));
13623 effect(KILL cr);
13624 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13625
13626 format %{ "andq $dst, $src\t# long" %}
13627 ins_encode %{
13628 __ andq($dst$$Register, $src$$constant);
13629 %}
13630 ins_pipe(ialu_reg);
13631 %}
13632
13633 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13634 %{
13635 predicate(UseAPX);
13636 match(Set dst (AndL src1 src2));
13637 effect(KILL cr);
13638 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13639
13640 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13641 ins_encode %{
13642 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13643 %}
13644 ins_pipe(ialu_reg);
13645 %}
13646
13647 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13648 %{
13649 predicate(UseAPX);
13650 match(Set dst (AndL (LoadL src1) src2));
13651 effect(KILL cr);
13652 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13653
13654 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13655 ins_encode %{
13656 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13657 %}
13658 ins_pipe(ialu_reg);
13659 %}
13660
13661 // And Register with Memory
13662 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13663 %{
13664 predicate(!UseAPX);
13665 match(Set dst (AndL dst (LoadL src)));
13666 effect(KILL cr);
13667 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13668
13669 ins_cost(150);
13670 format %{ "andq $dst, $src\t# long" %}
13671 ins_encode %{
13672 __ andq($dst$$Register, $src$$Address);
13673 %}
13674 ins_pipe(ialu_reg_mem);
13675 %}
13676
13677 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13678 %{
13679 predicate(UseAPX);
13680 match(Set dst (AndL src1 (LoadL src2)));
13681 effect(KILL cr);
13682 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13683
13684 ins_cost(150);
13685 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13686 ins_encode %{
13687 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13688 %}
13689 ins_pipe(ialu_reg_mem);
13690 %}
13691
13692 // And Memory with Register
13693 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13694 %{
13695 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13696 effect(KILL cr);
13697 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13698
13699 ins_cost(150);
13700 format %{ "andq $dst, $src\t# long" %}
13701 ins_encode %{
13702 __ andq($dst$$Address, $src$$Register);
13703 %}
13704 ins_pipe(ialu_mem_reg);
13705 %}
13706
13707 // And Memory with Immediate
13708 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13709 %{
13710 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13711 effect(KILL cr);
13712 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13713
13714 ins_cost(125);
13715 format %{ "andq $dst, $src\t# long" %}
13716 ins_encode %{
13717 __ andq($dst$$Address, $src$$constant);
13718 %}
13719 ins_pipe(ialu_mem_imm);
13720 %}
13721
13722 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13723 %{
13724 // con must be a genuine 64-bit immediate with not(con) a power of 2 above bit 30,
13725 // since plain AND/OR handles 8/32-bit immediates well enough.
13726 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13727
13728 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13729 effect(KILL cr);
13730
13731 ins_cost(125);
13732 format %{ "btrq $dst, log2(not($con))\t# long" %}
13733 ins_encode %{
13734 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13735 %}
13736 ins_pipe(ialu_mem_imm);
13737 %}
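// For example (illustrative constant), "val &= ~(1L << 40)" becomes a StoreL
// of an AndL with con == ~(1L << 40); since not(con) is a single bit above
// bit 30, the rule clears it in place with "btrq $dst, 40" rather than
// materializing the 64-bit mask in a register.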
13738
13739 // BMI1 instructions
13740 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13741 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13742 predicate(UseBMI1Instructions);
13743 effect(KILL cr);
13744 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13745
13746 ins_cost(125);
13747 format %{ "andnq $dst, $src1, $src2" %}
13748
13749 ins_encode %{
13750 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13751 %}
13752 ins_pipe(ialu_reg_mem);
13753 %}
13754
13755 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13756 match(Set dst (AndL (XorL src1 minus_1) src2));
13757 predicate(UseBMI1Instructions);
13758 effect(KILL cr);
13759 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13760
13761 format %{ "andnq $dst, $src1, $src2" %}
13762
13763 ins_encode %{
13764 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13765 %}
ins_pipe(ialu_reg_reg);
13767 %}
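
// Illustrative Java-level sketch: ANDN computes dst = ~src1 & src2 in one
// instruction, so an expression like
//   long masked = (~a) & b;
// is matched via (AndL (XorL a -1) b) without materializing ~a separately.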
13768
13769 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13770 match(Set dst (AndL (SubL imm_zero src) src));
13771 predicate(UseBMI1Instructions);
13772 effect(KILL cr);
13773 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13774
13775 format %{ "blsiq $dst, $src" %}
13776
13777 ins_encode %{
13778 __ blsiq($dst$$Register, $src$$Register);
13779 %}
13780 ins_pipe(ialu_reg);
13781 %}
13782
13783 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13784 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13785 predicate(UseBMI1Instructions);
13786 effect(KILL cr);
13787 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13788
13789 ins_cost(125);
13790 format %{ "blsiq $dst, $src" %}
13791
13792 ins_encode %{
13793 __ blsiq($dst$$Register, $src$$Address);
13794 %}
13795 ins_pipe(ialu_reg_mem);
13796 %}
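
// Illustrative Java-level sketch: BLSI isolates the lowest set bit,
//   long lowest = x & -x;   // what Long.lowestOneBit(x) computes
// matched via (AndL (SubL 0 x) x), avoiding a separate NEG + AND.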
13797
13798 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13799 %{
13800 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13801 predicate(UseBMI1Instructions);
13802 effect(KILL cr);
13803 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13804
13805 ins_cost(125);
13806 format %{ "blsmskq $dst, $src" %}
13807
13808 ins_encode %{
13809 __ blsmskq($dst$$Register, $src$$Address);
13810 %}
13811 ins_pipe(ialu_reg_mem);
13812 %}
13813
13814 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13815 %{
13816 match(Set dst (XorL (AddL src minus_1) src));
13817 predicate(UseBMI1Instructions);
13818 effect(KILL cr);
13819 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13820
13821 format %{ "blsmskq $dst, $src" %}
13822
13823 ins_encode %{
13824 __ blsmskq($dst$$Register, $src$$Register);
13825 %}
13826
13827 ins_pipe(ialu_reg);
13828 %}
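
// Illustrative Java-level sketch: BLSMSK builds a mask up to and including
// the lowest set bit, e.g. for x = 0b101000:
//   x ^ (x - 1) == 0b001111
// matched via (XorL (AddL x -1) x).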
13829
13830 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13831 %{
13832 match(Set dst (AndL (AddL src minus_1) src) );
13833 predicate(UseBMI1Instructions);
13834 effect(KILL cr);
13835 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13836
13837 format %{ "blsrq $dst, $src" %}
13838
13839 ins_encode %{
13840 __ blsrq($dst$$Register, $src$$Register);
13841 %}
13842
13843 ins_pipe(ialu_reg);
13844 %}
13845
13846 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13847 %{
13848 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13849 predicate(UseBMI1Instructions);
13850 effect(KILL cr);
13851 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13852
13853 ins_cost(125);
13854 format %{ "blsrq $dst, $src" %}
13855
13856 ins_encode %{
13857 __ blsrq($dst$$Register, $src$$Address);
13858 %}
13859
ins_pipe(ialu_reg_mem);
13861 %}
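
// Illustrative Java-level sketch: BLSR clears the lowest set bit,
//   x = x & (x - 1);   // the classic popcount-loop step
// e.g. 0b101000 & 0b100111 == 0b100000, matched via (AndL (AddL x -1) x).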
13862
13863 // Or Instructions
13864 // Or Register with Register
13865 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13866 %{
13867 predicate(!UseAPX);
13868 match(Set dst (OrL dst src));
13869 effect(KILL cr);
13870 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13871
13872 format %{ "orq $dst, $src\t# long" %}
13873 ins_encode %{
13874 __ orq($dst$$Register, $src$$Register);
13875 %}
13876 ins_pipe(ialu_reg_reg);
13877 %}
13878
13879 // Or Register with Register using New Data Destination (NDD)
13880 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13881 %{
13882 predicate(UseAPX);
13883 match(Set dst (OrL src1 src2));
13884 effect(KILL cr);
13885 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13886
13887 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13888 ins_encode %{
__ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
13892 ins_pipe(ialu_reg_reg);
13893 %}
13894
13895 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
predicate(!UseAPX);
match(Set dst (OrL dst (CastP2X src)));
effect(KILL cr);
13899 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13900
13901 format %{ "orq $dst, $src\t# long" %}
13902 ins_encode %{
13903 __ orq($dst$$Register, $src$$Register);
13904 %}
13905 ins_pipe(ialu_reg_reg);
13906 %}
13907
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
predicate(UseAPX);
match(Set dst (OrL src1 (CastP2X src2)));
effect(KILL cr);
13911 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13912
13913 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13914 ins_encode %{
13915 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13916 %}
13917 ins_pipe(ialu_reg_reg);
13918 %}
13919
13920 // Or Register with Immediate
13921 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13922 %{
13923 predicate(!UseAPX);
13924 match(Set dst (OrL dst src));
13925 effect(KILL cr);
13926 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13927
13928 format %{ "orq $dst, $src\t# long" %}
13929 ins_encode %{
13930 __ orq($dst$$Register, $src$$constant);
13931 %}
13932 ins_pipe(ialu_reg);
13933 %}
13934
13935 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13936 %{
13937 predicate(UseAPX);
13938 match(Set dst (OrL src1 src2));
13939 effect(KILL cr);
13940 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13941
13942 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13943 ins_encode %{
13944 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13945 %}
13946 ins_pipe(ialu_reg);
13947 %}
13948
13949 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13950 %{
13951 predicate(UseAPX);
13952 match(Set dst (OrL src1 src2));
13953 effect(KILL cr);
13954 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13955
13956 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
13957 ins_encode %{
13958 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13959 %}
13960 ins_pipe(ialu_reg);
13961 %}
13962
// Or Memory with Immediate into Register (NDD)
13964 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13965 %{
13966 predicate(UseAPX);
13967 match(Set dst (OrL (LoadL src1) src2));
13968 effect(KILL cr);
13969 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13970
13971 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13972 ins_encode %{
13973 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13974 %}
13975 ins_pipe(ialu_reg);
13976 %}
13977
13978 // Or Register with Memory
13979 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13980 %{
13981 predicate(!UseAPX);
13982 match(Set dst (OrL dst (LoadL src)));
13983 effect(KILL cr);
13984 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13985
13986 ins_cost(150);
13987 format %{ "orq $dst, $src\t# long" %}
13988 ins_encode %{
13989 __ orq($dst$$Register, $src$$Address);
13990 %}
13991 ins_pipe(ialu_reg_mem);
13992 %}
13993
13994 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13995 %{
13996 predicate(UseAPX);
13997 match(Set dst (OrL src1 (LoadL src2)));
13998 effect(KILL cr);
13999 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14000
14001 ins_cost(150);
14002 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14003 ins_encode %{
14004 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14005 %}
14006 ins_pipe(ialu_reg_mem);
14007 %}
14008
14009 // Or Memory with Register
14010 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14011 %{
14012 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14013 effect(KILL cr);
14014 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14015
14016 ins_cost(150);
14017 format %{ "orq $dst, $src\t# long" %}
14018 ins_encode %{
14019 __ orq($dst$$Address, $src$$Register);
14020 %}
14021 ins_pipe(ialu_mem_reg);
14022 %}
14023
14024 // Or Memory with Immediate
14025 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14026 %{
14027 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14028 effect(KILL cr);
14029 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14030
14031 ins_cost(125);
14032 format %{ "orq $dst, $src\t# long" %}
14033 ins_encode %{
14034 __ orq($dst$$Address, $src$$constant);
14035 %}
14036 ins_pipe(ialu_mem_imm);
14037 %}
14038
14039 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14040 %{
// con must be a true 64-bit power-of-2 immediate;
// plain AND/OR immediate forms already cover 8/32-bit values.
14043 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14044
14045 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14046 effect(KILL cr);
14047
14048 ins_cost(125);
14049 format %{ "btsq $dst, log2($con)\t# long" %}
14050 ins_encode %{
14051 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14052 %}
14053 ins_pipe(ialu_mem_imm);
14054 %}
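
// Illustration (Java-level sketch): a pattern such as
//   bits |= 1L << 40;   // set bit 40 of a long field
// reaches the matcher as (StoreL dst (OrL (LoadL dst) con)) with a
// power-of-2 con, and the rule above folds it into "btsq [dst], 40".
// The predicate restricts this to bit indices of 32 and up, where the
// constant no longer fits a sign-extended 32-bit OR immediate.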
14055
14056 // Xor Instructions
14057 // Xor Register with Register
14058 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14059 %{
14060 predicate(!UseAPX);
14061 match(Set dst (XorL dst src));
14062 effect(KILL cr);
14063 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14064
14065 format %{ "xorq $dst, $src\t# long" %}
14066 ins_encode %{
14067 __ xorq($dst$$Register, $src$$Register);
14068 %}
14069 ins_pipe(ialu_reg_reg);
14070 %}
14071
14072 // Xor Register with Register using New Data Destination (NDD)
14073 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14074 %{
14075 predicate(UseAPX);
14076 match(Set dst (XorL src1 src2));
14077 effect(KILL cr);
14078 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14079
14080 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14081 ins_encode %{
14082 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14083 %}
14084 ins_pipe(ialu_reg_reg);
14085 %}
14086
14087 // Xor Register with Immediate -1
14088 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14089 %{
14090 predicate(!UseAPX);
14091 match(Set dst (XorL dst imm));
14092
14093 format %{ "notq $dst" %}
14094 ins_encode %{
14095 __ notq($dst$$Register);
14096 %}
14097 ins_pipe(ialu_reg);
14098 %}
14099
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14101 %{
14102 predicate(UseAPX);
14103 match(Set dst (XorL src imm));
14104 flag(PD::Flag_ndd_demotable);
14105
14106 format %{ "enotq $dst, $src" %}
14107 ins_encode %{
14108 __ enotq($dst$$Register, $src$$Register);
14109 %}
14110 ins_pipe(ialu_reg);
14111 %}
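
// Illustrative Java-level sketch: C2 represents bitwise complement as an
// XOR with -1, so
//   long inv = ~x;
// arrives as (XorL x -1) and is matched to a flag-free notq/enotq above
// instead of a flag-killing xorq with an immediate.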
14112
14113 // Xor Register with Immediate
14114 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14115 %{
// Strict predicate: exclude src == -1 so that xorL_rReg_im1 is selected regardless of cost.
14117 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14118 match(Set dst (XorL dst src));
14119 effect(KILL cr);
14120 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14121
14122 format %{ "xorq $dst, $src\t# long" %}
14123 ins_encode %{
14124 __ xorq($dst$$Register, $src$$constant);
14125 %}
14126 ins_pipe(ialu_reg);
14127 %}
14128
14129 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14130 %{
// Strict predicate: exclude src2 == -1 so that xorL_rReg_im1_ndd is selected regardless of cost.
14132 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14133 match(Set dst (XorL src1 src2));
14134 effect(KILL cr);
14135 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
14136
14137 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14138 ins_encode %{
14139 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14140 %}
14141 ins_pipe(ialu_reg);
14142 %}
14143
// Xor Memory with Immediate into Register (NDD)
14145 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14146 %{
14147 predicate(UseAPX);
14148 match(Set dst (XorL (LoadL src1) src2));
14149 effect(KILL cr);
14150 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14151 ins_cost(150);
14152
14153 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14154 ins_encode %{
14155 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14156 %}
14157 ins_pipe(ialu_reg);
14158 %}
14159
14160 // Xor Register with Memory
14161 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14162 %{
14163 predicate(!UseAPX);
14164 match(Set dst (XorL dst (LoadL src)));
14165 effect(KILL cr);
14166 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14167
14168 ins_cost(150);
14169 format %{ "xorq $dst, $src\t# long" %}
14170 ins_encode %{
14171 __ xorq($dst$$Register, $src$$Address);
14172 %}
14173 ins_pipe(ialu_reg_mem);
14174 %}
14175
14176 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14177 %{
14178 predicate(UseAPX);
14179 match(Set dst (XorL src1 (LoadL src2)));
14180 effect(KILL cr);
14181 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14182
14183 ins_cost(150);
14184 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14185 ins_encode %{
14186 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14187 %}
14188 ins_pipe(ialu_reg_mem);
14189 %}
14190
14191 // Xor Memory with Register
14192 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14193 %{
14194 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14195 effect(KILL cr);
14196 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14197
14198 ins_cost(150);
14199 format %{ "xorq $dst, $src\t# long" %}
14200 ins_encode %{
14201 __ xorq($dst$$Address, $src$$Register);
14202 %}
14203 ins_pipe(ialu_mem_reg);
14204 %}
14205
14206 // Xor Memory with Immediate
14207 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14208 %{
14209 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14210 effect(KILL cr);
14211 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14212
14213 ins_cost(125);
14214 format %{ "xorq $dst, $src\t# long" %}
14215 ins_encode %{
14216 __ xorq($dst$$Address, $src$$constant);
14217 %}
14218 ins_pipe(ialu_mem_imm);
14219 %}
14220
14221 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14222 %{
14223 match(Set dst (CmpLTMask p q));
14224 effect(KILL cr);
14225
14226 ins_cost(400);
format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
"setcc $dst\t# emits setlt + movzbl, or setzul for APX\n\t"
"negl $dst" %}
14230 ins_encode %{
14231 __ cmpl($p$$Register, $q$$Register);
14232 __ setcc(Assembler::less, $dst$$Register);
14233 __ negl($dst$$Register);
14234 %}
14235 ins_pipe(pipe_slow);
14236 %}
14237
14238 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14239 %{
14240 match(Set dst (CmpLTMask dst zero));
14241 effect(KILL cr);
14242
14243 ins_cost(100);
14244 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14245 ins_encode %{
14246 __ sarl($dst$$Register, 31);
14247 %}
14248 ins_pipe(ialu_reg);
14249 %}
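
// Illustrative sketch: "sarl $dst, 31" replicates the sign bit across the
// register, i.e.
//   int mask = x >> 31;   // -1 if x < 0, else 0
// which is exactly (CmpLTMask x 0) with no compare or branch.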
14250
// Better to save a register than avoid a branch.
14252 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14253 %{
14254 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14255 effect(KILL cr);
14256 ins_cost(300);
14257 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14258 "jge done\n\t"
14259 "addl $p,$y\n"
14260 "done: " %}
14261 ins_encode %{
14262 Register Rp = $p$$Register;
14263 Register Rq = $q$$Register;
14264 Register Ry = $y$$Register;
14265 Label done;
14266 __ subl(Rp, Rq);
14267 __ jccb(Assembler::greaterEqual, done);
14268 __ addl(Rp, Ry);
14269 __ bind(done);
14270 %}
14271 ins_pipe(pipe_cmplt);
14272 %}
14273
// Better to save a register than avoid a branch.
14275 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14276 %{
14277 match(Set y (AndI (CmpLTMask p q) y));
14278 effect(KILL cr);
14279
14280 ins_cost(300);
14281
14282 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14283 "jlt done\n\t"
14284 "xorl $y, $y\n"
14285 "done: " %}
14286 ins_encode %{
14287 Register Rp = $p$$Register;
14288 Register Rq = $q$$Register;
14289 Register Ry = $y$$Register;
14290 Label done;
14291 __ cmpl(Rp, Rq);
14292 __ jccb(Assembler::less, done);
14293 __ xorl(Ry, Ry);
14294 __ bind(done);
14295 %}
14296 ins_pipe(pipe_cmplt);
14297 %}
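
// Illustrative Java-level sketches of the two idioms above, assuming the
// subtraction in cadd_cmpLTMask does not overflow:
//   cadd_cmpLTMask: r = p - q; if (r < 0) r += y;   // e.g. modular fix-up
//   and_cmpLTMask:  y = (p < q) ? y : 0;
// Both trade the branch-free mask form for a short branch to save a register.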
14298
14299
14300 //---------- FP Instructions------------------------------------------------
14301
14302 // Really expensive, avoid
14303 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14304 %{
14305 match(Set cr (CmpF src1 src2));
14306
14307 ins_cost(500);
14308 format %{ "ucomiss $src1, $src2\n\t"
14309 "jnp,s exit\n\t"
14310 "pushfq\t# saw NaN, set CF\n\t"
14311 "andq [rsp], #0xffffff2b\n\t"
14312 "popfq\n"
14313 "exit:" %}
14314 ins_encode %{
14315 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14316 emit_cmpfp_fixup(masm);
14317 %}
14318 ins_pipe(pipe_slow);
14319 %}
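
// A sketch of the NaN fix-up above: ucomiss flags an unordered compare as
// ZF=1, PF=1, CF=1. If PF is set, the pushfq/andq/popfq sequence masks
// EFLAGS with 0xffffff2b, which clears SF/ZF/AF/PF but keeps CF, so the
// unordered result is re-read as strictly "below". The cheaper
// rFlagsRegUCF variants below skip this fix-up for flag users that can
// consume raw ucomiss results directly.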
14320
14321 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14322 match(Set cr (CmpF src1 src2));
14323
14324 ins_cost(100);
14325 format %{ "ucomiss $src1, $src2" %}
14326 ins_encode %{
14327 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14328 %}
14329 ins_pipe(pipe_slow);
14330 %}
14331
14332 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14333 match(Set cr (CmpF src1 (LoadF src2)));
14334
14335 ins_cost(100);
14336 format %{ "ucomiss $src1, $src2" %}
14337 ins_encode %{
14338 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14339 %}
14340 ins_pipe(pipe_slow);
14341 %}
14342
14343 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14344 match(Set cr (CmpF src con));
14345 ins_cost(100);
14346 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14347 ins_encode %{
14348 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14349 %}
14350 ins_pipe(pipe_slow);
14351 %}
14352
14353 // Really expensive, avoid
14354 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14355 %{
14356 match(Set cr (CmpD src1 src2));
14357
14358 ins_cost(500);
14359 format %{ "ucomisd $src1, $src2\n\t"
14360 "jnp,s exit\n\t"
14361 "pushfq\t# saw NaN, set CF\n\t"
14362 "andq [rsp], #0xffffff2b\n\t"
14363 "popfq\n"
14364 "exit:" %}
14365 ins_encode %{
14366 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14367 emit_cmpfp_fixup(masm);
14368 %}
14369 ins_pipe(pipe_slow);
14370 %}
14371
14372 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14373 match(Set cr (CmpD src1 src2));
14374
14375 ins_cost(100);
format %{ "ucomisd $src1, $src2" %}
14377 ins_encode %{
14378 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14379 %}
14380 ins_pipe(pipe_slow);
14381 %}
14382
14383 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14384 match(Set cr (CmpD src1 (LoadD src2)));
14385
14386 ins_cost(100);
14387 format %{ "ucomisd $src1, $src2" %}
14388 ins_encode %{
14389 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14390 %}
14391 ins_pipe(pipe_slow);
14392 %}
14393
14394 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14395 match(Set cr (CmpD src con));
14396 ins_cost(100);
14397 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14398 ins_encode %{
14399 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14400 %}
14401 ins_pipe(pipe_slow);
14402 %}
14403
14404 // Compare into -1,0,1
14405 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14406 %{
14407 match(Set dst (CmpF3 src1 src2));
14408 effect(KILL cr);
14409
14410 ins_cost(275);
14411 format %{ "ucomiss $src1, $src2\n\t"
14412 "movl $dst, #-1\n\t"
14413 "jp,s done\n\t"
14414 "jb,s done\n\t"
14415 "setne $dst\n\t"
14416 "movzbl $dst, $dst\n"
14417 "done:" %}
14418 ins_encode %{
14419 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14420 emit_cmpfp3(masm, $dst$$Register);
14421 %}
14422 ins_pipe(pipe_slow);
14423 %}
14424
14425 // Compare into -1,0,1
14426 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14427 %{
14428 match(Set dst (CmpF3 src1 (LoadF src2)));
14429 effect(KILL cr);
14430
14431 ins_cost(275);
14432 format %{ "ucomiss $src1, $src2\n\t"
14433 "movl $dst, #-1\n\t"
14434 "jp,s done\n\t"
14435 "jb,s done\n\t"
14436 "setne $dst\n\t"
14437 "movzbl $dst, $dst\n"
14438 "done:" %}
14439 ins_encode %{
14440 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14441 emit_cmpfp3(masm, $dst$$Register);
14442 %}
14443 ins_pipe(pipe_slow);
14444 %}
14445
14446 // Compare into -1,0,1
14447 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14448 match(Set dst (CmpF3 src con));
14449 effect(KILL cr);
14450
14451 ins_cost(275);
14452 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14453 "movl $dst, #-1\n\t"
14454 "jp,s done\n\t"
14455 "jb,s done\n\t"
14456 "setne $dst\n\t"
14457 "movzbl $dst, $dst\n"
14458 "done:" %}
14459 ins_encode %{
14460 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14461 emit_cmpfp3(masm, $dst$$Register);
14462 %}
14463 ins_pipe(pipe_slow);
14464 %}
14465
14466 // Compare into -1,0,1
14467 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14468 %{
14469 match(Set dst (CmpD3 src1 src2));
14470 effect(KILL cr);
14471
14472 ins_cost(275);
14473 format %{ "ucomisd $src1, $src2\n\t"
14474 "movl $dst, #-1\n\t"
14475 "jp,s done\n\t"
14476 "jb,s done\n\t"
14477 "setne $dst\n\t"
14478 "movzbl $dst, $dst\n"
14479 "done:" %}
14480 ins_encode %{
14481 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14482 emit_cmpfp3(masm, $dst$$Register);
14483 %}
14484 ins_pipe(pipe_slow);
14485 %}
14486
14487 // Compare into -1,0,1
14488 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14489 %{
14490 match(Set dst (CmpD3 src1 (LoadD src2)));
14491 effect(KILL cr);
14492
14493 ins_cost(275);
14494 format %{ "ucomisd $src1, $src2\n\t"
14495 "movl $dst, #-1\n\t"
14496 "jp,s done\n\t"
14497 "jb,s done\n\t"
14498 "setne $dst\n\t"
14499 "movzbl $dst, $dst\n"
14500 "done:" %}
14501 ins_encode %{
14502 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14503 emit_cmpfp3(masm, $dst$$Register);
14504 %}
14505 ins_pipe(pipe_slow);
14506 %}
14507
14508 // Compare into -1,0,1
14509 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14510 match(Set dst (CmpD3 src con));
14511 effect(KILL cr);
14512
14513 ins_cost(275);
14514 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14515 "movl $dst, #-1\n\t"
14516 "jp,s done\n\t"
14517 "jb,s done\n\t"
14518 "setne $dst\n\t"
14519 "movzbl $dst, $dst\n"
14520 "done:" %}
14521 ins_encode %{
14522 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14523 emit_cmpfp3(masm, $dst$$Register);
14524 %}
14525 ins_pipe(pipe_slow);
14526 %}
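
// Worked example for the CmpF3/CmpD3 rules above: the emitted sequence
// produces
//   src1 > src2 -> 1, src1 == src2 -> 0, src1 < src2 -> -1, unordered -> -1
// i.e. NaN collapses to -1, matching the fcmpl flavor of Java's float
// comparison ($dst is pre-set to -1 and kept on parity or below).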
14527
14528 //----------Arithmetic Conversion Instructions---------------------------------
14529
14530 instruct convF2D_reg_reg(regD dst, regF src)
14531 %{
14532 match(Set dst (ConvF2D src));
14533
14534 format %{ "cvtss2sd $dst, $src" %}
14535 ins_encode %{
14536 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14537 %}
14538 ins_pipe(pipe_slow); // XXX
14539 %}
14540
14541 instruct convF2D_reg_mem(regD dst, memory src)
14542 %{
14543 predicate(UseAVX == 0);
14544 match(Set dst (ConvF2D (LoadF src)));
14545
14546 format %{ "cvtss2sd $dst, $src" %}
14547 ins_encode %{
14548 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14549 %}
14550 ins_pipe(pipe_slow); // XXX
14551 %}
14552
14553 instruct convD2F_reg_reg(regF dst, regD src)
14554 %{
14555 match(Set dst (ConvD2F src));
14556
14557 format %{ "cvtsd2ss $dst, $src" %}
14558 ins_encode %{
14559 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14560 %}
14561 ins_pipe(pipe_slow); // XXX
14562 %}
14563
14564 instruct convD2F_reg_mem(regF dst, memory src)
14565 %{
14566 predicate(UseAVX == 0);
14567 match(Set dst (ConvD2F (LoadD src)));
14568
14569 format %{ "cvtsd2ss $dst, $src" %}
14570 ins_encode %{
14571 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14572 %}
14573 ins_pipe(pipe_slow); // XXX
14574 %}
14575
14576 // XXX do mem variants
14577 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14578 %{
14579 predicate(!VM_Version::supports_avx10_2());
14580 match(Set dst (ConvF2I src));
14581 effect(KILL cr);
14582 format %{ "convert_f2i $dst, $src" %}
14583 ins_encode %{
14584 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14585 %}
14586 ins_pipe(pipe_slow);
14587 %}
14588
14589 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14590 %{
14591 predicate(VM_Version::supports_avx10_2());
14592 match(Set dst (ConvF2I src));
14593 format %{ "evcvttss2sisl $dst, $src" %}
14594 ins_encode %{
14595 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14596 %}
14597 ins_pipe(pipe_slow);
14598 %}
14599
14600 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14601 %{
14602 predicate(VM_Version::supports_avx10_2());
14603 match(Set dst (ConvF2I (LoadF src)));
14604 format %{ "evcvttss2sisl $dst, $src" %}
14605 ins_encode %{
14606 __ evcvttss2sisl($dst$$Register, $src$$Address);
14607 %}
14608 ins_pipe(pipe_slow);
14609 %}
14610
14611 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14612 %{
14613 predicate(!VM_Version::supports_avx10_2());
14614 match(Set dst (ConvF2L src));
14615 effect(KILL cr);
format %{ "convert_f2l $dst, $src" %}
14617 ins_encode %{
14618 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14619 %}
14620 ins_pipe(pipe_slow);
14621 %}
14622
14623 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14624 %{
14625 predicate(VM_Version::supports_avx10_2());
14626 match(Set dst (ConvF2L src));
14627 format %{ "evcvttss2sisq $dst, $src" %}
14628 ins_encode %{
14629 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14630 %}
14631 ins_pipe(pipe_slow);
14632 %}
14633
14634 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14635 %{
14636 predicate(VM_Version::supports_avx10_2());
14637 match(Set dst (ConvF2L (LoadF src)));
14638 format %{ "evcvttss2sisq $dst, $src" %}
14639 ins_encode %{
14640 __ evcvttss2sisq($dst$$Register, $src$$Address);
14641 %}
14642 ins_pipe(pipe_slow);
14643 %}
14644
14645 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14646 %{
14647 predicate(!VM_Version::supports_avx10_2());
14648 match(Set dst (ConvD2I src));
14649 effect(KILL cr);
format %{ "convert_d2i $dst, $src" %}
14651 ins_encode %{
14652 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14653 %}
14654 ins_pipe(pipe_slow);
14655 %}
14656
14657 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14658 %{
14659 predicate(VM_Version::supports_avx10_2());
14660 match(Set dst (ConvD2I src));
14661 format %{ "evcvttsd2sisl $dst, $src" %}
14662 ins_encode %{
14663 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14664 %}
14665 ins_pipe(pipe_slow);
14666 %}
14667
14668 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14669 %{
14670 predicate(VM_Version::supports_avx10_2());
14671 match(Set dst (ConvD2I (LoadD src)));
14672 format %{ "evcvttsd2sisl $dst, $src" %}
14673 ins_encode %{
14674 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14675 %}
14676 ins_pipe(pipe_slow);
14677 %}
14678
14679 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14680 %{
14681 predicate(!VM_Version::supports_avx10_2());
14682 match(Set dst (ConvD2L src));
14683 effect(KILL cr);
format %{ "convert_d2l $dst, $src" %}
14685 ins_encode %{
14686 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14687 %}
14688 ins_pipe(pipe_slow);
14689 %}
14690
14691 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14692 %{
14693 predicate(VM_Version::supports_avx10_2());
14694 match(Set dst (ConvD2L src));
14695 format %{ "evcvttsd2sisq $dst, $src" %}
14696 ins_encode %{
14697 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14698 %}
14699 ins_pipe(pipe_slow);
14700 %}
14701
14702 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14703 %{
14704 predicate(VM_Version::supports_avx10_2());
14705 match(Set dst (ConvD2L (LoadD src)));
14706 format %{ "evcvttsd2sisq $dst, $src" %}
14707 ins_encode %{
14708 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14709 %}
14710 ins_pipe(pipe_slow);
14711 %}
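
// Note on the *_avx10_2 rules above: the saturating evcvtt* forms are used
// because, as integrated here, they already match Java's narrowing-cast
// rules, e.g.
//   (int) Float.NaN == 0;  (int) 1e30f == Integer.MAX_VALUE;
// so unlike convertF2I no flag-killing fix-up path is needed (note the
// absent KILL cr effect).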
14712
14713 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14714 %{
14715 match(Set dst (RoundD src));
14716 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
format %{ "round_double $dst, $src\t# using $rtmp and $rcx as TEMP" %}
14718 ins_encode %{
14719 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14720 %}
14721 ins_pipe(pipe_slow);
14722 %}
14723
14724 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14725 %{
14726 match(Set dst (RoundF src));
14727 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
format %{ "round_float $dst, $src\t# using $rtmp and $rcx as TEMP" %}
14729 ins_encode %{
14730 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14731 %}
14732 ins_pipe(pipe_slow);
14733 %}
14734
14735 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14736 %{
14737 predicate(!UseXmmI2F);
14738 match(Set dst (ConvI2F src));
14739
14740 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14741 ins_encode %{
14742 if (UseAVX > 0) {
14743 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14744 }
14745 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14746 %}
14747 ins_pipe(pipe_slow); // XXX
14748 %}
14749
14750 instruct convI2F_reg_mem(regF dst, memory src)
14751 %{
14752 predicate(UseAVX == 0);
14753 match(Set dst (ConvI2F (LoadI src)));
14754
14755 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14756 ins_encode %{
14757 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14758 %}
14759 ins_pipe(pipe_slow); // XXX
14760 %}
14761
14762 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14763 %{
14764 predicate(!UseXmmI2D);
14765 match(Set dst (ConvI2D src));
14766
14767 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14768 ins_encode %{
14769 if (UseAVX > 0) {
14770 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14771 }
14772 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14773 %}
14774 ins_pipe(pipe_slow); // XXX
14775 %}
14776
14777 instruct convI2D_reg_mem(regD dst, memory src)
14778 %{
14779 predicate(UseAVX == 0);
14780 match(Set dst (ConvI2D (LoadI src)));
14781
14782 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14783 ins_encode %{
14784 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14785 %}
14786 ins_pipe(pipe_slow); // XXX
14787 %}
14788
14789 instruct convXI2F_reg(regF dst, rRegI src)
14790 %{
14791 predicate(UseXmmI2F);
14792 match(Set dst (ConvI2F src));
14793
14794 format %{ "movdl $dst, $src\n\t"
"cvtdq2ps $dst, $dst\t# i2f" %}
14796 ins_encode %{
14797 __ movdl($dst$$XMMRegister, $src$$Register);
14798 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14799 %}
14800 ins_pipe(pipe_slow); // XXX
14801 %}
14802
14803 instruct convXI2D_reg(regD dst, rRegI src)
14804 %{
14805 predicate(UseXmmI2D);
14806 match(Set dst (ConvI2D src));
14807
14808 format %{ "movdl $dst, $src\n\t"
"cvtdq2pd $dst, $dst\t# i2d" %}
14810 ins_encode %{
14811 __ movdl($dst$$XMMRegister, $src$$Register);
14812 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14813 %}
14814 ins_pipe(pipe_slow); // XXX
14815 %}
14816
14817 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14818 %{
14819 match(Set dst (ConvL2F src));
14820
14821 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14822 ins_encode %{
14823 if (UseAVX > 0) {
14824 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14825 }
14826 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14827 %}
14828 ins_pipe(pipe_slow); // XXX
14829 %}
14830
14831 instruct convL2F_reg_mem(regF dst, memory src)
14832 %{
14833 predicate(UseAVX == 0);
14834 match(Set dst (ConvL2F (LoadL src)));
14835
14836 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14837 ins_encode %{
14838 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14839 %}
14840 ins_pipe(pipe_slow); // XXX
14841 %}
14842
14843 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14844 %{
14845 match(Set dst (ConvL2D src));
14846
14847 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14848 ins_encode %{
14849 if (UseAVX > 0) {
14850 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14851 }
14852 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14853 %}
14854 ins_pipe(pipe_slow); // XXX
14855 %}
14856
14857 instruct convL2D_reg_mem(regD dst, memory src)
14858 %{
14859 predicate(UseAVX == 0);
14860 match(Set dst (ConvL2D (LoadL src)));
14861
14862 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14863 ins_encode %{
14864 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14865 %}
14866 ins_pipe(pipe_slow); // XXX
14867 %}
14868
14869 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14870 %{
14871 match(Set dst (ConvI2L src));
14872
14873 ins_cost(125);
14874 format %{ "movslq $dst, $src\t# i2l" %}
14875 ins_encode %{
14876 __ movslq($dst$$Register, $src$$Register);
14877 %}
14878 ins_pipe(ialu_reg_reg);
14879 %}
14880
14881 // Zero-extend convert int to long
14882 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14883 %{
14884 match(Set dst (AndL (ConvI2L src) mask));
14885
format %{ "movl $dst, $src\t# i2l zero-extend" %}
14887 ins_encode %{
14888 if ($dst$$reg != $src$$reg) {
14889 __ movl($dst$$Register, $src$$Register);
14890 }
14891 %}
14892 ins_pipe(ialu_reg_reg);
14893 %}
14894
14895 // Zero-extend convert int to long
14896 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14897 %{
14898 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14899
format %{ "movl $dst, $src\t# i2l zero-extend" %}
14901 ins_encode %{
14902 __ movl($dst$$Register, $src$$Address);
14903 %}
14904 ins_pipe(ialu_reg_mem);
14905 %}
14906
14907 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14908 %{
14909 match(Set dst (AndL src mask));
14910
14911 format %{ "movl $dst, $src\t# zero-extend long" %}
14912 ins_encode %{
14913 __ movl($dst$$Register, $src$$Register);
14914 %}
14915 ins_pipe(ialu_reg_reg);
14916 %}
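
// Why the three zero-extend rules above get away with a bare movl
// (illustrative): any 32-bit register write on x86-64 implicitly clears
// bits 63:32, so
//   long u = x & 0xFFFF_FFFFL;
// costs a single "movl dst, src" instead of an AND with a 64-bit mask,
// and the i2l form can skip even that when dst == src.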
14917
14918 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14919 %{
14920 match(Set dst (ConvL2I src));
14921
14922 format %{ "movl $dst, $src\t# l2i" %}
14923 ins_encode %{
14924 __ movl($dst$$Register, $src$$Register);
14925 %}
14926 ins_pipe(ialu_reg_reg);
14927 %}
14928
14929
14930 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14931 match(Set dst (MoveF2I src));
14932 effect(DEF dst, USE src);
14933
14934 ins_cost(125);
14935 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
14936 ins_encode %{
14937 __ movl($dst$$Register, Address(rsp, $src$$disp));
14938 %}
14939 ins_pipe(ialu_reg_mem);
14940 %}
14941
14942 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14943 match(Set dst (MoveI2F src));
14944 effect(DEF dst, USE src);
14945
14946 ins_cost(125);
14947 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
14948 ins_encode %{
14949 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14950 %}
14951 ins_pipe(pipe_slow);
14952 %}
14953
14954 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14955 match(Set dst (MoveD2L src));
14956 effect(DEF dst, USE src);
14957
14958 ins_cost(125);
14959 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
14960 ins_encode %{
14961 __ movq($dst$$Register, Address(rsp, $src$$disp));
14962 %}
14963 ins_pipe(ialu_reg_mem);
14964 %}
14965
14966 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14967 predicate(!UseXmmLoadAndClearUpper);
14968 match(Set dst (MoveL2D src));
14969 effect(DEF dst, USE src);
14970
14971 ins_cost(125);
14972 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
14973 ins_encode %{
14974 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14975 %}
14976 ins_pipe(pipe_slow);
14977 %}
14978
14979 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14980 predicate(UseXmmLoadAndClearUpper);
14981 match(Set dst (MoveL2D src));
14982 effect(DEF dst, USE src);
14983
14984 ins_cost(125);
14985 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
14986 ins_encode %{
14987 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14988 %}
14989 ins_pipe(pipe_slow);
14990 %}
14991
14992
14993 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14994 match(Set dst (MoveF2I src));
14995 effect(DEF dst, USE src);
14996
14997 ins_cost(95); // XXX
14998 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
14999 ins_encode %{
15000 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15001 %}
15002 ins_pipe(pipe_slow);
15003 %}
15004
15005 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15006 match(Set dst (MoveI2F src));
15007 effect(DEF dst, USE src);
15008
15009 ins_cost(100);
15010 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15011 ins_encode %{
15012 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15013 %}
15014 ins_pipe( ialu_mem_reg );
15015 %}
15016
15017 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15018 match(Set dst (MoveD2L src));
15019 effect(DEF dst, USE src);
15020
15021 ins_cost(95); // XXX
format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
15023 ins_encode %{
15024 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15025 %}
15026 ins_pipe(pipe_slow);
15027 %}
15028
15029 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15030 match(Set dst (MoveL2D src));
15031 effect(DEF dst, USE src);
15032
15033 ins_cost(100);
15034 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15035 ins_encode %{
15036 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15037 %}
15038 ins_pipe(ialu_mem_reg);
15039 %}
15040
15041 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15042 match(Set dst (MoveF2I src));
15043 effect(DEF dst, USE src);
15044 ins_cost(85);
15045 format %{ "movd $dst,$src\t# MoveF2I" %}
15046 ins_encode %{
15047 __ movdl($dst$$Register, $src$$XMMRegister);
15048 %}
15049 ins_pipe( pipe_slow );
15050 %}
15051
15052 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15053 match(Set dst (MoveD2L src));
15054 effect(DEF dst, USE src);
15055 ins_cost(85);
15056 format %{ "movd $dst,$src\t# MoveD2L" %}
15057 ins_encode %{
15058 __ movdq($dst$$Register, $src$$XMMRegister);
15059 %}
15060 ins_pipe( pipe_slow );
15061 %}
15062
15063 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15064 match(Set dst (MoveI2F src));
15065 effect(DEF dst, USE src);
15066 ins_cost(100);
15067 format %{ "movd $dst,$src\t# MoveI2F" %}
15068 ins_encode %{
15069 __ movdl($dst$$XMMRegister, $src$$Register);
15070 %}
15071 ins_pipe( pipe_slow );
15072 %}
15073
15074 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15075 match(Set dst (MoveL2D src));
15076 effect(DEF dst, USE src);
15077 ins_cost(100);
15078 format %{ "movd $dst,$src\t# MoveL2D" %}
15079 ins_encode %{
15080 __ movdq($dst$$XMMRegister, $src$$Register);
15081 %}
15082 ins_pipe( pipe_slow );
15083 %}
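
// Illustrative note: the four reg-reg moves above are the bit-reinterpret
// ops behind intrinsics such as
//   int bits  = Float.floatToRawIntBits(f);     // MoveF2I
//   double d  = Double.longBitsToDouble(bits64); // MoveL2D
// movdl/movdq shuttle the raw bits between the GPR and XMM files with no
// conversion, which is why no flags are touched.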
15084
15085 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15087 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15088 Universe dummy, rFlagsReg cr)
15089 %{
15090 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15091 match(Set dummy (ClearArray cnt base));
15092 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15093
15094 format %{ $$template
15095 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15096 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15097 $$emit$$"jg LARGE\n\t"
15098 $$emit$$"dec rcx\n\t"
15099 $$emit$$"js DONE\t# Zero length\n\t"
15100 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15101 $$emit$$"dec rcx\n\t"
15102 $$emit$$"jge LOOP\n\t"
15103 $$emit$$"jmp DONE\n\t"
15104 $$emit$$"# LARGE:\n\t"
15105 if (UseFastStosb) {
$$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15107 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15108 } else if (UseXMMForObjInit) {
15109 $$emit$$"mov rdi,rax\n\t"
15110 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15111 $$emit$$"jmpq L_zero_64_bytes\n\t"
15112 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15113 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15114 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15115 $$emit$$"add 0x40,rax\n\t"
15116 $$emit$$"# L_zero_64_bytes:\n\t"
15117 $$emit$$"sub 0x8,rcx\n\t"
15118 $$emit$$"jge L_loop\n\t"
15119 $$emit$$"add 0x4,rcx\n\t"
15120 $$emit$$"jl L_tail\n\t"
15121 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15122 $$emit$$"add 0x20,rax\n\t"
15123 $$emit$$"sub 0x4,rcx\n\t"
15124 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15125 $$emit$$"add 0x4,rcx\n\t"
15126 $$emit$$"jle L_end\n\t"
15127 $$emit$$"dec rcx\n\t"
15128 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15129 $$emit$$"vmovq xmm0,(rax)\n\t"
15130 $$emit$$"add 0x8,rax\n\t"
15131 $$emit$$"dec rcx\n\t"
15132 $$emit$$"jge L_sloop\n\t"
15133 $$emit$$"# L_end:\n\t"
15134 } else {
15135 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15136 }
15137 $$emit$$"# DONE"
15138 %}
15139 ins_encode %{
15140 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15141 $tmp$$XMMRegister, false, knoreg);
15142 %}
15143 ins_pipe(pipe_slow);
15144 %}
15145
15146 // Small non-constant length ClearArray for AVX512 targets.
15147 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15148 Universe dummy, rFlagsReg cr)
15149 %{
15150 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15151 match(Set dummy (ClearArray cnt base));
15152 ins_cost(125);
15153 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15154
15155 format %{ $$template
15156 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15157 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15158 $$emit$$"jg LARGE\n\t"
15159 $$emit$$"dec rcx\n\t"
15160 $$emit$$"js DONE\t# Zero length\n\t"
15161 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15162 $$emit$$"dec rcx\n\t"
15163 $$emit$$"jge LOOP\n\t"
15164 $$emit$$"jmp DONE\n\t"
15165 $$emit$$"# LARGE:\n\t"
15166 if (UseFastStosb) {
$$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15168 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15169 } else if (UseXMMForObjInit) {
15170 $$emit$$"mov rdi,rax\n\t"
15171 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15172 $$emit$$"jmpq L_zero_64_bytes\n\t"
15173 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15174 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15175 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15176 $$emit$$"add 0x40,rax\n\t"
15177 $$emit$$"# L_zero_64_bytes:\n\t"
15178 $$emit$$"sub 0x8,rcx\n\t"
15179 $$emit$$"jge L_loop\n\t"
15180 $$emit$$"add 0x4,rcx\n\t"
15181 $$emit$$"jl L_tail\n\t"
15182 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15183 $$emit$$"add 0x20,rax\n\t"
15184 $$emit$$"sub 0x4,rcx\n\t"
15185 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15186 $$emit$$"add 0x4,rcx\n\t"
15187 $$emit$$"jle L_end\n\t"
15188 $$emit$$"dec rcx\n\t"
15189 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15190 $$emit$$"vmovq xmm0,(rax)\n\t"
15191 $$emit$$"add 0x8,rax\n\t"
15192 $$emit$$"dec rcx\n\t"
15193 $$emit$$"jge L_sloop\n\t"
15194 $$emit$$"# L_end:\n\t"
15195 } else {
15196 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15197 }
15198 $$emit$$"# DONE"
15199 %}
15200 ins_encode %{
15201 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15202 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15203 %}
15204 ins_pipe(pipe_slow);
15205 %}
15206
15207 // Large non-constant length ClearArray for non-AVX512 targets.
15208 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15209 Universe dummy, rFlagsReg cr)
15210 %{
predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15212 match(Set dummy (ClearArray cnt base));
15213 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15214
15215 format %{ $$template
15216 if (UseFastStosb) {
15217 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
$$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15219 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15220 } else if (UseXMMForObjInit) {
15221 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15222 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15223 $$emit$$"jmpq L_zero_64_bytes\n\t"
15224 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15225 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15226 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15227 $$emit$$"add 0x40,rax\n\t"
15228 $$emit$$"# L_zero_64_bytes:\n\t"
15229 $$emit$$"sub 0x8,rcx\n\t"
15230 $$emit$$"jge L_loop\n\t"
15231 $$emit$$"add 0x4,rcx\n\t"
15232 $$emit$$"jl L_tail\n\t"
15233 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15234 $$emit$$"add 0x20,rax\n\t"
15235 $$emit$$"sub 0x4,rcx\n\t"
15236 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15237 $$emit$$"add 0x4,rcx\n\t"
15238 $$emit$$"jle L_end\n\t"
15239 $$emit$$"dec rcx\n\t"
15240 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15241 $$emit$$"vmovq xmm0,(rax)\n\t"
15242 $$emit$$"add 0x8,rax\n\t"
15243 $$emit$$"dec rcx\n\t"
15244 $$emit$$"jge L_sloop\n\t"
15245 $$emit$$"# L_end:\n\t"
15246 } else {
15247 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15248 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15249 }
15250 %}
15251 ins_encode %{
15252 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15253 $tmp$$XMMRegister, true, knoreg);
15254 %}
15255 ins_pipe(pipe_slow);
15256 %}
15257
15258 // Large non-constant length ClearArray for AVX512 targets.
15259 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15260 Universe dummy, rFlagsReg cr)
15261 %{
15262 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15263 match(Set dummy (ClearArray cnt base));
15264 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15265
15266 format %{ $$template
15267 if (UseFastStosb) {
15268 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
$$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15270 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15271 } else if (UseXMMForObjInit) {
15272 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15273 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15274 $$emit$$"jmpq L_zero_64_bytes\n\t"
15275 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15276 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15277 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15278 $$emit$$"add 0x40,rax\n\t"
15279 $$emit$$"# L_zero_64_bytes:\n\t"
15280 $$emit$$"sub 0x8,rcx\n\t"
15281 $$emit$$"jge L_loop\n\t"
15282 $$emit$$"add 0x4,rcx\n\t"
15283 $$emit$$"jl L_tail\n\t"
15284 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15285 $$emit$$"add 0x20,rax\n\t"
15286 $$emit$$"sub 0x4,rcx\n\t"
15287 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15288 $$emit$$"add 0x4,rcx\n\t"
15289 $$emit$$"jle L_end\n\t"
15290 $$emit$$"dec rcx\n\t"
15291 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15292 $$emit$$"vmovq xmm0,(rax)\n\t"
15293 $$emit$$"add 0x8,rax\n\t"
15294 $$emit$$"dec rcx\n\t"
15295 $$emit$$"jge L_sloop\n\t"
15296 $$emit$$"# L_end:\n\t"
15297 } else {
15298 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15299 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15300 }
15301 %}
15302 ins_encode %{
15303 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15304 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15305 %}
15306 ins_pipe(pipe_slow);
15307 %}
15308
15309 // Small constant length ClearArray for AVX512 targets.
15310 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15311 %{
15312 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15313 match(Set dummy (ClearArray cnt base));
15314 ins_cost(100);
15315 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
format %{ "clear_mem_imm $base, $cnt" %}
15317 ins_encode %{
15318 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15319 %}
15320 ins_pipe(pipe_slow);
15321 %}
15322
15323 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15324 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15325 %{
15326 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15327 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15328 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15329
15330 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15331 ins_encode %{
15332 __ string_compare($str1$$Register, $str2$$Register,
15333 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15334 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15335 %}
15336 ins_pipe( pipe_slow );
15337 %}
15338
15339 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15340 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15341 %{
15342 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15343 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15344 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15345
15346 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15347 ins_encode %{
15348 __ string_compare($str1$$Register, $str2$$Register,
15349 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15350 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15351 %}
15352 ins_pipe( pipe_slow );
15353 %}
15354
15355 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15356 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15357 %{
15358 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15359 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15360 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15361
15362 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15363 ins_encode %{
15364 __ string_compare($str1$$Register, $str2$$Register,
15365 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15366 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15367 %}
15368 ins_pipe( pipe_slow );
15369 %}
15370
15371 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15372 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15373 %{
15374 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15375 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15376 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15377
15378 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15379 ins_encode %{
15380 __ string_compare($str1$$Register, $str2$$Register,
15381 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15382 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15383 %}
15384 ins_pipe( pipe_slow );
15385 %}
15386
15387 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15388 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15389 %{
15390 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15391 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15392 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15393
15394 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15395 ins_encode %{
15396 __ string_compare($str1$$Register, $str2$$Register,
15397 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15398 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15399 %}
15400 ins_pipe( pipe_slow );
15401 %}
15402
15403 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15404 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15405 %{
15406 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15407 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15408 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15409
15410 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15411 ins_encode %{
15412 __ string_compare($str1$$Register, $str2$$Register,
15413 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15414 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15415 %}
15416 ins_pipe( pipe_slow );
15417 %}
15418
15419 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15420 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15421 %{
15422 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15423 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15424 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15425
15426 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15427 ins_encode %{
15428 __ string_compare($str2$$Register, $str1$$Register,
15429 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15430 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15431 %}
15432 ins_pipe( pipe_slow );
15433 %}
15434
15435 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15436 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15437 %{
15438 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15439 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15440 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15441
15442 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15443 ins_encode %{
15444 __ string_compare($str2$$Register, $str1$$Register,
15445 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15446 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15447 %}
15448 ins_pipe( pipe_slow );
15449 %}
15450
// Fast search of substring with known size.
15452 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15453 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15454 %{
15455 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15456 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15457 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15458
15459 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15460 ins_encode %{
15461 int icnt2 = (int)$int_cnt2$$constant;
15462 if (icnt2 >= 16) {
// IndexOf for constant substrings with size >= 16 elements,
// which don't need to be loaded through the stack.
15465 __ string_indexofC8($str1$$Register, $str2$$Register,
15466 $cnt1$$Register, $cnt2$$Register,
15467 icnt2, $result$$Register,
15468 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15469 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15471 __ string_indexof($str1$$Register, $str2$$Register,
15472 $cnt1$$Register, $cnt2$$Register,
15473 icnt2, $result$$Register,
15474 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15475 }
15476 %}
15477 ins_pipe( pipe_slow );
15478 %}
15479
15480 // fast search of substring with known size.
15481 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15482 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15483 %{
15484 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15485 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15486 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15487
15488 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15489 ins_encode %{
15490 int icnt2 = (int)$int_cnt2$$constant;
15491 if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
15494 __ string_indexofC8($str1$$Register, $str2$$Register,
15495 $cnt1$$Register, $cnt2$$Register,
15496 icnt2, $result$$Register,
15497 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15498 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15500 __ string_indexof($str1$$Register, $str2$$Register,
15501 $cnt1$$Register, $cnt2$$Register,
15502 icnt2, $result$$Register,
15503 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15504 }
15505 %}
15506 ins_pipe( pipe_slow );
15507 %}
15508
15509 // fast search of substring with known size.
15510 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15511 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15512 %{
15513 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15514 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15515 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15516
15517 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15518 ins_encode %{
15519 int icnt2 = (int)$int_cnt2$$constant;
15520 if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
15523 __ string_indexofC8($str1$$Register, $str2$$Register,
15524 $cnt1$$Register, $cnt2$$Register,
15525 icnt2, $result$$Register,
15526 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15527 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15529 __ string_indexof($str1$$Register, $str2$$Register,
15530 $cnt1$$Register, $cnt2$$Register,
15531 icnt2, $result$$Register,
15532 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15533 }
15534 %}
15535 ins_pipe( pipe_slow );
15536 %}
15537
15538 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15539 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15540 %{
15541 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15542 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15543 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15544
15545 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15546 ins_encode %{
15547 __ string_indexof($str1$$Register, $str2$$Register,
15548 $cnt1$$Register, $cnt2$$Register,
15549 (-1), $result$$Register,
15550 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15551 %}
15552 ins_pipe( pipe_slow );
15553 %}
15554
15555 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15556 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15557 %{
15558 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15559 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15560 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15561
15562 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15563 ins_encode %{
15564 __ string_indexof($str1$$Register, $str2$$Register,
15565 $cnt1$$Register, $cnt2$$Register,
15566 (-1), $result$$Register,
15567 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15568 %}
15569 ins_pipe( pipe_slow );
15570 %}
15571
15572 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15573 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15574 %{
15575 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15576 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15577 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15578
15579 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15580 ins_encode %{
15581 __ string_indexof($str1$$Register, $str2$$Register,
15582 $cnt1$$Register, $cnt2$$Register,
15583 (-1), $result$$Register,
15584 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15585 %}
15586 ins_pipe( pipe_slow );
15587 %}
15588
15589 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15590 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15591 %{
15592 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15593 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15594 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15595 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15596 ins_encode %{
15597 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15598 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15599 %}
15600 ins_pipe( pipe_slow );
15601 %}
15602
15603 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15604 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15605 %{
15606 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15607 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15608 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15609 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15610 ins_encode %{
15611 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15612 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15613 %}
15614 ins_pipe( pipe_slow );
15615 %}
15616
15617 // fast string equals
15618 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15619 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15620 %{
15621 predicate(!VM_Version::supports_avx512vlbw());
15622 match(Set result (StrEquals (Binary str1 str2) cnt));
15623 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15624
15625 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15626 ins_encode %{
15627 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15628 $cnt$$Register, $result$$Register, $tmp3$$Register,
15629 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15630 %}
15631 ins_pipe( pipe_slow );
15632 %}
15633
15634 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15635 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15636 %{
15637 predicate(VM_Version::supports_avx512vlbw());
15638 match(Set result (StrEquals (Binary str1 str2) cnt));
15639 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15640
15641 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15642 ins_encode %{
15643 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15644 $cnt$$Register, $result$$Register, $tmp3$$Register,
15645 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15646 %}
15647 ins_pipe( pipe_slow );
15648 %}
15649
15650 // fast array equals
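// The B/C suffix selects the element width passed to arrays_equals
// (false = byte comparison, true = char comparison); the _evex variants
// perform the same comparison but supply an AVX-512 opmask register,
// presumably for masked handling of the tail elements.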
15651 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15652 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15653 %{
15654 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15655 match(Set result (AryEq ary1 ary2));
15656 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15657
15658 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15659 ins_encode %{
15660 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15661 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15662 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15663 %}
15664 ins_pipe( pipe_slow );
15665 %}
15666
15667 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15668 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15669 %{
15670 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15671 match(Set result (AryEq ary1 ary2));
15672 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15673
15674 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15675 ins_encode %{
15676 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15677 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15678 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15679 %}
15680 ins_pipe( pipe_slow );
15681 %}
15682
15683 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15684 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15685 %{
15686 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15687 match(Set result (AryEq ary1 ary2));
15688 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15689
15690 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15691 ins_encode %{
15692 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15693 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15694 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15695 %}
15696 ins_pipe( pipe_slow );
15697 %}
15698
15699 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15700 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15701 %{
15702 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15703 match(Set result (AryEq ary1 ary2));
15704 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15705
15706 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15707 ins_encode %{
15708 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15709 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15710 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15711 %}
15712 ins_pipe( pipe_slow );
15713 %}
15714
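// Vectorized array/String hashCode: applies h = 31*h + a[i] across the
// elements (equivalently result*31^n + sum a[i]*31^(n-1-i)), using the
// many vector temporaries for partial sums and precomputed powers of 31;
// $basic_type selects the element load width.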
15715 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15716 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15717 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15718 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15719 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15720 %{
15721 predicate(UseAVX >= 2);
15722 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15723 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15724 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15725 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15726 USE basic_type, KILL cr);
15727
15728 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15729 ins_encode %{
15730 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15731 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15732 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15733 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15734 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15735 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15736 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15737 %}
15738 ins_pipe( pipe_slow );
15739 %}
15740
15741 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15743 %{
15744 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15745 match(Set result (CountPositives ary1 len));
15746 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15747
15748 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15749 ins_encode %{
15750 __ count_positives($ary1$$Register, $len$$Register,
15751 $result$$Register, $tmp3$$Register,
15752 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15753 %}
15754 ins_pipe( pipe_slow );
15755 %}
15756
15757 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15759 %{
15760 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15761 match(Set result (CountPositives ary1 len));
15762 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15763
15764 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15765 ins_encode %{
15766 __ count_positives($ary1$$Register, $len$$Register,
15767 $result$$Register, $tmp3$$Register,
15768 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15769 %}
15770 ins_pipe( pipe_slow );
15771 %}
15772
15773 // fast char[] to byte[] compression
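// Two flavors: the generic SSE/AVX version below, and an _evex version
// gated on AVX512VL+BW plus BMI2 that supplies two opmask (k) registers,
// presumably for masked loads/stores of the tail elements.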
15774 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15775 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15776 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15777 match(Set result (StrCompressedCopy src (Binary dst len)));
15778 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15779 USE_KILL len, KILL tmp5, KILL cr);
15780
15781 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15782 ins_encode %{
15783 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15784 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15785 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15786 knoreg, knoreg);
15787 %}
15788 ins_pipe( pipe_slow );
15789 %}
15790
15791 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15792 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15793 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15794 match(Set result (StrCompressedCopy src (Binary dst len)));
15795 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15796 USE_KILL len, KILL tmp5, KILL cr);
15797
15798 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15799 ins_encode %{
15800 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15801 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15802 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15803 $ktmp1$$KRegister, $ktmp2$$KRegister);
15804 %}
15805 ins_pipe( pipe_slow );
15806 %}

// fast byte[] to char[] inflation
15808 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15809 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15810 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15811 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15812 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15813
15814 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15815 ins_encode %{
15816 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15817 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15818 %}
15819 ins_pipe( pipe_slow );
15820 %}
15821
15822 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15823 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15824 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15825 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15826 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15827
15828 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15829 ins_encode %{
15830 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15831 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15832 %}
15833 ins_pipe( pipe_slow );
15834 %}
15835
15836 // encode char[] to byte[] in ISO_8859_1
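// Both this and the ASCII variant below share one masm entry point,
// encode_iso_array; the trailing bool selects ASCII (chars must fit in
// 7 bits) versus ISO-8859-1 (8 bits) range checking.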
15837 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15838 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15839 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15840 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15841 match(Set result (EncodeISOArray src (Binary dst len)));
15842 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15843
15844 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15845 ins_encode %{
15846 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15847 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15848 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15849 %}
15850 ins_pipe( pipe_slow );
15851 %}
15852
15853 // encode char[] to byte[] in ASCII
15854 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15855 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15856 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15857 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15858 match(Set result (EncodeISOArray src (Binary dst len)));
15859 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15860
15861 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15862 ins_encode %{
15863 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15864 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15865 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15866 %}
15867 ins_pipe( pipe_slow );
15868 %}
15869
15870 //----------Overflow Math Instructions-----------------------------------------
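//
// These instructions exist only for their flags result: the matcher pairs
// an OverflowAdd/Sub/Mul node with a branch on the overflow condition.
// Note that OverflowSub matches cmp rather than sub when the arithmetic
// result is dead -- the flags produced are identical.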
15871
15872 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15873 %{
15874 match(Set cr (OverflowAddI op1 op2));
15875 effect(DEF cr, USE_KILL op1, USE op2);
15876
15877 format %{ "addl $op1, $op2\t# overflow check int" %}
15878
15879 ins_encode %{
15880 __ addl($op1$$Register, $op2$$Register);
15881 %}
15882 ins_pipe(ialu_reg_reg);
15883 %}
15884
15885 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15886 %{
15887 match(Set cr (OverflowAddI op1 op2));
15888 effect(DEF cr, USE_KILL op1, USE op2);
15889
15890 format %{ "addl $op1, $op2\t# overflow check int" %}
15891
15892 ins_encode %{
15893 __ addl($op1$$Register, $op2$$constant);
15894 %}
15895 ins_pipe(ialu_reg_reg);
15896 %}
15897
15898 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15899 %{
15900 match(Set cr (OverflowAddL op1 op2));
15901 effect(DEF cr, USE_KILL op1, USE op2);
15902
15903 format %{ "addq $op1, $op2\t# overflow check long" %}
15904 ins_encode %{
15905 __ addq($op1$$Register, $op2$$Register);
15906 %}
15907 ins_pipe(ialu_reg_reg);
15908 %}
15909
15910 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15911 %{
15912 match(Set cr (OverflowAddL op1 op2));
15913 effect(DEF cr, USE_KILL op1, USE op2);
15914
15915 format %{ "addq $op1, $op2\t# overflow check long" %}
15916 ins_encode %{
15917 __ addq($op1$$Register, $op2$$constant);
15918 %}
15919 ins_pipe(ialu_reg_reg);
15920 %}
15921
15922 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15923 %{
15924 match(Set cr (OverflowSubI op1 op2));
15925
15926 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15927 ins_encode %{
15928 __ cmpl($op1$$Register, $op2$$Register);
15929 %}
15930 ins_pipe(ialu_reg_reg);
15931 %}
15932
15933 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15934 %{
15935 match(Set cr (OverflowSubI op1 op2));
15936
15937 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15938 ins_encode %{
15939 __ cmpl($op1$$Register, $op2$$constant);
15940 %}
15941 ins_pipe(ialu_reg_reg);
15942 %}
15943
15944 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15945 %{
15946 match(Set cr (OverflowSubL op1 op2));
15947
15948 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15949 ins_encode %{
15950 __ cmpq($op1$$Register, $op2$$Register);
15951 %}
15952 ins_pipe(ialu_reg_reg);
15953 %}
15954
15955 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15956 %{
15957 match(Set cr (OverflowSubL op1 op2));
15958
15959 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15960 ins_encode %{
15961 __ cmpq($op1$$Register, $op2$$constant);
15962 %}
15963 ins_pipe(ialu_reg_reg);
15964 %}
15965
15966 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15967 %{
15968 match(Set cr (OverflowSubI zero op2));
15969 effect(DEF cr, USE_KILL op2);
15970
15971 format %{ "negl $op2\t# overflow check int" %}
15972 ins_encode %{
15973 __ negl($op2$$Register);
15974 %}
15975 ins_pipe(ialu_reg_reg);
15976 %}
15977
15978 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15979 %{
15980 match(Set cr (OverflowSubL zero op2));
15981 effect(DEF cr, USE_KILL op2);
15982
15983 format %{ "negq $op2\t# overflow check long" %}
15984 ins_encode %{
15985 __ negq($op2$$Register);
15986 %}
15987 ins_pipe(ialu_reg_reg);
15988 %}
15989
15990 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15991 %{
15992 match(Set cr (OverflowMulI op1 op2));
15993 effect(DEF cr, USE_KILL op1, USE op2);
15994
15995 format %{ "imull $op1, $op2\t# overflow check int" %}
15996 ins_encode %{
15997 __ imull($op1$$Register, $op2$$Register);
15998 %}
15999 ins_pipe(ialu_reg_reg_alu0);
16000 %}
16001
16002 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16003 %{
16004 match(Set cr (OverflowMulI op1 op2));
16005 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16006
16007 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16008 ins_encode %{
16009 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16010 %}
16011 ins_pipe(ialu_reg_reg_alu0);
16012 %}
16013
16014 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16015 %{
16016 match(Set cr (OverflowMulL op1 op2));
16017 effect(DEF cr, USE_KILL op1, USE op2);
16018
16019 format %{ "imulq $op1, $op2\t# overflow check long" %}
16020 ins_encode %{
16021 __ imulq($op1$$Register, $op2$$Register);
16022 %}
16023 ins_pipe(ialu_reg_reg_alu0);
16024 %}
16025
16026 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16027 %{
16028 match(Set cr (OverflowMulL op1 op2));
16029 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16030
16031 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16032 ins_encode %{
16033 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16034 %}
16035 ins_pipe(ialu_reg_reg_alu0);
16036 %}
16037
16038
16039 //----------Control Flow Instructions------------------------------------------
16040 // Signed compare Instructions
16041
16042 // XXX more variants!!
16043 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16044 %{
16045 match(Set cr (CmpI op1 op2));
16046 effect(DEF cr, USE op1, USE op2);
16047
16048 format %{ "cmpl $op1, $op2" %}
16049 ins_encode %{
16050 __ cmpl($op1$$Register, $op2$$Register);
16051 %}
16052 ins_pipe(ialu_cr_reg_reg);
16053 %}
16054
16055 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16056 %{
16057 match(Set cr (CmpI op1 op2));
16058
16059 format %{ "cmpl $op1, $op2" %}
16060 ins_encode %{
16061 __ cmpl($op1$$Register, $op2$$constant);
16062 %}
16063 ins_pipe(ialu_cr_reg_imm);
16064 %}
16065
16066 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16067 %{
16068 match(Set cr (CmpI op1 (LoadI op2)));
16069
16070 ins_cost(500); // XXX
16071 format %{ "cmpl $op1, $op2" %}
16072 ins_encode %{
16073 __ cmpl($op1$$Register, $op2$$Address);
16074 %}
16075 ins_pipe(ialu_cr_reg_mem);
16076 %}
16077
16078 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16079 %{
16080 match(Set cr (CmpI src zero));
16081
16082 format %{ "testl $src, $src" %}
16083 ins_encode %{
16084 __ testl($src$$Register, $src$$Register);
16085 %}
16086 ins_pipe(ialu_cr_reg_imm);
16087 %}
16088
16089 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16090 %{
16091 match(Set cr (CmpI (AndI src con) zero));
16092
16093 format %{ "testl $src, $con" %}
16094 ins_encode %{
16095 __ testl($src$$Register, $con$$constant);
16096 %}
16097 ins_pipe(ialu_cr_reg_imm);
16098 %}
16099
16100 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16101 %{
16102 match(Set cr (CmpI (AndI src1 src2) zero));
16103
16104 format %{ "testl $src1, $src2" %}
16105 ins_encode %{
16106 __ testl($src1$$Register, $src2$$Register);
16107 %}
16108 ins_pipe(ialu_cr_reg_imm);
16109 %}
16110
16111 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16112 %{
16113 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16114
16115 format %{ "testl $src, $mem" %}
16116 ins_encode %{
16117 __ testl($src$$Register, $mem$$Address);
16118 %}
16119 ins_pipe(ialu_cr_reg_mem);
16120 %}
16121
16122 // Unsigned compare Instructions; really, same as signed except they
16123 // produce an rFlagsRegU instead of rFlagsReg.
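// The code emitted is byte-for-byte the same as the signed forms; only
// the flags-register class differs, which steers later branches to the
// unsigned condition codes (jb/ja rather than jl/jg).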
16124 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16125 %{
16126 match(Set cr (CmpU op1 op2));
16127
16128 format %{ "cmpl $op1, $op2\t# unsigned" %}
16129 ins_encode %{
16130 __ cmpl($op1$$Register, $op2$$Register);
16131 %}
16132 ins_pipe(ialu_cr_reg_reg);
16133 %}
16134
16135 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16136 %{
16137 match(Set cr (CmpU op1 op2));
16138
16139 format %{ "cmpl $op1, $op2\t# unsigned" %}
16140 ins_encode %{
16141 __ cmpl($op1$$Register, $op2$$constant);
16142 %}
16143 ins_pipe(ialu_cr_reg_imm);
16144 %}
16145
16146 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16147 %{
16148 match(Set cr (CmpU op1 (LoadI op2)));
16149
16150 ins_cost(500); // XXX
16151 format %{ "cmpl $op1, $op2\t# unsigned" %}
16152 ins_encode %{
16153 __ cmpl($op1$$Register, $op2$$Address);
16154 %}
16155 ins_pipe(ialu_cr_reg_mem);
16156 %}
16157
16158 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16159 %{
16160 match(Set cr (CmpU src zero));
16161
16162 format %{ "testl $src, $src\t# unsigned" %}
16163 ins_encode %{
16164 __ testl($src$$Register, $src$$Register);
16165 %}
16166 ins_pipe(ialu_cr_reg_imm);
16167 %}
16168
16169 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16170 %{
16171 match(Set cr (CmpP op1 op2));
16172
16173 format %{ "cmpq $op1, $op2\t# ptr" %}
16174 ins_encode %{
16175 __ cmpq($op1$$Register, $op2$$Register);
16176 %}
16177 ins_pipe(ialu_cr_reg_reg);
16178 %}
16179
16180 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16181 %{
16182 match(Set cr (CmpP op1 (LoadP op2)));
16183 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16184
16185 ins_cost(500); // XXX
16186 format %{ "cmpq $op1, $op2\t# ptr" %}
16187 ins_encode %{
16188 __ cmpq($op1$$Register, $op2$$Address);
16189 %}
16190 ins_pipe(ialu_cr_reg_mem);
16191 %}
16192
16193 // XXX this is generalized by compP_rReg_mem???
16194 // Compare raw pointer (used in out-of-heap check).
16195 // Only works because non-oop pointers must be raw pointers
16196 // and raw pointers have no anti-dependencies.
16197 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16198 %{
16199 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16200 n->in(2)->as_Load()->barrier_data() == 0);
16201 match(Set cr (CmpP op1 (LoadP op2)));
16202
16203 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16204 ins_encode %{
16205 __ cmpq($op1$$Register, $op2$$Address);
16206 %}
16207 ins_pipe(ialu_cr_reg_mem);
16208 %}
16209
16210 // This will generate a signed flags result. This should be OK since
16211 // any compare to a zero should be eq/neq.
16212 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16213 %{
16214 match(Set cr (CmpP src zero));
16215
16216 format %{ "testq $src, $src\t# ptr" %}
16217 ins_encode %{
16218 __ testq($src$$Register, $src$$Register);
16219 %}
16220 ins_pipe(ialu_cr_reg_imm);
16221 %}
16222
16223 // This will generate a signed flags result. This should be OK since
16224 // any compare to a zero should be eq/neq.
16225 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16226 %{
16227 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16228 n->in(1)->as_Load()->barrier_data() == 0);
16229 match(Set cr (CmpP (LoadP op) zero));
16230
16231 ins_cost(500); // XXX
16232 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16233 ins_encode %{
16234 __ testq($op$$Address, 0xFFFFFFFF);
16235 %}
16236 ins_pipe(ialu_cr_reg_imm);
16237 %}
16238
16239 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16240 %{
16241 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16242 n->in(1)->as_Load()->barrier_data() == 0);
16243 match(Set cr (CmpP (LoadP mem) zero));
16244
16245 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16246 ins_encode %{
16247 __ cmpq(r12, $mem$$Address);
16248 %}
16249 ins_pipe(ialu_cr_reg_mem);
16250 %}
16251
16252 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16253 %{
16254 match(Set cr (CmpN op1 op2));
16255
16256 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16257 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16258 ins_pipe(ialu_cr_reg_reg);
16259 %}
16260
16261 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16262 %{
16263 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16264 match(Set cr (CmpN src (LoadN mem)));
16265
16266 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16267 ins_encode %{
16268 __ cmpl($src$$Register, $mem$$Address);
16269 %}
16270 ins_pipe(ialu_cr_reg_mem);
16271 %}
16272
16273 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16274 match(Set cr (CmpN op1 op2));
16275
16276 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16277 ins_encode %{
16278 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16279 %}
16280 ins_pipe(ialu_cr_reg_imm);
16281 %}
16282
16283 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16284 %{
16285 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16286 match(Set cr (CmpN src (LoadN mem)));
16287
16288 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16289 ins_encode %{
16290 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16291 %}
16292 ins_pipe(ialu_cr_reg_mem);
16293 %}
16294
16295 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16296 match(Set cr (CmpN op1 op2));
16297
16298 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16299 ins_encode %{
16300 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16301 %}
16302 ins_pipe(ialu_cr_reg_imm);
16303 %}
16304
16305 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16306 %{
16307 predicate(!UseCompactObjectHeaders);
16308 match(Set cr (CmpN src (LoadNKlass mem)));
16309
16310 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16311 ins_encode %{
16312 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16313 %}
16314 ins_pipe(ialu_cr_reg_mem);
16315 %}
16316
16317 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16318 match(Set cr (CmpN src zero));
16319
16320 format %{ "testl $src, $src\t# compressed ptr" %}
16321 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16322 ins_pipe(ialu_cr_reg_imm);
16323 %}
16324
16325 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16326 %{
16327 predicate(CompressedOops::base() != nullptr &&
16328 n->in(1)->as_Load()->barrier_data() == 0);
16329 match(Set cr (CmpN (LoadN mem) zero));
16330
16331 ins_cost(500); // XXX
16332 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16333 ins_encode %{
16334 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16335 %}
16336 ins_pipe(ialu_cr_reg_mem);
16337 %}
16338
16339 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16340 %{
16341 predicate(CompressedOops::base() == nullptr &&
16342 n->in(1)->as_Load()->barrier_data() == 0);
16343 match(Set cr (CmpN (LoadN mem) zero));
16344
16345 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16346 ins_encode %{
16347 __ cmpl(r12, $mem$$Address);
16348 %}
16349 ins_pipe(ialu_cr_reg_mem);
16350 %}
16351
16352 // Yanked all unsigned pointer compare operations.
16353 // Pointer compares are done with CmpP which is already unsigned.
16354
16355 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16356 %{
16357 match(Set cr (CmpL op1 op2));
16358
16359 format %{ "cmpq $op1, $op2" %}
16360 ins_encode %{
16361 __ cmpq($op1$$Register, $op2$$Register);
16362 %}
16363 ins_pipe(ialu_cr_reg_reg);
16364 %}
16365
16366 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16367 %{
16368 match(Set cr (CmpL op1 op2));
16369
16370 format %{ "cmpq $op1, $op2" %}
16371 ins_encode %{
16372 __ cmpq($op1$$Register, $op2$$constant);
16373 %}
16374 ins_pipe(ialu_cr_reg_imm);
16375 %}
16376
16377 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16378 %{
16379 match(Set cr (CmpL op1 (LoadL op2)));
16380
16381 format %{ "cmpq $op1, $op2" %}
16382 ins_encode %{
16383 __ cmpq($op1$$Register, $op2$$Address);
16384 %}
16385 ins_pipe(ialu_cr_reg_mem);
16386 %}
16387
16388 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16389 %{
16390 match(Set cr (CmpL src zero));
16391
16392 format %{ "testq $src, $src" %}
16393 ins_encode %{
16394 __ testq($src$$Register, $src$$Register);
16395 %}
16396 ins_pipe(ialu_cr_reg_imm);
16397 %}
16398
16399 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16400 %{
16401 match(Set cr (CmpL (AndL src con) zero));
16402
16403 format %{ "testq $src, $con\t# long" %}
16404 ins_encode %{
16405 __ testq($src$$Register, $con$$constant);
16406 %}
16407 ins_pipe(ialu_cr_reg_imm);
16408 %}
16409
16410 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16411 %{
16412 match(Set cr (CmpL (AndL src1 src2) zero));
16413
16414 format %{ "testq $src1, $src2\t# long" %}
16415 ins_encode %{
16416 __ testq($src1$$Register, $src2$$Register);
16417 %}
16418 ins_pipe(ialu_cr_reg_imm);
16419 %}
16420
16421 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16422 %{
16423 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16424
16425 format %{ "testq $src, $mem" %}
16426 ins_encode %{
16427 __ testq($src$$Register, $mem$$Address);
16428 %}
16429 ins_pipe(ialu_cr_reg_mem);
16430 %}
16431
16432 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16433 %{
16434 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16435
16436 format %{ "testq $src, $mem" %}
16437 ins_encode %{
16438 __ testq($src$$Register, $mem$$Address);
16439 %}
16440 ins_pipe(ialu_cr_reg_mem);
16441 %}
16442
16443 // Manifest a CmpU result in an integer register. Very painful.
16444 // This is the test to avoid.
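// The sequence below materializes the usual three-way result:
// dst = -1 if src1 <u src2, 0 if equal, +1 if src1 >u src2.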
16445 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16446 %{
16447 match(Set dst (CmpU3 src1 src2));
16448 effect(KILL flags);
16449
16450 ins_cost(275); // XXX
16451 format %{ "cmpl $src1, $src2\t# CmpL3\n\t"
16452 "movl $dst, -1\n\t"
16453 "jb,u done\n\t"
16454 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16455 "done:" %}
16456 ins_encode %{
16457 Label done;
16458 __ cmpl($src1$$Register, $src2$$Register);
16459 __ movl($dst$$Register, -1);
16460 __ jccb(Assembler::below, done);
16461 __ setcc(Assembler::notZero, $dst$$Register);
16462 __ bind(done);
16463 %}
16464 ins_pipe(pipe_slow);
16465 %}
16466
16467 // Manifest a CmpL result in an integer register. Very painful.
16468 // This is the test to avoid.
16469 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16470 %{
16471 match(Set dst (CmpL3 src1 src2));
16472 effect(KILL flags);
16473
16474 ins_cost(275); // XXX
16475 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16476 "movl $dst, -1\n\t"
16477 "jl,s done\n\t"
16478 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16479 "done:" %}
16480 ins_encode %{
16481 Label done;
16482 __ cmpq($src1$$Register, $src2$$Register);
16483 __ movl($dst$$Register, -1);
16484 __ jccb(Assembler::less, done);
16485 __ setcc(Assembler::notZero, $dst$$Register);
16486 __ bind(done);
16487 %}
16488 ins_pipe(pipe_slow);
16489 %}
16490
16491 // Manifest a CmpUL result in an integer register. Very painful.
16492 // This is the test to avoid.
16493 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16494 %{
16495 match(Set dst (CmpUL3 src1 src2));
16496 effect(KILL flags);
16497
16498 ins_cost(275); // XXX
16499 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16500 "movl $dst, -1\n\t"
16501 "jb,u done\n\t"
16502 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16503 "done:" %}
16504 ins_encode %{
16505 Label done;
16506 __ cmpq($src1$$Register, $src2$$Register);
16507 __ movl($dst$$Register, -1);
16508 __ jccb(Assembler::below, done);
16509 __ setcc(Assembler::notZero, $dst$$Register);
16510 __ bind(done);
16511 %}
16512 ins_pipe(pipe_slow);
16513 %}
16514
16515 // Unsigned long compare Instructions; really, same as signed long except they
16516 // produce an rFlagsRegU instead of rFlagsReg.
16517 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16518 %{
16519 match(Set cr (CmpUL op1 op2));
16520
16521 format %{ "cmpq $op1, $op2\t# unsigned" %}
16522 ins_encode %{
16523 __ cmpq($op1$$Register, $op2$$Register);
16524 %}
16525 ins_pipe(ialu_cr_reg_reg);
16526 %}
16527
16528 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16529 %{
16530 match(Set cr (CmpUL op1 op2));
16531
16532 format %{ "cmpq $op1, $op2\t# unsigned" %}
16533 ins_encode %{
16534 __ cmpq($op1$$Register, $op2$$constant);
16535 %}
16536 ins_pipe(ialu_cr_reg_imm);
16537 %}
16538
16539 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16540 %{
16541 match(Set cr (CmpUL op1 (LoadL op2)));
16542
16543 format %{ "cmpq $op1, $op2\t# unsigned" %}
16544 ins_encode %{
16545 __ cmpq($op1$$Register, $op2$$Address);
16546 %}
16547 ins_pipe(ialu_cr_reg_mem);
16548 %}
16549
16550 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16551 %{
16552 match(Set cr (CmpUL src zero));
16553
16554 format %{ "testq $src, $src\t# unsigned" %}
16555 ins_encode %{
16556 __ testq($src$$Register, $src$$Register);
16557 %}
16558 ins_pipe(ialu_cr_reg_imm);
16559 %}
16560
16561 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16562 %{
16563 match(Set cr (CmpI (LoadB mem) imm));
16564
16565 ins_cost(125);
16566 format %{ "cmpb $mem, $imm" %}
16567 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16568 ins_pipe(ialu_cr_reg_mem);
16569 %}
16570
16571 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16572 %{
16573 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16574
16575 ins_cost(125);
16576 format %{ "testb $mem, $imm\t# ubyte" %}
16577 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16578 ins_pipe(ialu_cr_reg_mem);
16579 %}
16580
16581 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16582 %{
16583 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16584
16585 ins_cost(125);
16586 format %{ "testb $mem, $imm\t# byte" %}
16587 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16588 ins_pipe(ialu_cr_reg_mem);
16589 %}
16590
16591 //----------Max and Min--------------------------------------------------------
16592 // Min Instructions
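// Min/Max expand into a compare plus a conditional move. For example,
// MinI becomes "cmpl dst,src; cmovlgt dst,src": dst is overwritten by
// src exactly when dst > src. The _ndd forms are the APX three-operand
// (new data destination) equivalents.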
16593
16594 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16595 %{
16596 predicate(!UseAPX);
16597 effect(USE_DEF dst, USE src, USE cr);
16598
16599 format %{ "cmovlgt $dst, $src\t# min" %}
16600 ins_encode %{
16601 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16602 %}
16603 ins_pipe(pipe_cmov_reg);
16604 %}
16605
16606 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16607 %{
16608 predicate(UseAPX);
16609 effect(DEF dst, USE src1, USE src2, USE cr);
16610
16611 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16612 ins_encode %{
16613 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16614 %}
16615 ins_pipe(pipe_cmov_reg);
16616 %}
16617
16618 instruct minI_rReg(rRegI dst, rRegI src)
16619 %{
16620 predicate(!UseAPX);
16621 match(Set dst (MinI dst src));
16622
16623 ins_cost(200);
16624 expand %{
16625 rFlagsReg cr;
16626 compI_rReg(cr, dst, src);
16627 cmovI_reg_g(dst, src, cr);
16628 %}
16629 %}
16630
16631 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16632 %{
16633 predicate(UseAPX);
16634 match(Set dst (MinI src1 src2));
16635 effect(DEF dst, USE src1, USE src2);
16636 flag(PD::Flag_ndd_demotable);
16637
16638 ins_cost(200);
16639 expand %{
16640 rFlagsReg cr;
16641 compI_rReg(cr, src1, src2);
16642 cmovI_reg_g_ndd(dst, src1, src2, cr);
16643 %}
16644 %}
16645
16646 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16647 %{
16648 predicate(!UseAPX);
16649 effect(USE_DEF dst, USE src, USE cr);
16650
16651 format %{ "cmovllt $dst, $src\t# max" %}
16652 ins_encode %{
16653 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16654 %}
16655 ins_pipe(pipe_cmov_reg);
16656 %}
16657
16658 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16659 %{
16660 predicate(UseAPX);
16661 effect(DEF dst, USE src1, USE src2, USE cr);
16662
16663 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16664 ins_encode %{
16665 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16666 %}
16667 ins_pipe(pipe_cmov_reg);
16668 %}
16669
16670 instruct maxI_rReg(rRegI dst, rRegI src)
16671 %{
16672 predicate(!UseAPX);
16673 match(Set dst (MaxI dst src));
16674
16675 ins_cost(200);
16676 expand %{
16677 rFlagsReg cr;
16678 compI_rReg(cr, dst, src);
16679 cmovI_reg_l(dst, src, cr);
16680 %}
16681 %}
16682
16683 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16684 %{
16685 predicate(UseAPX);
16686 match(Set dst (MaxI src1 src2));
16687 effect(DEF dst, USE src1, USE src2);
16688 flag(PD::Flag_ndd_demotable);
16689
16690 ins_cost(200);
16691 expand %{
16692 rFlagsReg cr;
16693 compI_rReg(cr, src1, src2);
16694 cmovI_reg_l_ndd(dst, src1, src2, cr);
16695 %}
16696 %}
16697
16698 // ============================================================================
16699 // Branch Instructions
16700
16701 // Jump Direct - Label defines a relative address from JMP+1
16702 instruct jmpDir(label labl)
16703 %{
16704 match(Goto);
16705 effect(USE labl);
16706
16707 ins_cost(300);
16708 format %{ "jmp $labl" %}
16709 size(5);
16710 ins_encode %{
16711 Label* L = $labl$$label;
16712 __ jmp(*L, false); // Always long jump
16713 %}
16714 ins_pipe(pipe_jmp);
16715 %}
16716
16717 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16718 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16719 %{
16720 match(If cop cr);
16721 effect(USE labl);
16722
16723 ins_cost(300);
16724 format %{ "j$cop $labl" %}
16725 size(6);
16726 ins_encode %{
16727 Label* L = $labl$$label;
16728 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16729 %}
16730 ins_pipe(pipe_jcc);
16731 %}
16732
16733 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16734 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16735 %{
16736 match(CountedLoopEnd cop cr);
16737 effect(USE labl);
16738
16739 ins_cost(300);
16740 format %{ "j$cop $labl\t# loop end" %}
16741 size(6);
16742 ins_encode %{
16743 Label* L = $labl$$label;
16744 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16745 %}
16746 ins_pipe(pipe_jcc);
16747 %}
16748
16749 // Jump Direct Conditional - using unsigned comparison
16750 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16751 match(If cop cmp);
16752 effect(USE labl);
16753
16754 ins_cost(300);
16755 format %{ "j$cop,u $labl" %}
16756 size(6);
16757 ins_encode %{
16758 Label* L = $labl$$label;
16759 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16760 %}
16761 ins_pipe(pipe_jcc);
16762 %}
16763
16764 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16765 match(If cop cmp);
16766 effect(USE labl);
16767
16768 ins_cost(200);
16769 format %{ "j$cop,u $labl" %}
16770 size(6);
16771 ins_encode %{
16772 Label* L = $labl$$label;
16773 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16774 %}
16775 ins_pipe(pipe_jcc);
16776 %}
16777
16778 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16779 match(If cop cmp);
16780 effect(USE labl);
16781
16782 ins_cost(200);
16783 format %{ $$template
16784 if ($cop$$cmpcode == Assembler::notEqual) {
16785 $$emit$$"jp,u $labl\n\t"
16786 $$emit$$"j$cop,u $labl"
16787 } else {
16788 $$emit$$"jp,u done\n\t"
16789 $$emit$$"j$cop,u $labl\n\t"
16790 $$emit$$"done:"
16791 }
16792 %}
16793 ins_encode %{
16794 Label* l = $labl$$label;
16795 if ($cop$$cmpcode == Assembler::notEqual) {
16796 __ jcc(Assembler::parity, *l, false);
16797 __ jcc(Assembler::notEqual, *l, false);
16798 } else if ($cop$$cmpcode == Assembler::equal) {
16799 Label done;
16800 __ jccb(Assembler::parity, done);
16801 __ jcc(Assembler::equal, *l, false);
16802 __ bind(done);
16803 } else {
16804 ShouldNotReachHere();
16805 }
16806 %}
16807 ins_pipe(pipe_jcc);
16808 %}
16809
16810 // ============================================================================
// The second (slow) half of a subtype check. Scan the subklass's
// secondary superklass array for an instance of the superklass. Set a
// hidden internal cache on a hit (the cache is checked by the exposed
// code in gen_subtype_check()). Return NZ for a miss or zero for a hit.
// The encoding ALSO sets flags.
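//
// In outline, the linear scan performed by the encoding is:
//   for (Klass** p = secondary_supers->data(); count-- > 0; p++)
//     if (*p == super) { sub->secondary_super_cache = super; return 0; }
//   return non-zero; // Z flag set on hit, NZ on miss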
16816
16817 instruct partialSubtypeCheck(rdi_RegP result,
16818 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16819 rFlagsReg cr)
16820 %{
16821 match(Set result (PartialSubtypeCheck sub super));
16822 predicate(!UseSecondarySupersTable);
16823 effect(KILL rcx, KILL cr);
16824
16825 ins_cost(1100); // slightly larger than the next version
16826 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16827 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16828 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16829 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16830 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16831 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16832 "xorq $result, $result\t\t Hit: rdi zero\n\t"
16833 "miss:\t" %}
16834
16835 ins_encode %{
16836 Label miss;
16837 // NB: Callers may assume that, when $result is a valid register,
16838 // check_klass_subtype_slow_path_linear sets it to a nonzero
16839 // value.
16840 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16841 $rcx$$Register, $result$$Register,
16842 nullptr, &miss,
16843 /*set_cond_codes:*/ true);
16844 __ xorptr($result$$Register, $result$$Register);
16845 __ bind(miss);
16846 %}
16847
16848 ins_pipe(pipe_slow);
16849 %}
16850
16851 // ============================================================================
16852 // Two versions of hashtable-based partialSubtypeCheck, both used when
16853 // we need to search for a super class in the secondary supers array.
16854 // The first is used when we don't know _a priori_ the class being
16855 // searched for. The second, far more common, is used when we do know:
16856 // this is used for instanceof, checkcast, and any case where C2 can
16857 // determine it by constant propagation.
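// Both hash the superclass to a slot in the secondary-supers table (see
// Klass::hash_slot()). The constant-super form knows the slot at compile
// time, so when InlineSecondarySupersTest is disabled it can simply call
// the pre-generated per-slot stub (see the encoding below).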
16858
16859 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16860 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16861 rFlagsReg cr)
16862 %{
16863 match(Set result (PartialSubtypeCheck sub super));
16864 predicate(UseSecondarySupersTable);
16865 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16866
16867 ins_cost(1000);
16868 format %{ "partialSubtypeCheck $result, $sub, $super" %}
16869
16870 ins_encode %{
16871 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16872 $temp3$$Register, $temp4$$Register, $result$$Register);
16873 %}
16874
16875 ins_pipe(pipe_slow);
16876 %}
16877
16878 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16879 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16880 rFlagsReg cr)
16881 %{
16882 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16883 predicate(UseSecondarySupersTable);
16884 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16885
16886 ins_cost(700); // smaller than the next version
16887 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16888
16889 ins_encode %{
16890 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16891 if (InlineSecondarySupersTest) {
16892 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16893 $temp3$$Register, $temp4$$Register, $result$$Register,
16894 super_klass_slot);
16895 } else {
16896 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16897 }
16898 %}
16899
16900 ins_pipe(pipe_slow);
16901 %}
16902
16903 // ============================================================================
16904 // Branch Instructions -- short offset versions
16905 //
16906 // These instructions are used to replace jumps of a long offset (the default
16907 // match) with jumps of a shorter offset. These instructions are all tagged
16908 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16909 // match rules in general matching. Instead, the ADLC generates a conversion
16910 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler decides whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of this file.
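//
// For example, an unconditional jmp shrinks from 5 bytes (E9 rel32) to
// 2 (EB rel8), and a jcc from 6 bytes (0F 8x rel32) to 2 (7x rel8),
// matching the size() attributes declared on the long and short forms.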
16914
16915 // Jump Direct - Label defines a relative address from JMP+1
16916 instruct jmpDir_short(label labl) %{
16917 match(Goto);
16918 effect(USE labl);
16919
16920 ins_cost(300);
16921 format %{ "jmp,s $labl" %}
16922 size(2);
16923 ins_encode %{
16924 Label* L = $labl$$label;
16925 __ jmpb(*L);
16926 %}
16927 ins_pipe(pipe_jmp);
16928 ins_short_branch(1);
16929 %}
16930
16931 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16932 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16933 match(If cop cr);
16934 effect(USE labl);
16935
16936 ins_cost(300);
16937 format %{ "j$cop,s $labl" %}
16938 size(2);
16939 ins_encode %{
16940 Label* L = $labl$$label;
16941 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16942 %}
16943 ins_pipe(pipe_jcc);
16944 ins_short_branch(1);
16945 %}
16946
16947 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16948 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16949 match(CountedLoopEnd cop cr);
16950 effect(USE labl);
16951
16952 ins_cost(300);
16953 format %{ "j$cop,s $labl\t# loop end" %}
16954 size(2);
16955 ins_encode %{
16956 Label* L = $labl$$label;
16957 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16958 %}
16959 ins_pipe(pipe_jcc);
16960 ins_short_branch(1);
16961 %}
16962
16963 // Jump Direct Conditional - using unsigned comparison
16964 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16965 match(If cop cmp);
16966 effect(USE labl);
16967
16968 ins_cost(300);
16969 format %{ "j$cop,us $labl" %}
16970 size(2);
16971 ins_encode %{
16972 Label* L = $labl$$label;
16973 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16974 %}
16975 ins_pipe(pipe_jcc);
16976 ins_short_branch(1);
16977 %}
16978
16979 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16980 match(If cop cmp);
16981 effect(USE labl);
16982
16983 ins_cost(300);
16984 format %{ "j$cop,us $labl" %}
16985 size(2);
16986 ins_encode %{
16987 Label* L = $labl$$label;
16988 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16989 %}
16990 ins_pipe(pipe_jcc);
16991 ins_short_branch(1);
16992 %}
16993
16994 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16995 match(If cop cmp);
16996 effect(USE labl);
16997
16998 ins_cost(300);
16999 format %{ $$template
17000 if ($cop$$cmpcode == Assembler::notEqual) {
17001 $$emit$$"jp,u,s $labl\n\t"
17002 $$emit$$"j$cop,u,s $labl"
17003 } else {
17004 $$emit$$"jp,u,s done\n\t"
17005 $$emit$$"j$cop,u,s $labl\n\t"
17006 $$emit$$"done:"
17007 }
17008 %}
17009 size(4);
17010 ins_encode %{
17011 Label* l = $labl$$label;
17012 if ($cop$$cmpcode == Assembler::notEqual) {
17013 __ jccb(Assembler::parity, *l);
17014 __ jccb(Assembler::notEqual, *l);
17015 } else if ($cop$$cmpcode == Assembler::equal) {
17016 Label done;
17017 __ jccb(Assembler::parity, done);
17018 __ jccb(Assembler::equal, *l);
17019 __ bind(done);
17020 } else {
17021 ShouldNotReachHere();
17022 }
17023 %}
17024 ins_pipe(pipe_jcc);
17025 ins_short_branch(1);
17026 %}
17027
17028 // ============================================================================
17029 // inlined locking and unlocking
17030
17031 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17032 match(Set cr (FastLock object box));
17033 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17034 ins_cost(300);
17035 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17036 ins_encode %{
17037 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17038 %}
17039 ins_pipe(pipe_slow);
17040 %}
17041
17042 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17043 match(Set cr (FastUnlock object rax_reg));
17044 effect(TEMP tmp, USE_KILL rax_reg);
17045 ins_cost(300);
17046 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17047 ins_encode %{
17048 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17049 %}
17050 ins_pipe(pipe_slow);
17051 %}
17052
17053
17054 // ============================================================================
17055 // Safepoint Instructions
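// The poll operand holds the thread-local polling page address. The dummy
// "testl rax, [poll]" load is cheap while the page is readable; to stop the
// thread the VM protects the page, the load traps, and the signal handler
// dispatches to the safepoint handler. The relocation plus the
// is_safepoint_poll() assert keep this in sync with the patching machinery.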
17056 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17057 %{
17058 match(SafePoint poll);
17059 effect(KILL cr, USE poll);
17060
17061 format %{ "testl rax, [$poll]\t"
17062 "# Safepoint: poll for GC" %}
17063 ins_cost(125);
17064 ins_encode %{
17065 __ relocate(relocInfo::poll_type);
17066 address pre_pc = __ pc();
17067 __ testl(rax, Address($poll$$Register, 0));
17068 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17069 %}
17070 ins_pipe(ialu_reg_mem);
17071 %}
17072
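// MaskAll: broadcast a scalar condition (0 or -1) into every lane of an
// AVX-512 opmask register, yielding an all-clear or all-set predicate mask.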
17073 instruct mask_all_evexL(kReg dst, rRegL src) %{
17074 match(Set dst (MaskAll src));
17075 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17076 ins_encode %{
17077 int mask_len = Matcher::vector_length(this);
17078 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17079 %}
17080 ins_pipe( pipe_slow );
17081 %}
17082
17083 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17084 predicate(Matcher::vector_length(n) > 32);
17085 match(Set dst (MaskAll src));
17086 effect(TEMP tmp);
17087 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17088 ins_encode %{
17089 int mask_len = Matcher::vector_length(this);
17090 __ movslq($tmp$$Register, $src$$Register);
17091 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17092 %}
17093 ins_pipe( pipe_slow );
17094 %}
17095
17096 // ============================================================================
17097 // Procedure Call/Return Instructions
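// The clear_avx prologue used by the call encodings below emits vzeroupper
// (when AVX is enabled) so callees that mix legacy-SSE and AVX code do not
// pay the AVX-SSE transition penalty.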
17098 // Call Java Static Instruction
17099 // Note: If this code changes, the corresponding ret_addr_offset() and
17100 // compute_padding() functions will have to be adjusted.
17101 instruct CallStaticJavaDirect(method meth) %{
17102 match(CallStaticJava);
17103 effect(USE meth);
17104
17105 ins_cost(300);
17106 format %{ "call,static " %}
17107 opcode(0xE8); /* E8 cd */
17108 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17109 ins_pipe(pipe_slow);
17110 ins_alignment(4);
17111 %}
17112
17113 // Call Java Dynamic Instruction
17114 // Note: If this code changes, the corresponding ret_addr_offset() and
17115 // compute_padding() functions will have to be adjusted.
17116 instruct CallDynamicJavaDirect(method meth)
17117 %{
17118 match(CallDynamicJava);
17119 effect(USE meth);
17120
17121 ins_cost(300);
17122 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17123 "call,dynamic " %}
17124 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17125 ins_pipe(pipe_slow);
17126 ins_alignment(4);
17127 %}
17128
17129 // Call Runtime Instruction
17130 instruct CallRuntimeDirect(method meth)
17131 %{
17132 match(CallRuntime);
17133 effect(USE meth);
17134
17135 ins_cost(300);
17136 format %{ "call,runtime " %}
17137 ins_encode(clear_avx, Java_To_Runtime(meth));
17138 ins_pipe(pipe_slow);
17139 %}
17140
17141 // Call runtime without safepoint
17142 instruct CallLeafDirect(method meth)
17143 %{
17144 match(CallLeaf);
17145 effect(USE meth);
17146
17147 ins_cost(300);
17148 format %{ "call_leaf,runtime " %}
17149 ins_encode(clear_avx, Java_To_Runtime(meth));
17150 ins_pipe(pipe_slow);
17151 %}
17152
17153 // Call runtime without safepoint and with vector arguments
17154 instruct CallLeafDirectVector(method meth)
17155 %{
17156 match(CallLeafVector);
17157 effect(USE meth);
17158
17159 ins_cost(300);
17160 format %{ "call_leaf,vector " %}
17161 ins_encode(Java_To_Runtime(meth));
17162 ins_pipe(pipe_slow);
17163 %}
17164
17165 // Call runtime without safepoint
17166 instruct CallLeafNoFPDirect(method meth)
17167 %{
17168 match(CallLeafNoFP);
17169 effect(USE meth);
17170
17171 ins_cost(300);
17172 format %{ "call_leaf_nofp,runtime " %}
17173 ins_encode(clear_avx, Java_To_Runtime(meth));
17174 ins_pipe(pipe_slow);
17175 %}
17176
17177 // Return Instruction
17178 // Remove the return address & jump to it.
17179 // Notice: We always emit a nop after a ret to make sure there is room
17180 // for safepoint patching
17181 instruct Ret()
17182 %{
17183 match(Return);
17184
17185 format %{ "ret" %}
17186 ins_encode %{
17187 __ ret(0);
17188 %}
17189 ins_pipe(pipe_jmp);
17190 %}
17191
17192 // Tail Call; Jump from runtime stub to Java code.
17193 // Also known as an 'interprocedural jump'.
17194 // Target of jump will eventually return to caller.
17195 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because the MachEpilogNode emitted just
// above the TailCall has already reset rbp to the caller's state.
17198 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17199 %{
17200 match(TailCall jump_target method_ptr);
17201
17202 ins_cost(300);
17203 format %{ "jmp $jump_target\t# rbx holds method" %}
17204 ins_encode %{
17205 __ jmp($jump_target$$Register);
17206 %}
17207 ins_pipe(pipe_jmp);
17208 %}
17209
17210 // Tail Jump; remove the return address; jump to target.
17211 // TailCall above leaves the return address around.
17212 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17213 %{
17214 match(TailJump jump_target ex_oop);
17215
17216 ins_cost(300);
17217 format %{ "popq rdx\t# pop return address\n\t"
17218 "jmp $jump_target" %}
17219 ins_encode %{
17220 __ popq(as_Register(RDX_enc));
17221 __ jmp($jump_target$$Register);
17222 %}
17223 ins_pipe(pipe_jmp);
17224 %}
17225
17226 // Forward exception.
17227 instruct ForwardExceptionjmp()
17228 %{
17229 match(ForwardException);
17230
17231 format %{ "jmp forward_exception_stub" %}
17232 ins_encode %{
17233 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17234 %}
17235 ins_pipe(pipe_jmp);
17236 %}
17237
17238 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler and is set up just
// prior to jumping to this handler. No code is emitted.
17241 instruct CreateException(rax_RegP ex_oop)
17242 %{
17243 match(Set ex_oop (CreateEx));
17244
17245 size(0);
17246 // use the following format syntax
17247 format %{ "# exception oop is in rax; no code emitted" %}
17248 ins_encode();
17249 ins_pipe(empty);
17250 %}
17251
17252 // Rethrow exception:
17253 // The exception oop will come in the first argument position.
17254 // Then JUMP (not call) to the rethrow stub code.
17255 instruct RethrowException()
17256 %{
17257 match(Rethrow);
17258
17259 // use the following format syntax
17260 format %{ "jmp rethrow_stub" %}
17261 ins_encode %{
17262 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17263 %}
17264 ins_pipe(pipe_jmp);
17265 %}
17266
17267 // ============================================================================
17268 // This name is KNOWN by the ADLC and cannot be changed.
17269 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
17271 instruct tlsLoadP(r15_RegP dst) %{
17272 match(Set dst (ThreadLocal));
17273 effect(DEF dst);
17274
17275 size(0);
17276 format %{ "# TLS is in R15" %}
17277 ins_encode( /*empty encoding*/ );
17278 ins_pipe(ialu_reg_reg);
17279 %}
17280
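// ============================================================================
// Scalar float/double arithmetic. The SSE forms below are two-operand
// (dst op= src) and are selected when UseAVX == 0; the AVX forms are
// three-operand (dst = src1 op src2) and are selected when UseAVX > 0.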
17281 instruct addF_reg(regF dst, regF src) %{
17282 predicate(UseAVX == 0);
17283 match(Set dst (AddF dst src));
17284
17285 format %{ "addss $dst, $src" %}
17286 ins_cost(150);
17287 ins_encode %{
17288 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17289 %}
17290 ins_pipe(pipe_slow);
17291 %}
17292
17293 instruct addF_mem(regF dst, memory src) %{
17294 predicate(UseAVX == 0);
17295 match(Set dst (AddF dst (LoadF src)));
17296
17297 format %{ "addss $dst, $src" %}
17298 ins_cost(150);
17299 ins_encode %{
17300 __ addss($dst$$XMMRegister, $src$$Address);
17301 %}
17302 ins_pipe(pipe_slow);
17303 %}
17304
17305 instruct addF_imm(regF dst, immF con) %{
17306 predicate(UseAVX == 0);
17307 match(Set dst (AddF dst con));
17308 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17309 ins_cost(150);
17310 ins_encode %{
17311 __ addss($dst$$XMMRegister, $constantaddress($con));
17312 %}
17313 ins_pipe(pipe_slow);
17314 %}
17315
17316 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17317 predicate(UseAVX > 0);
17318 match(Set dst (AddF src1 src2));
17319
17320 format %{ "vaddss $dst, $src1, $src2" %}
17321 ins_cost(150);
17322 ins_encode %{
17323 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17324 %}
17325 ins_pipe(pipe_slow);
17326 %}
17327
17328 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17329 predicate(UseAVX > 0);
17330 match(Set dst (AddF src1 (LoadF src2)));
17331
17332 format %{ "vaddss $dst, $src1, $src2" %}
17333 ins_cost(150);
17334 ins_encode %{
17335 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17336 %}
17337 ins_pipe(pipe_slow);
17338 %}
17339
17340 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17341 predicate(UseAVX > 0);
17342 match(Set dst (AddF src con));
17343
17344 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17345 ins_cost(150);
17346 ins_encode %{
17347 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17348 %}
17349 ins_pipe(pipe_slow);
17350 %}
17351
17352 instruct addD_reg(regD dst, regD src) %{
17353 predicate(UseAVX == 0);
17354 match(Set dst (AddD dst src));
17355
17356 format %{ "addsd $dst, $src" %}
17357 ins_cost(150);
17358 ins_encode %{
17359 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17360 %}
17361 ins_pipe(pipe_slow);
17362 %}
17363
17364 instruct addD_mem(regD dst, memory src) %{
17365 predicate(UseAVX == 0);
17366 match(Set dst (AddD dst (LoadD src)));
17367
17368 format %{ "addsd $dst, $src" %}
17369 ins_cost(150);
17370 ins_encode %{
17371 __ addsd($dst$$XMMRegister, $src$$Address);
17372 %}
17373 ins_pipe(pipe_slow);
17374 %}
17375
17376 instruct addD_imm(regD dst, immD con) %{
17377 predicate(UseAVX == 0);
17378 match(Set dst (AddD dst con));
17379 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17380 ins_cost(150);
17381 ins_encode %{
17382 __ addsd($dst$$XMMRegister, $constantaddress($con));
17383 %}
17384 ins_pipe(pipe_slow);
17385 %}
17386
17387 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17388 predicate(UseAVX > 0);
17389 match(Set dst (AddD src1 src2));
17390
17391 format %{ "vaddsd $dst, $src1, $src2" %}
17392 ins_cost(150);
17393 ins_encode %{
17394 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17395 %}
17396 ins_pipe(pipe_slow);
17397 %}
17398
17399 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17400 predicate(UseAVX > 0);
17401 match(Set dst (AddD src1 (LoadD src2)));
17402
17403 format %{ "vaddsd $dst, $src1, $src2" %}
17404 ins_cost(150);
17405 ins_encode %{
17406 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17407 %}
17408 ins_pipe(pipe_slow);
17409 %}
17410
17411 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17412 predicate(UseAVX > 0);
17413 match(Set dst (AddD src con));
17414
17415 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17416 ins_cost(150);
17417 ins_encode %{
17418 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17419 %}
17420 ins_pipe(pipe_slow);
17421 %}
17422
17423 instruct subF_reg(regF dst, regF src) %{
17424 predicate(UseAVX == 0);
17425 match(Set dst (SubF dst src));
17426
17427 format %{ "subss $dst, $src" %}
17428 ins_cost(150);
17429 ins_encode %{
17430 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17431 %}
17432 ins_pipe(pipe_slow);
17433 %}
17434
17435 instruct subF_mem(regF dst, memory src) %{
17436 predicate(UseAVX == 0);
17437 match(Set dst (SubF dst (LoadF src)));
17438
17439 format %{ "subss $dst, $src" %}
17440 ins_cost(150);
17441 ins_encode %{
17442 __ subss($dst$$XMMRegister, $src$$Address);
17443 %}
17444 ins_pipe(pipe_slow);
17445 %}
17446
17447 instruct subF_imm(regF dst, immF con) %{
17448 predicate(UseAVX == 0);
17449 match(Set dst (SubF dst con));
17450 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17451 ins_cost(150);
17452 ins_encode %{
17453 __ subss($dst$$XMMRegister, $constantaddress($con));
17454 %}
17455 ins_pipe(pipe_slow);
17456 %}
17457
17458 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17459 predicate(UseAVX > 0);
17460 match(Set dst (SubF src1 src2));
17461
17462 format %{ "vsubss $dst, $src1, $src2" %}
17463 ins_cost(150);
17464 ins_encode %{
17465 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17466 %}
17467 ins_pipe(pipe_slow);
17468 %}
17469
17470 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17471 predicate(UseAVX > 0);
17472 match(Set dst (SubF src1 (LoadF src2)));
17473
17474 format %{ "vsubss $dst, $src1, $src2" %}
17475 ins_cost(150);
17476 ins_encode %{
17477 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17478 %}
17479 ins_pipe(pipe_slow);
17480 %}
17481
17482 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17483 predicate(UseAVX > 0);
17484 match(Set dst (SubF src con));
17485
17486 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17487 ins_cost(150);
17488 ins_encode %{
17489 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17490 %}
17491 ins_pipe(pipe_slow);
17492 %}
17493
17494 instruct subD_reg(regD dst, regD src) %{
17495 predicate(UseAVX == 0);
17496 match(Set dst (SubD dst src));
17497
17498 format %{ "subsd $dst, $src" %}
17499 ins_cost(150);
17500 ins_encode %{
17501 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17502 %}
17503 ins_pipe(pipe_slow);
17504 %}
17505
17506 instruct subD_mem(regD dst, memory src) %{
17507 predicate(UseAVX == 0);
17508 match(Set dst (SubD dst (LoadD src)));
17509
17510 format %{ "subsd $dst, $src" %}
17511 ins_cost(150);
17512 ins_encode %{
17513 __ subsd($dst$$XMMRegister, $src$$Address);
17514 %}
17515 ins_pipe(pipe_slow);
17516 %}
17517
17518 instruct subD_imm(regD dst, immD con) %{
17519 predicate(UseAVX == 0);
17520 match(Set dst (SubD dst con));
17521 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17522 ins_cost(150);
17523 ins_encode %{
17524 __ subsd($dst$$XMMRegister, $constantaddress($con));
17525 %}
17526 ins_pipe(pipe_slow);
17527 %}
17528
17529 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17530 predicate(UseAVX > 0);
17531 match(Set dst (SubD src1 src2));
17532
17533 format %{ "vsubsd $dst, $src1, $src2" %}
17534 ins_cost(150);
17535 ins_encode %{
17536 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17537 %}
17538 ins_pipe(pipe_slow);
17539 %}
17540
17541 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17542 predicate(UseAVX > 0);
17543 match(Set dst (SubD src1 (LoadD src2)));
17544
17545 format %{ "vsubsd $dst, $src1, $src2" %}
17546 ins_cost(150);
17547 ins_encode %{
17548 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17549 %}
17550 ins_pipe(pipe_slow);
17551 %}
17552
17553 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17554 predicate(UseAVX > 0);
17555 match(Set dst (SubD src con));
17556
17557 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17558 ins_cost(150);
17559 ins_encode %{
17560 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17561 %}
17562 ins_pipe(pipe_slow);
17563 %}
17564
17565 instruct mulF_reg(regF dst, regF src) %{
17566 predicate(UseAVX == 0);
17567 match(Set dst (MulF dst src));
17568
17569 format %{ "mulss $dst, $src" %}
17570 ins_cost(150);
17571 ins_encode %{
17572 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17573 %}
17574 ins_pipe(pipe_slow);
17575 %}
17576
17577 instruct mulF_mem(regF dst, memory src) %{
17578 predicate(UseAVX == 0);
17579 match(Set dst (MulF dst (LoadF src)));
17580
17581 format %{ "mulss $dst, $src" %}
17582 ins_cost(150);
17583 ins_encode %{
17584 __ mulss($dst$$XMMRegister, $src$$Address);
17585 %}
17586 ins_pipe(pipe_slow);
17587 %}
17588
17589 instruct mulF_imm(regF dst, immF con) %{
17590 predicate(UseAVX == 0);
17591 match(Set dst (MulF dst con));
17592 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17593 ins_cost(150);
17594 ins_encode %{
17595 __ mulss($dst$$XMMRegister, $constantaddress($con));
17596 %}
17597 ins_pipe(pipe_slow);
17598 %}
17599
17600 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17601 predicate(UseAVX > 0);
17602 match(Set dst (MulF src1 src2));
17603
17604 format %{ "vmulss $dst, $src1, $src2" %}
17605 ins_cost(150);
17606 ins_encode %{
17607 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17608 %}
17609 ins_pipe(pipe_slow);
17610 %}
17611
17612 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17613 predicate(UseAVX > 0);
17614 match(Set dst (MulF src1 (LoadF src2)));
17615
17616 format %{ "vmulss $dst, $src1, $src2" %}
17617 ins_cost(150);
17618 ins_encode %{
17619 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17620 %}
17621 ins_pipe(pipe_slow);
17622 %}
17623
17624 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17625 predicate(UseAVX > 0);
17626 match(Set dst (MulF src con));
17627
17628 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17629 ins_cost(150);
17630 ins_encode %{
17631 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17632 %}
17633 ins_pipe(pipe_slow);
17634 %}
17635
17636 instruct mulD_reg(regD dst, regD src) %{
17637 predicate(UseAVX == 0);
17638 match(Set dst (MulD dst src));
17639
17640 format %{ "mulsd $dst, $src" %}
17641 ins_cost(150);
17642 ins_encode %{
17643 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17644 %}
17645 ins_pipe(pipe_slow);
17646 %}
17647
17648 instruct mulD_mem(regD dst, memory src) %{
17649 predicate(UseAVX == 0);
17650 match(Set dst (MulD dst (LoadD src)));
17651
17652 format %{ "mulsd $dst, $src" %}
17653 ins_cost(150);
17654 ins_encode %{
17655 __ mulsd($dst$$XMMRegister, $src$$Address);
17656 %}
17657 ins_pipe(pipe_slow);
17658 %}
17659
17660 instruct mulD_imm(regD dst, immD con) %{
17661 predicate(UseAVX == 0);
17662 match(Set dst (MulD dst con));
17663 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17664 ins_cost(150);
17665 ins_encode %{
17666 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17667 %}
17668 ins_pipe(pipe_slow);
17669 %}
17670
17671 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17672 predicate(UseAVX > 0);
17673 match(Set dst (MulD src1 src2));
17674
17675 format %{ "vmulsd $dst, $src1, $src2" %}
17676 ins_cost(150);
17677 ins_encode %{
17678 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17679 %}
17680 ins_pipe(pipe_slow);
17681 %}
17682
17683 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17684 predicate(UseAVX > 0);
17685 match(Set dst (MulD src1 (LoadD src2)));
17686
17687 format %{ "vmulsd $dst, $src1, $src2" %}
17688 ins_cost(150);
17689 ins_encode %{
17690 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17691 %}
17692 ins_pipe(pipe_slow);
17693 %}
17694
17695 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17696 predicate(UseAVX > 0);
17697 match(Set dst (MulD src con));
17698
17699 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17700 ins_cost(150);
17701 ins_encode %{
17702 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17703 %}
17704 ins_pipe(pipe_slow);
17705 %}
17706
17707 instruct divF_reg(regF dst, regF src) %{
17708 predicate(UseAVX == 0);
17709 match(Set dst (DivF dst src));
17710
17711 format %{ "divss $dst, $src" %}
17712 ins_cost(150);
17713 ins_encode %{
17714 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17715 %}
17716 ins_pipe(pipe_slow);
17717 %}
17718
17719 instruct divF_mem(regF dst, memory src) %{
17720 predicate(UseAVX == 0);
17721 match(Set dst (DivF dst (LoadF src)));
17722
17723 format %{ "divss $dst, $src" %}
17724 ins_cost(150);
17725 ins_encode %{
17726 __ divss($dst$$XMMRegister, $src$$Address);
17727 %}
17728 ins_pipe(pipe_slow);
17729 %}
17730
17731 instruct divF_imm(regF dst, immF con) %{
17732 predicate(UseAVX == 0);
17733 match(Set dst (DivF dst con));
17734 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17735 ins_cost(150);
17736 ins_encode %{
17737 __ divss($dst$$XMMRegister, $constantaddress($con));
17738 %}
17739 ins_pipe(pipe_slow);
17740 %}
17741
17742 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17743 predicate(UseAVX > 0);
17744 match(Set dst (DivF src1 src2));
17745
17746 format %{ "vdivss $dst, $src1, $src2" %}
17747 ins_cost(150);
17748 ins_encode %{
17749 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17750 %}
17751 ins_pipe(pipe_slow);
17752 %}
17753
17754 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17755 predicate(UseAVX > 0);
17756 match(Set dst (DivF src1 (LoadF src2)));
17757
17758 format %{ "vdivss $dst, $src1, $src2" %}
17759 ins_cost(150);
17760 ins_encode %{
17761 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17762 %}
17763 ins_pipe(pipe_slow);
17764 %}
17765
17766 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17767 predicate(UseAVX > 0);
17768 match(Set dst (DivF src con));
17769
17770 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17771 ins_cost(150);
17772 ins_encode %{
17773 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17774 %}
17775 ins_pipe(pipe_slow);
17776 %}
17777
17778 instruct divD_reg(regD dst, regD src) %{
17779 predicate(UseAVX == 0);
17780 match(Set dst (DivD dst src));
17781
17782 format %{ "divsd $dst, $src" %}
17783 ins_cost(150);
17784 ins_encode %{
17785 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17786 %}
17787 ins_pipe(pipe_slow);
17788 %}
17789
17790 instruct divD_mem(regD dst, memory src) %{
17791 predicate(UseAVX == 0);
17792 match(Set dst (DivD dst (LoadD src)));
17793
17794 format %{ "divsd $dst, $src" %}
17795 ins_cost(150);
17796 ins_encode %{
17797 __ divsd($dst$$XMMRegister, $src$$Address);
17798 %}
17799 ins_pipe(pipe_slow);
17800 %}
17801
17802 instruct divD_imm(regD dst, immD con) %{
17803 predicate(UseAVX == 0);
17804 match(Set dst (DivD dst con));
17805 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17806 ins_cost(150);
17807 ins_encode %{
17808 __ divsd($dst$$XMMRegister, $constantaddress($con));
17809 %}
17810 ins_pipe(pipe_slow);
17811 %}
17812
17813 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17814 predicate(UseAVX > 0);
17815 match(Set dst (DivD src1 src2));
17816
17817 format %{ "vdivsd $dst, $src1, $src2" %}
17818 ins_cost(150);
17819 ins_encode %{
17820 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17821 %}
17822 ins_pipe(pipe_slow);
17823 %}
17824
17825 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17826 predicate(UseAVX > 0);
17827 match(Set dst (DivD src1 (LoadD src2)));
17828
17829 format %{ "vdivsd $dst, $src1, $src2" %}
17830 ins_cost(150);
17831 ins_encode %{
17832 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17833 %}
17834 ins_pipe(pipe_slow);
17835 %}
17836
17837 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17838 predicate(UseAVX > 0);
17839 match(Set dst (DivD src con));
17840
17841 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17842 ins_cost(150);
17843 ins_encode %{
17844 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17845 %}
17846 ins_pipe(pipe_slow);
17847 %}
17848
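// Float abs/neg are done bitwise: ANDing with 0x7fffffff clears the sign bit
// (abs) and XORing with 0x80000000 flips it (neg); the corresponding 64-bit
// masks do the same for doubles.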
17849 instruct absF_reg(regF dst) %{
17850 predicate(UseAVX == 0);
17851 match(Set dst (AbsF dst));
17852 ins_cost(150);
17853 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
17854 ins_encode %{
17855 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17856 %}
17857 ins_pipe(pipe_slow);
17858 %}
17859
17860 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17861 predicate(UseAVX > 0);
17862 match(Set dst (AbsF src));
17863 ins_cost(150);
17864 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17865 ins_encode %{
17866 int vlen_enc = Assembler::AVX_128bit;
17867 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17868 ExternalAddress(float_signmask()), vlen_enc);
17869 %}
17870 ins_pipe(pipe_slow);
17871 %}
17872
17873 instruct absD_reg(regD dst) %{
17874 predicate(UseAVX == 0);
17875 match(Set dst (AbsD dst));
17876 ins_cost(150);
17877 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
17878 "# abs double by sign masking" %}
17879 ins_encode %{
17880 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17881 %}
17882 ins_pipe(pipe_slow);
17883 %}
17884
17885 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17886 predicate(UseAVX > 0);
17887 match(Set dst (AbsD src));
17888 ins_cost(150);
17889 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
17890 "# abs double by sign masking" %}
17891 ins_encode %{
17892 int vlen_enc = Assembler::AVX_128bit;
17893 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17894 ExternalAddress(double_signmask()), vlen_enc);
17895 %}
17896 ins_pipe(pipe_slow);
17897 %}
17898
17899 instruct negF_reg(regF dst) %{
17900 predicate(UseAVX == 0);
17901 match(Set dst (NegF dst));
17902 ins_cost(150);
17903 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
17904 ins_encode %{
17905 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17906 %}
17907 ins_pipe(pipe_slow);
17908 %}
17909
17910 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17911 predicate(UseAVX > 0);
17912 match(Set dst (NegF src));
17913 ins_cost(150);
17914 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17915 ins_encode %{
17916 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17917 ExternalAddress(float_signflip()));
17918 %}
17919 ins_pipe(pipe_slow);
17920 %}
17921
17922 instruct negD_reg(regD dst) %{
17923 predicate(UseAVX == 0);
17924 match(Set dst (NegD dst));
17925 ins_cost(150);
17926 format %{ "xorpd $dst, [0x8000000000000000]\t"
17927 "# neg double by sign flipping" %}
17928 ins_encode %{
17929 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17930 %}
17931 ins_pipe(pipe_slow);
17932 %}
17933
17934 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17935 predicate(UseAVX > 0);
17936 match(Set dst (NegD src));
17937 ins_cost(150);
17938 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
17939 "# neg double by sign flipping" %}
17940 ins_encode %{
17941 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17942 ExternalAddress(double_signflip()));
17943 %}
17944 ins_pipe(pipe_slow);
17945 %}
17946
// The sqrtss instruction writes only the low float lane and merges the rest of
// dst, so a stale dst creates a false dependency; for best performance dst
// must be pre-initialized. Therefore only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
17949 instruct sqrtF_reg(regF dst) %{
17950 match(Set dst (SqrtF dst));
17951 format %{ "sqrtss $dst, $dst" %}
17952 ins_encode %{
17953 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17954 %}
17955 ins_pipe(pipe_slow);
17956 %}
17957
// The sqrtsd instruction writes only the low double lane and merges the rest of
// dst, so a stale dst creates a false dependency; for best performance dst
// must be pre-initialized. Therefore only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
17960 instruct sqrtD_reg(regD dst) %{
17961 match(Set dst (SqrtD dst));
17962 format %{ "sqrtsd $dst, $dst" %}
17963 ins_encode %{
17964 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17965 %}
17966 ins_pipe(pipe_slow);
17967 %}
17968
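// Float <-> float16 conversions. The 0x04 immediate passed to vcvtps2ph
// selects the rounding mode from MXCSR (round-to-nearest-even in the JVM's
// default FP state) rather than from the immediate itself.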
17969 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17970 effect(TEMP tmp);
17971 match(Set dst (ConvF2HF src));
17972 ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
17974 ins_encode %{
17975 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17976 %}
17977 ins_pipe( pipe_slow );
17978 %}
17979
17980 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17981 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17982 effect(TEMP ktmp, TEMP rtmp);
17983 match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
17985 ins_encode %{
17986 __ movl($rtmp$$Register, 0x1);
17987 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17988 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17989 %}
17990 ins_pipe( pipe_slow );
17991 %}
17992
17993 instruct vconvF2HF(vec dst, vec src) %{
17994 match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst,$src" %}
17996 ins_encode %{
17997 int vlen_enc = vector_length_encoding(this, $src);
17998 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17999 %}
18000 ins_pipe( pipe_slow );
18001 %}
18002
18003 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18004 predicate(n->as_StoreVector()->memory_size() >= 16);
18005 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18006 format %{ "vcvtps2ph $mem,$src" %}
18007 ins_encode %{
18008 int vlen_enc = vector_length_encoding(this, $src);
18009 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18010 %}
18011 ins_pipe( pipe_slow );
18012 %}
18013
18014 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18015 match(Set dst (ConvHF2F src));
18016 format %{ "vcvtph2ps $dst,$src" %}
18017 ins_encode %{
18018 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18019 %}
18020 ins_pipe( pipe_slow );
18021 %}
18022
18023 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18024 match(Set dst (VectorCastHF2F (LoadVector mem)));
18025 format %{ "vcvtph2ps $dst,$mem" %}
18026 ins_encode %{
18027 int vlen_enc = vector_length_encoding(this);
18028 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18029 %}
18030 ins_pipe( pipe_slow );
18031 %}
18032
18033 instruct vconvHF2F(vec dst, vec src) %{
18034 match(Set dst (VectorCastHF2F src));
18035 ins_cost(125);
18036 format %{ "vector_conv_HF2F $dst,$src" %}
18037 ins_encode %{
18038 int vlen_enc = vector_length_encoding(this);
18039 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18040 %}
18041 ins_pipe( pipe_slow );
18042 %}
18043
18044 // ---------------------------------------- VectorReinterpret ------------------------------------
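// A reinterpret between equal-sized values is a register-level no-op (empty
// encoding). Expanding reinterprets must zero the new upper bytes, either by
// ANDing with a size mask or via a move that implicitly clears the upper
// bits; shrinking reinterprets just copy the surviving low bytes.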
18045 instruct reinterpret_mask(kReg dst) %{
18046 predicate(n->bottom_type()->isa_vectmask() &&
18047 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18048 match(Set dst (VectorReinterpret dst));
18049 ins_cost(125);
18050 format %{ "vector_reinterpret $dst\t!" %}
18051 ins_encode %{
18052 // empty
18053 %}
18054 ins_pipe( pipe_slow );
18055 %}
18056
18057 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18058 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18059 n->bottom_type()->isa_vectmask() &&
18060 n->in(1)->bottom_type()->isa_vectmask() &&
18061 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same total size in bits
18063 match(Set dst (VectorReinterpret src));
18064 effect(TEMP xtmp);
18065 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18066 ins_encode %{
18067 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18068 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18070 int vlen_enc = vector_length_encoding(src_sz);
18071 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18072 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18073 %}
18074 ins_pipe( pipe_slow );
18075 %}
18076
18077 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18078 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18079 n->bottom_type()->isa_vectmask() &&
18080 n->in(1)->bottom_type()->isa_vectmask() &&
18081 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18082 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same total size in bits
18084 match(Set dst (VectorReinterpret src));
18085 effect(TEMP xtmp);
18086 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18087 ins_encode %{
18088 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18089 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18091 int vlen_enc = vector_length_encoding(src_sz);
18092 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18093 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18094 %}
18095 ins_pipe( pipe_slow );
18096 %}
18097
18098 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18099 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18100 n->bottom_type()->isa_vectmask() &&
18101 n->in(1)->bottom_type()->isa_vectmask() &&
18102 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18103 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same total size in bits
18105 match(Set dst (VectorReinterpret src));
18106 effect(TEMP xtmp);
18107 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18108 ins_encode %{
18109 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18110 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18112 int vlen_enc = vector_length_encoding(src_sz);
18113 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18114 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18115 %}
18116 ins_pipe( pipe_slow );
18117 %}
18118
18119 instruct reinterpret(vec dst) %{
18120 predicate(!n->bottom_type()->isa_vectmask() &&
18121 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18122 match(Set dst (VectorReinterpret dst));
18123 ins_cost(125);
18124 format %{ "vector_reinterpret $dst\t!" %}
18125 ins_encode %{
18126 // empty
18127 %}
18128 ins_pipe( pipe_slow );
18129 %}
18130
18131 instruct reinterpret_expand(vec dst, vec src) %{
18132 predicate(UseAVX == 0 &&
18133 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18134 match(Set dst (VectorReinterpret src));
18135 ins_cost(125);
18136 effect(TEMP dst);
18137 format %{ "vector_reinterpret_expand $dst,$src" %}
18138 ins_encode %{
18139 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18140 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18141
18142 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18143 if (src_vlen_in_bytes == 4) {
18144 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18145 } else {
18146 assert(src_vlen_in_bytes == 8, "");
18147 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18148 }
18149 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18150 %}
18151 ins_pipe( pipe_slow );
18152 %}
18153
18154 instruct vreinterpret_expand4(legVec dst, vec src) %{
18155 predicate(UseAVX > 0 &&
18156 !n->bottom_type()->isa_vectmask() &&
18157 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18158 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18159 match(Set dst (VectorReinterpret src));
18160 ins_cost(125);
18161 format %{ "vector_reinterpret_expand $dst,$src" %}
18162 ins_encode %{
18163 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18164 %}
18165 ins_pipe( pipe_slow );
18166 %}
18167
18168
18169 instruct vreinterpret_expand(legVec dst, vec src) %{
18170 predicate(UseAVX > 0 &&
18171 !n->bottom_type()->isa_vectmask() &&
18172 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18173 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18174 match(Set dst (VectorReinterpret src));
18175 ins_cost(125);
18176 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18177 ins_encode %{
18178 switch (Matcher::vector_length_in_bytes(this, $src)) {
18179 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18180 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18181 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18182 default: ShouldNotReachHere();
18183 }
18184 %}
18185 ins_pipe( pipe_slow );
18186 %}
18187
18188 instruct reinterpret_shrink(vec dst, legVec src) %{
18189 predicate(!n->bottom_type()->isa_vectmask() &&
18190 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18191 match(Set dst (VectorReinterpret src));
18192 ins_cost(125);
18193 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18194 ins_encode %{
18195 switch (Matcher::vector_length_in_bytes(this)) {
18196 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18197 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18198 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18199 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18200 default: ShouldNotReachHere();
18201 }
18202 %}
18203 ins_pipe( pipe_slow );
18204 %}
18205
18206 // ----------------------------------------------------------------------------------------------------
18207
18208 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18209 match(Set dst (RoundDoubleMode src rmode));
18210 format %{ "roundsd $dst,$src" %}
18211 ins_cost(150);
18212 ins_encode %{
18213 assert(UseSSE >= 4, "required");
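    // SSE roundsd merges its result into dst, leaving the upper bits intact;
    // clear dst first when it differs from src to break the false dependency
    // on dst's previous contents.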
18214 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18215 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18216 }
18217 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18218 %}
18219 ins_pipe(pipe_slow);
18220 %}
18221
18222 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18223 match(Set dst (RoundDoubleMode con rmode));
18224 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18225 ins_cost(150);
18226 ins_encode %{
18227 assert(UseSSE >= 4, "required");
18228 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18229 %}
18230 ins_pipe(pipe_slow);
18231 %}
18232
18233 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18234 predicate(Matcher::vector_length(n) < 8);
18235 match(Set dst (RoundDoubleModeV src rmode));
18236 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18237 ins_encode %{
18238 assert(UseAVX > 0, "required");
18239 int vlen_enc = vector_length_encoding(this);
18240 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18241 %}
18242 ins_pipe( pipe_slow );
18243 %}
18244
18245 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18246 predicate(Matcher::vector_length(n) == 8);
18247 match(Set dst (RoundDoubleModeV src rmode));
18248 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18249 ins_encode %{
18250 assert(UseAVX > 2, "required");
18251 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18252 %}
18253 ins_pipe( pipe_slow );
18254 %}
18255
18256 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18257 predicate(Matcher::vector_length(n) < 8);
18258 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18259 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18260 ins_encode %{
18261 assert(UseAVX > 0, "required");
18262 int vlen_enc = vector_length_encoding(this);
18263 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18264 %}
18265 ins_pipe( pipe_slow );
18266 %}
18267
18268 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18269 predicate(Matcher::vector_length(n) == 8);
18270 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18271 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18272 ins_encode %{
18273 assert(UseAVX > 2, "required");
18274 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18275 %}
18276 ins_pipe( pipe_slow );
18277 %}
18278
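// Thread.onSpinWait() intrinsic: PAUSE hints a spin-wait loop, saving power
// and avoiding the memory-order pipeline flush when the loop exits.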
18279 instruct onspinwait() %{
18280 match(OnSpinWait);
18281 ins_cost(200);
18282
18283 format %{
18284 $$template
18285 $$emit$$"pause\t! membar_onspinwait"
18286 %}
18287 ins_encode %{
18288 __ pause();
18289 %}
18290 ins_pipe(pipe_slow);
18291 %}
18292
18293 // a * b + c
18294 instruct fmaD_reg(regD a, regD b, regD c) %{
18295 match(Set c (FmaD c (Binary a b)));
18296 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18297 ins_cost(150);
18298 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18300 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18301 %}
18302 ins_pipe( pipe_slow );
18303 %}
18304
18305 // a * b + c
18306 instruct fmaF_reg(regF a, regF b, regF c) %{
18307 match(Set c (FmaF c (Binary a b)));
18308 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18309 ins_cost(150);
18310 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18312 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18313 %}
18314 ins_pipe( pipe_slow );
18315 %}
18316
18317 // ====================VECTOR INSTRUCTIONS=====================================
18318
18319 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18320 instruct MoveVec2Leg(legVec dst, vec src) %{
18321 match(Set dst src);
18322 format %{ "" %}
18323 ins_encode %{
18324 ShouldNotReachHere();
18325 %}
18326 ins_pipe( fpu_reg_reg );
18327 %}
18328
18329 instruct MoveLeg2Vec(vec dst, legVec src) %{
18330 match(Set dst src);
18331 format %{ "" %}
18332 ins_encode %{
18333 ShouldNotReachHere();
18334 %}
18335 ins_pipe( fpu_reg_reg );
18336 %}
18337
18338 // ============================================================================
18339
18340 // Load vectors generic operand pattern
18341 instruct loadV(vec dst, memory mem) %{
18342 match(Set dst (LoadVector mem));
18343 ins_cost(125);
18344 format %{ "load_vector $dst,$mem" %}
18345 ins_encode %{
18346 BasicType bt = Matcher::vector_element_basic_type(this);
18347 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18348 %}
18349 ins_pipe( pipe_slow );
18350 %}
18351
18352 // Store vectors generic operand pattern.
18353 instruct storeV(memory mem, vec src) %{
18354 match(Set mem (StoreVector mem src));
18355 ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18357 ins_encode %{
18358 switch (Matcher::vector_length_in_bytes(this, $src)) {
18359 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18360 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18361 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18362 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18363 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18364 default: ShouldNotReachHere();
18365 }
18366 %}
18367 ins_pipe( pipe_slow );
18368 %}
18369
18370 // ---------------------------------------- Gather ------------------------------------
18371
18372 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18373
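// A gather loads dst[i] = base[idx[i]] for every unmasked lane. The AVX2
// vgather forms consume a vector mask (cleared element by element as loads
// complete), while the AVX-512 evgather forms predicate on an opmask, so the
// unmasked variants first materialize an all-ones mask. Subword (byte/short)
// gathers have no hardware support and are synthesized with scalar loads.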
18374 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18375 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18376 Matcher::vector_length_in_bytes(n) <= 32);
18377 match(Set dst (LoadVectorGather mem idx));
18378 effect(TEMP dst, TEMP tmp, TEMP mask);
18379 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18380 ins_encode %{
18381 int vlen_enc = vector_length_encoding(this);
18382 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18383 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18384 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18385 __ lea($tmp$$Register, $mem$$Address);
18386 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18387 %}
18388 ins_pipe( pipe_slow );
18389 %}
18390
18391
18392 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18393 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18394 !is_subword_type(Matcher::vector_element_basic_type(n)));
18395 match(Set dst (LoadVectorGather mem idx));
18396 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18398 ins_encode %{
18399 int vlen_enc = vector_length_encoding(this);
18400 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18401 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18402 __ lea($tmp$$Register, $mem$$Address);
18403 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18404 %}
18405 ins_pipe( pipe_slow );
18406 %}
18407
18408 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18409 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18410 !is_subword_type(Matcher::vector_element_basic_type(n)));
18411 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18412 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18414 ins_encode %{
18415 assert(UseAVX > 2, "sanity");
18416 int vlen_enc = vector_length_encoding(this);
18417 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18418 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates (clears) the opmask
    // register used for predication, the mask operand is copied to a temporary.
18421 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18422 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18423 __ lea($tmp$$Register, $mem$$Address);
18424 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18425 %}
18426 ins_pipe( pipe_slow );
18427 %}
18428
18429 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18430 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18431 match(Set dst (LoadVectorGather mem idx_base));
18432 effect(TEMP tmp, TEMP rtmp);
18433 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18434 ins_encode %{
18435 int vlen_enc = vector_length_encoding(this);
18436 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18437 __ lea($tmp$$Register, $mem$$Address);
18438 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18439 %}
18440 ins_pipe( pipe_slow );
18441 %}
18442
18443 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18444 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18445 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18446 match(Set dst (LoadVectorGather mem idx_base));
18447 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18448 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18449 ins_encode %{
18450 int vlen_enc = vector_length_encoding(this);
18451 int vector_len = Matcher::vector_length(this);
18452 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18453 __ lea($tmp$$Register, $mem$$Address);
18454 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18455 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18456 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18457 %}
18458 ins_pipe( pipe_slow );
18459 %}
18460
18461 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18462 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18463 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18464 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18465 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18466 ins_encode %{
18467 int vlen_enc = vector_length_encoding(this);
18468 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18469 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18470 __ lea($tmp$$Register, $mem$$Address);
18471 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18472 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18473 %}
18474 ins_pipe( pipe_slow );
18475 %}
18476
18477 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18478 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18479 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18480 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18481 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18482 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18483 ins_encode %{
18484 int vlen_enc = vector_length_encoding(this);
18485 int vector_len = Matcher::vector_length(this);
18486 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18487 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18488 __ lea($tmp$$Register, $mem$$Address);
18489 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18490 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18491 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18492 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18493 %}
18494 ins_pipe( pipe_slow );
18495 %}
18496
18497 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18498 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18499 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18500 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18501 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18502 ins_encode %{
18503 int vlen_enc = vector_length_encoding(this);
18504 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18505 __ lea($tmp$$Register, $mem$$Address);
18506 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18507 if (elem_bt == T_SHORT) {
18508 __ movl($mask_idx$$Register, 0x55555555);
18509 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18510 }
18511 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18512 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18513 %}
18514 ins_pipe( pipe_slow );
18515 %}
18516
18517 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18518 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18519 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18520 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18521 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18522 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18523 ins_encode %{
18524 int vlen_enc = vector_length_encoding(this);
18525 int vector_len = Matcher::vector_length(this);
18526 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18527 __ lea($tmp$$Register, $mem$$Address);
18528 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18529 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18530 if (elem_bt == T_SHORT) {
18531 __ movl($mask_idx$$Register, 0x55555555);
18532 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18533 }
18534 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18535 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18536 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18537 %}
18538 ins_pipe( pipe_slow );
18539 %}
18540
18541 // ====================Scatter=======================================
18542
18543 // Scatter INT, LONG, FLOAT, DOUBLE
18544
18545 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18546 predicate(UseAVX > 2);
18547 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18548 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18550 ins_encode %{
18551 int vlen_enc = vector_length_encoding(this, $src);
18552 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18553
18554 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18555 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18556
18557 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18558 __ lea($tmp$$Register, $mem$$Address);
18559 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18560 %}
18561 ins_pipe( pipe_slow );
18562 %}
18563
18564 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18565 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18566 effect(TEMP tmp, TEMP ktmp);
18567 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18568 ins_encode %{
18569 int vlen_enc = vector_length_encoding(this, $src);
18570 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18571 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18572 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is copied to a temporary.
18575 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18576 __ lea($tmp$$Register, $mem$$Address);
18577 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18578 %}
18579 ins_pipe( pipe_slow );
18580 %}
18581
18582 // ====================REPLICATE=======================================
18583
18584 // Replicate byte scalar to be vector
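// With AVX2 and above a single (ev)vpbroadcastb suffices; without AVX2 the
// broadcast is synthesized with punpcklbw + pshuflw (+ punpcklqdq for the
// 16-byte case).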
18585 instruct vReplB_reg(vec dst, rRegI src) %{
18586 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18587 match(Set dst (Replicate src));
18588 format %{ "replicateB $dst,$src" %}
18589 ins_encode %{
18590 uint vlen = Matcher::vector_length(this);
18591 if (UseAVX >= 2) {
18592 int vlen_enc = vector_length_encoding(this);
18593 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18594 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18595 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18596 } else {
18597 __ movdl($dst$$XMMRegister, $src$$Register);
18598 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18599 }
18600 } else {
18601 assert(UseAVX < 2, "");
18602 __ movdl($dst$$XMMRegister, $src$$Register);
18603 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18604 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18605 if (vlen >= 16) {
18606 assert(vlen == 16, "");
18607 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18608 }
18609 }
18610 %}
18611 ins_pipe( pipe_slow );
18612 %}
18613
18614 instruct ReplB_mem(vec dst, memory mem) %{
18615 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18616 match(Set dst (Replicate (LoadB mem)));
18617 format %{ "replicateB $dst,$mem" %}
18618 ins_encode %{
18619 int vlen_enc = vector_length_encoding(this);
18620 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18621 %}
18622 ins_pipe( pipe_slow );
18623 %}
18624
18625 // ====================ReplicateS=======================================
18626
18627 instruct vReplS_reg(vec dst, rRegI src) %{
18628 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18629 match(Set dst (Replicate src));
18630 format %{ "replicateS $dst,$src" %}
18631 ins_encode %{
18632 uint vlen = Matcher::vector_length(this);
18633 int vlen_enc = vector_length_encoding(this);
18634 if (UseAVX >= 2) {
18635 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18636 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18637 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18638 } else {
18639 __ movdl($dst$$XMMRegister, $src$$Register);
18640 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18641 }
18642 } else {
18643 assert(UseAVX < 2, "");
18644 __ movdl($dst$$XMMRegister, $src$$Register);
18645 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18646 if (vlen >= 8) {
18647 assert(vlen == 8, "");
18648 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18649 }
18650 }
18651 %}
18652 ins_pipe( pipe_slow );
18653 %}
18654
18655 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18656 match(Set dst (Replicate con));
18657 effect(TEMP rtmp);
18658 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18659 ins_encode %{
18660 int vlen_enc = vector_length_encoding(this);
18661 BasicType bt = Matcher::vector_element_basic_type(this);
18662 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18663 __ movl($rtmp$$Register, $con$$constant);
18664 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18665 %}
18666 ins_pipe( pipe_slow );
18667 %}
18668
18669 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18670 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18671 match(Set dst (Replicate src));
18672 effect(TEMP rtmp);
18673 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18674 ins_encode %{
18675 int vlen_enc = vector_length_encoding(this);
18676 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18677 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18678 %}
18679 ins_pipe( pipe_slow );
18680 %}
18681
18682 instruct ReplS_mem(vec dst, memory mem) %{
18683 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18684 match(Set dst (Replicate (LoadS mem)));
18685 format %{ "replicateS $dst,$mem" %}
18686 ins_encode %{
18687 int vlen_enc = vector_length_encoding(this);
18688 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18689 %}
18690 ins_pipe( pipe_slow );
18691 %}
18692
18693 // ====================ReplicateI=======================================
18694
18695 instruct ReplI_reg(vec dst, rRegI src) %{
18696 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18697 match(Set dst (Replicate src));
18698 format %{ "replicateI $dst,$src" %}
18699 ins_encode %{
18700 uint vlen = Matcher::vector_length(this);
18701 int vlen_enc = vector_length_encoding(this);
18702 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18703 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18704 } else if (VM_Version::supports_avx2()) {
18705 __ movdl($dst$$XMMRegister, $src$$Register);
18706 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18707 } else {
18708 __ movdl($dst$$XMMRegister, $src$$Register);
18709 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18710 }
18711 %}
18712 ins_pipe( pipe_slow );
18713 %}
18714
18715 instruct ReplI_mem(vec dst, memory mem) %{
18716 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18717 match(Set dst (Replicate (LoadI mem)));
18718 format %{ "replicateI $dst,$mem" %}
18719 ins_encode %{
18720 int vlen_enc = vector_length_encoding(this);
18721 if (VM_Version::supports_avx2()) {
18722 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18723 } else if (VM_Version::supports_avx()) {
18724 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18725 } else {
18726 __ movdl($dst$$XMMRegister, $mem$$Address);
18727 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18728 }
18729 %}
18730 ins_pipe( pipe_slow );
18731 %}
18732
18733 instruct ReplI_imm(vec dst, immI con) %{
18734 predicate(Matcher::is_non_long_integral_vector(n));
18735 match(Set dst (Replicate con));
18736 format %{ "replicateI $dst,$con" %}
18737 ins_encode %{
18738 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18739 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18740 type2aelembytes(Matcher::vector_element_basic_type(this))));
18741 BasicType bt = Matcher::vector_element_basic_type(this);
18742 int vlen = Matcher::vector_length_in_bytes(this);
18743 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18744 %}
18745 ins_pipe( pipe_slow );
18746 %}
18747
18748 // Replicate scalar zero to be vector
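// Zeroing uses the xor-with-self idiom, which CPUs treat as dependency-breaking.
// The EVEX-encoded vpxor is needed when AVX512VL is absent, since $dst may then
// be one of the extended registers (xmm16-xmm31) that the legacy pxor encoding
// cannot reach.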
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateL=======================================

// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateF=======================================

instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 4) {
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 8, "sanity");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float scalar immediate to be vector by loading from const table.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================ReplicateD=======================================

// Replicate double (8 bytes) scalar to be vector
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 2) {
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 4, "sanity");
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) >= 4) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================

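// VectorInsert overwrites the element at a compile-time-constant index and
// leaves every other lane untouched, roughly: dst = src; dst[idx] = val;
// For 256/512-bit vectors the index is split into a 128-bit lane number
// (y_idx) and an element position within that lane (x_idx): the lane is
// extracted, patched with pinsr*/insertps, and written back.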
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}

instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REDUCTION ARITHMETIC=======================================

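// A reduction folds a scalar accumulator and every lane of a vector into a
// single scalar, e.g. for AddReductionVI:
//
//   int result = src1;
//   for (int i = 0; i < num_lanes; i++) { result += src2[i]; }
//
// The reduce* macro-assembler routines typically fold the upper half of the
// vector onto the lower half repeatedly until a single lane remains.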
// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Float Reduction==========================================

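// Floating-point add/mul reductions come in two flavors. The strictly ordered
// rules accumulate the lanes in index order, matching a scalar loop
// bit-for-bit, as auto-vectorized code requires. The unordered_* rules may
// re-associate lanes and are selected only when the Reduction node does not
// require strict order (see requires_strict_order() in the predicates).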
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Double Reduction==========================================

instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Byte Reduction==========================================

instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Short Reduction==========================================

instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Mul Reduction==========================================

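// Byte multiply reductions are split by vector size below. x86 has no packed
// byte multiply instruction, so mulreduceB is assumed to widen the byte lanes
// before folding them (the exact folding lives in the macro assembler);
// semantically both rules compute:
//
//   int result = src1;
//   for (int i = 0; i < num_lanes; i++) { result *= src2[i]; }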
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Float Reduction --------------------
// Float Min Reduction
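// Java min/max semantics differ from plain minps/maxps: any NaN input must
// produce NaN, and min(-0.0f, 0.0f) must return -0.0f, while minps/maxps
// simply return the second operand in those cases. The pre-AVX10.2 rules
// therefore go through compare/blend sequences in reduceFloatMinMax, which
// needs the extra vector temporaries and clobbers the flags (KILL cr); the
// AVX10.2 variants can rely on newer min/max instructions that handle these
// cases directly and so get by with fewer temporaries.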
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2\t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2\t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src\t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src\t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Double Reduction --------------------
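// The same NaN and signed-zero considerations as for the float rules above
// apply to the double min/max reductions below.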
19732 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19733 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19734 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19735 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19736 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19737 Matcher::vector_length(n->in(2)) == 2);
19738 match(Set dst (MinReductionV src1 src2));
19739 match(Set dst (MaxReductionV src1 src2));
19740 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19741 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19742 ins_encode %{
19743 assert(UseAVX > 0, "sanity");
19744
19745 int opcode = this->ideal_Opcode();
19746 int vlen = Matcher::vector_length(this, $src2);
19747 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19748 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19749 %}
19750 ins_pipe( pipe_slow );
19751 %}
19752
19753 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19754 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19755 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19756 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19757 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19758 Matcher::vector_length(n->in(2)) >= 4);
19759 match(Set dst (MinReductionV src1 src2));
19760 match(Set dst (MaxReductionV src1 src2));
19761 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19762 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19763 ins_encode %{
19764 assert(UseAVX > 0, "sanity");
19765
19766 int opcode = this->ideal_Opcode();
19767 int vlen = Matcher::vector_length(this, $src2);
19768 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19769 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19770 %}
19771 ins_pipe( pipe_slow );
19772 %}
19773
19774
19775 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19776 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19777 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19778 Matcher::vector_length(n->in(2)) == 2);
19779 match(Set dst (MinReductionV dst src));
19780 match(Set dst (MaxReductionV dst src));
19781 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19782 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19783 ins_encode %{
19784 assert(UseAVX > 0, "sanity");
19785
19786 int opcode = this->ideal_Opcode();
19787 int vlen = Matcher::vector_length(this, $src);
19788 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19789 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19790 %}
19791 ins_pipe( pipe_slow );
19792 %}
19793
19794 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19795 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19796 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19797 Matcher::vector_length(n->in(2)) >= 4);
19798 match(Set dst (MinReductionV dst src));
19799 match(Set dst (MaxReductionV dst src));
19800 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19801 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19802 ins_encode %{
19803 assert(UseAVX > 0, "sanity");
19804
19805 int opcode = this->ideal_Opcode();
19806 int vlen = Matcher::vector_length(this, $src);
19807 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19808 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19809 %}
19810 ins_pipe( pipe_slow );
19811 %}
19812
19813 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19814 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19815 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19816 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19817 Matcher::vector_length(n->in(2)) == 2);
19818 match(Set dst (MinReductionV src1 src2));
19819 match(Set dst (MaxReductionV src1 src2));
19820 effect(TEMP dst, TEMP xtmp1);
19821 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19822 ins_encode %{
19823 int opcode = this->ideal_Opcode();
19824 int vlen = Matcher::vector_length(this, $src2);
19825 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19826 xnoreg, xnoreg, $xtmp1$$XMMRegister);
19827 %}
19828 ins_pipe( pipe_slow );
19829 %}
19830
19831 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19832 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19833 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19834 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19835 Matcher::vector_length(n->in(2)) >= 4);
19836 match(Set dst (MinReductionV src1 src2));
19837 match(Set dst (MaxReductionV src1 src2));
19838 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19839 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19840 ins_encode %{
19841 int opcode = this->ideal_Opcode();
19842 int vlen = Matcher::vector_length(this, $src2);
19843 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19844 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19845 %}
19846 ins_pipe( pipe_slow );
19847 %}
19848
19849
19850 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
19851 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19852 Matcher::vector_length(n->in(2)) == 2);
19853 match(Set dst (MinReductionV dst src));
19854 match(Set dst (MaxReductionV dst src));
19855 effect(TEMP dst, TEMP xtmp1);
19856 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19857 ins_encode %{
19858 int opcode = this->ideal_Opcode();
19859 int vlen = Matcher::vector_length(this, $src);
19860 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19861 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19862 %}
19863 ins_pipe( pipe_slow );
19864 %}
19865
19866 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19867 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19868 Matcher::vector_length(n->in(2)) >= 4);
19869 match(Set dst (MinReductionV dst src));
19870 match(Set dst (MaxReductionV dst src));
19871 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19872 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19873 ins_encode %{
19874 int opcode = this->ideal_Opcode();
19875 int vlen = Matcher::vector_length(this, $src);
19876 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19877 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19878 %}
19879 ins_pipe( pipe_slow );
19880 %}
19881
19882 // ====================VECTOR ARITHMETIC=======================================
19883
19884 // --------------------------------- ADD --------------------------------------
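// The *_mem forms in this and the following sections fold the vector load
// into the arithmetic instruction. Their predicates require vectors larger
// than 8 bytes, presumably so that the full-width XMM memory operand never
// reads past the end of a sub-XMM-sized vector.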
19885
19886 // Bytes vector add
19887 instruct vaddB(vec dst, vec src) %{
19888 predicate(UseAVX == 0);
19889 match(Set dst (AddVB dst src));
19890 format %{ "paddb $dst,$src\t! add packedB" %}
19891 ins_encode %{
19892 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19893 %}
19894 ins_pipe( pipe_slow );
19895 %}
19896
19897 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19898 predicate(UseAVX > 0);
19899 match(Set dst (AddVB src1 src2));
19900 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
19901 ins_encode %{
19902 int vlen_enc = vector_length_encoding(this);
19903 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19904 %}
19905 ins_pipe( pipe_slow );
19906 %}
19907
19908 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19909 predicate((UseAVX > 0) &&
19910 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19911 match(Set dst (AddVB src (LoadVector mem)));
19912 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
19913 ins_encode %{
19914 int vlen_enc = vector_length_encoding(this);
19915 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19916 %}
19917 ins_pipe( pipe_slow );
19918 %}
19919
19920 // Shorts/Chars vector add
19921 instruct vaddS(vec dst, vec src) %{
19922 predicate(UseAVX == 0);
19923 match(Set dst (AddVS dst src));
19924 format %{ "paddw $dst,$src\t! add packedS" %}
19925 ins_encode %{
19926 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19927 %}
19928 ins_pipe( pipe_slow );
19929 %}
19930
19931 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19932 predicate(UseAVX > 0);
19933 match(Set dst (AddVS src1 src2));
19934 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
19935 ins_encode %{
19936 int vlen_enc = vector_length_encoding(this);
19937 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19938 %}
19939 ins_pipe( pipe_slow );
19940 %}
19941
19942 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19943 predicate((UseAVX > 0) &&
19944 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19945 match(Set dst (AddVS src (LoadVector mem)));
19946 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
19947 ins_encode %{
19948 int vlen_enc = vector_length_encoding(this);
19949 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19950 %}
19951 ins_pipe( pipe_slow );
19952 %}
19953
19954 // Integers vector add
19955 instruct vaddI(vec dst, vec src) %{
19956 predicate(UseAVX == 0);
19957 match(Set dst (AddVI dst src));
19958 format %{ "paddd $dst,$src\t! add packedI" %}
19959 ins_encode %{
19960 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19961 %}
19962 ins_pipe( pipe_slow );
19963 %}
19964
19965 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19966 predicate(UseAVX > 0);
19967 match(Set dst (AddVI src1 src2));
19968 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
19969 ins_encode %{
19970 int vlen_enc = vector_length_encoding(this);
19971 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19972 %}
19973 ins_pipe( pipe_slow );
19974 %}
19975
19976
19977 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19978 predicate((UseAVX > 0) &&
19979 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19980 match(Set dst (AddVI src (LoadVector mem)));
19981 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
19982 ins_encode %{
19983 int vlen_enc = vector_length_encoding(this);
19984 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19985 %}
19986 ins_pipe( pipe_slow );
19987 %}
19988
19989 // Longs vector add
19990 instruct vaddL(vec dst, vec src) %{
19991 predicate(UseAVX == 0);
19992 match(Set dst (AddVL dst src));
19993 format %{ "paddq $dst,$src\t! add packedL" %}
19994 ins_encode %{
19995 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19996 %}
19997 ins_pipe( pipe_slow );
19998 %}
19999
20000 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20001 predicate(UseAVX > 0);
20002 match(Set dst (AddVL src1 src2));
20003 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20004 ins_encode %{
20005 int vlen_enc = vector_length_encoding(this);
20006 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20007 %}
20008 ins_pipe( pipe_slow );
20009 %}
20010
20011 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20012 predicate((UseAVX > 0) &&
20013 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20014 match(Set dst (AddVL src (LoadVector mem)));
20015 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20016 ins_encode %{
20017 int vlen_enc = vector_length_encoding(this);
20018 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20019 %}
20020 ins_pipe( pipe_slow );
20021 %}
20022
20023 // Floats vector add
20024 instruct vaddF(vec dst, vec src) %{
20025 predicate(UseAVX == 0);
20026 match(Set dst (AddVF dst src));
20027 format %{ "addps $dst,$src\t! add packedF" %}
20028 ins_encode %{
20029 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20030 %}
20031 ins_pipe( pipe_slow );
20032 %}
20033
20034 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20035 predicate(UseAVX > 0);
20036 match(Set dst (AddVF src1 src2));
20037 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20038 ins_encode %{
20039 int vlen_enc = vector_length_encoding(this);
20040 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20041 %}
20042 ins_pipe( pipe_slow );
20043 %}
20044
20045 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20046 predicate((UseAVX > 0) &&
20047 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20048 match(Set dst (AddVF src (LoadVector mem)));
20049 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20050 ins_encode %{
20051 int vlen_enc = vector_length_encoding(this);
20052 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20053 %}
20054 ins_pipe( pipe_slow );
20055 %}
20056
20057 // Doubles vector add
20058 instruct vaddD(vec dst, vec src) %{
20059 predicate(UseAVX == 0);
20060 match(Set dst (AddVD dst src));
20061 format %{ "addpd $dst,$src\t! add packedD" %}
20062 ins_encode %{
20063 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20064 %}
20065 ins_pipe( pipe_slow );
20066 %}
20067
20068 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20069 predicate(UseAVX > 0);
20070 match(Set dst (AddVD src1 src2));
20071 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20072 ins_encode %{
20073 int vlen_enc = vector_length_encoding(this);
20074 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20075 %}
20076 ins_pipe( pipe_slow );
20077 %}
20078
20079 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20080 predicate((UseAVX > 0) &&
20081 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20082 match(Set dst (AddVD src (LoadVector mem)));
20083 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20084 ins_encode %{
20085 int vlen_enc = vector_length_encoding(this);
20086 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20087 %}
20088 ins_pipe( pipe_slow );
20089 %}
20090
20091 // --------------------------------- SUB --------------------------------------
20092
20093 // Bytes vector sub
20094 instruct vsubB(vec dst, vec src) %{
20095 predicate(UseAVX == 0);
20096 match(Set dst (SubVB dst src));
20097 format %{ "psubb $dst,$src\t! sub packedB" %}
20098 ins_encode %{
20099 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20100 %}
20101 ins_pipe( pipe_slow );
20102 %}
20103
20104 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20105 predicate(UseAVX > 0);
20106 match(Set dst (SubVB src1 src2));
20107 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20108 ins_encode %{
20109 int vlen_enc = vector_length_encoding(this);
20110 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20111 %}
20112 ins_pipe( pipe_slow );
20113 %}
20114
20115 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20116 predicate((UseAVX > 0) &&
20117 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20118 match(Set dst (SubVB src (LoadVector mem)));
20119 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20120 ins_encode %{
20121 int vlen_enc = vector_length_encoding(this);
20122 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20123 %}
20124 ins_pipe( pipe_slow );
20125 %}
20126
20127 // Shorts/Chars vector sub
20128 instruct vsubS(vec dst, vec src) %{
20129 predicate(UseAVX == 0);
20130 match(Set dst (SubVS dst src));
20131 format %{ "psubw $dst,$src\t! sub packedS" %}
20132 ins_encode %{
20133 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20134 %}
20135 ins_pipe( pipe_slow );
20136 %}
20137
20138
20139 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20140 predicate(UseAVX > 0);
20141 match(Set dst (SubVS src1 src2));
20142 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20143 ins_encode %{
20144 int vlen_enc = vector_length_encoding(this);
20145 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20146 %}
20147 ins_pipe( pipe_slow );
20148 %}
20149
20150 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20151 predicate((UseAVX > 0) &&
20152 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20153 match(Set dst (SubVS src (LoadVector mem)));
20154 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20155 ins_encode %{
20156 int vlen_enc = vector_length_encoding(this);
20157 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20158 %}
20159 ins_pipe( pipe_slow );
20160 %}
20161
20162 // Integers vector sub
20163 instruct vsubI(vec dst, vec src) %{
20164 predicate(UseAVX == 0);
20165 match(Set dst (SubVI dst src));
20166 format %{ "psubd $dst,$src\t! sub packedI" %}
20167 ins_encode %{
20168 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20169 %}
20170 ins_pipe( pipe_slow );
20171 %}
20172
20173 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20174 predicate(UseAVX > 0);
20175 match(Set dst (SubVI src1 src2));
20176 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20177 ins_encode %{
20178 int vlen_enc = vector_length_encoding(this);
20179 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20180 %}
20181 ins_pipe( pipe_slow );
20182 %}
20183
20184 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20185 predicate((UseAVX > 0) &&
20186 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20187 match(Set dst (SubVI src (LoadVector mem)));
20188 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20189 ins_encode %{
20190 int vlen_enc = vector_length_encoding(this);
20191 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20192 %}
20193 ins_pipe( pipe_slow );
20194 %}
20195
20196 // Longs vector sub
20197 instruct vsubL(vec dst, vec src) %{
20198 predicate(UseAVX == 0);
20199 match(Set dst (SubVL dst src));
20200 format %{ "psubq $dst,$src\t! sub packedL" %}
20201 ins_encode %{
20202 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20203 %}
20204 ins_pipe( pipe_slow );
20205 %}
20206
20207 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20208 predicate(UseAVX > 0);
20209 match(Set dst (SubVL src1 src2));
20210 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20211 ins_encode %{
20212 int vlen_enc = vector_length_encoding(this);
20213 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20214 %}
20215 ins_pipe( pipe_slow );
20216 %}
20217
20218
20219 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20220 predicate((UseAVX > 0) &&
20221 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20222 match(Set dst (SubVL src (LoadVector mem)));
20223 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20224 ins_encode %{
20225 int vlen_enc = vector_length_encoding(this);
20226 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20227 %}
20228 ins_pipe( pipe_slow );
20229 %}
20230
20231 // Floats vector sub
20232 instruct vsubF(vec dst, vec src) %{
20233 predicate(UseAVX == 0);
20234 match(Set dst (SubVF dst src));
20235 format %{ "subps $dst,$src\t! sub packedF" %}
20236 ins_encode %{
20237 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20238 %}
20239 ins_pipe( pipe_slow );
20240 %}
20241
20242 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20243 predicate(UseAVX > 0);
20244 match(Set dst (SubVF src1 src2));
20245 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20246 ins_encode %{
20247 int vlen_enc = vector_length_encoding(this);
20248 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20249 %}
20250 ins_pipe( pipe_slow );
20251 %}
20252
20253 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20254 predicate((UseAVX > 0) &&
20255 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20256 match(Set dst (SubVF src (LoadVector mem)));
20257 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20258 ins_encode %{
20259 int vlen_enc = vector_length_encoding(this);
20260 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20261 %}
20262 ins_pipe( pipe_slow );
20263 %}
20264
20265 // Doubles vector sub
20266 instruct vsubD(vec dst, vec src) %{
20267 predicate(UseAVX == 0);
20268 match(Set dst (SubVD dst src));
20269 format %{ "subpd $dst,$src\t! sub packedD" %}
20270 ins_encode %{
20271 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20272 %}
20273 ins_pipe( pipe_slow );
20274 %}
20275
20276 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20277 predicate(UseAVX > 0);
20278 match(Set dst (SubVD src1 src2));
20279 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20280 ins_encode %{
20281 int vlen_enc = vector_length_encoding(this);
20282 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20283 %}
20284 ins_pipe( pipe_slow );
20285 %}
20286
20287 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20288 predicate((UseAVX > 0) &&
20289 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20290 match(Set dst (SubVD src (LoadVector mem)));
20291 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20292 ins_encode %{
20293 int vlen_enc = vector_length_encoding(this);
20294 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20295 %}
20296 ins_pipe( pipe_slow );
20297 %}
20298
20299 // --------------------------------- MUL --------------------------------------
20300
20301 // Byte vector mul
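// There is no byte-by-byte vector multiply instruction on x86, so the bytes
// are widened to 16-bit lanes, multiplied with pmullw/vpmullw, and the low
// byte of each 16-bit product is packed back into the result.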
20302 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20303 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20304 match(Set dst (MulVB src1 src2));
20305 effect(TEMP dst, TEMP xtmp);
20306 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20307 ins_encode %{
20308 assert(UseSSE > 3, "required");
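    // Sign-extend both operands from bytes to words and multiply in 16-bit lanes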
20309 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20310 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20311 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
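    // The psllw/psrlw pair zeroes the high byte of each word, keeping the low product byte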
20312 __ psllw($dst$$XMMRegister, 8);
20313 __ psrlw($dst$$XMMRegister, 8);
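    // Pack the zero-extended low bytes back into bytes (no saturation occurs)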
20314 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20315 %}
20316 ins_pipe( pipe_slow );
20317 %}
20318
20319 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20320 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20321 match(Set dst (MulVB src1 src2));
20322 effect(TEMP dst, TEMP xtmp);
20323 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20324 ins_encode %{
20325 assert(UseSSE > 3, "required");
20326 // Odd-index elements
20327 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20328 __ psrlw($dst$$XMMRegister, 8);
20329 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20330 __ psrlw($xtmp$$XMMRegister, 8);
20331 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20332 __ psllw($dst$$XMMRegister, 8);
20333 // Even-index elements
20334 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20335 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20336 __ psllw($xtmp$$XMMRegister, 8);
20337 __ psrlw($xtmp$$XMMRegister, 8);
20338 // Combine
20339 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20340 %}
20341 ins_pipe( pipe_slow );
20342 %}
20343
20344 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20345 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20346 match(Set dst (MulVB src1 src2));
20347 effect(TEMP xtmp1, TEMP xtmp2);
20348 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20349 ins_encode %{
20350 int vlen_enc = vector_length_encoding(this);
20351 // Odd-index elements
20352 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20353 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20354 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20355 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20356 // Even-index elements
20357 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20358 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20359 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20360 // Combine
20361 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20362 %}
20363 ins_pipe( pipe_slow );
20364 %}
20365
20366 // Shorts/Chars vector mul
20367 instruct vmulS(vec dst, vec src) %{
20368 predicate(UseAVX == 0);
20369 match(Set dst (MulVS dst src));
20370 format %{ "pmullw $dst,$src\t! mul packedS" %}
20371 ins_encode %{
20372 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20373 %}
20374 ins_pipe( pipe_slow );
20375 %}
20376
20377 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20378 predicate(UseAVX > 0);
20379 match(Set dst (MulVS src1 src2));
20380 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20381 ins_encode %{
20382 int vlen_enc = vector_length_encoding(this);
20383 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20384 %}
20385 ins_pipe( pipe_slow );
20386 %}
20387
20388 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20389 predicate((UseAVX > 0) &&
20390 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20391 match(Set dst (MulVS src (LoadVector mem)));
20392 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20393 ins_encode %{
20394 int vlen_enc = vector_length_encoding(this);
20395 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20396 %}
20397 ins_pipe( pipe_slow );
20398 %}
20399
20400 // Integers vector mul
20401 instruct vmulI(vec dst, vec src) %{
20402 predicate(UseAVX == 0);
20403 match(Set dst (MulVI dst src));
20404 format %{ "pmulld $dst,$src\t! mul packedI" %}
20405 ins_encode %{
20406 assert(UseSSE > 3, "required");
20407 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20408 %}
20409 ins_pipe( pipe_slow );
20410 %}
20411
20412 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20413 predicate(UseAVX > 0);
20414 match(Set dst (MulVI src1 src2));
20415 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20416 ins_encode %{
20417 int vlen_enc = vector_length_encoding(this);
20418 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20419 %}
20420 ins_pipe( pipe_slow );
20421 %}
20422
20423 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20424 predicate((UseAVX > 0) &&
20425 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20426 match(Set dst (MulVI src (LoadVector mem)));
20427 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20428 ins_encode %{
20429 int vlen_enc = vector_length_encoding(this);
20430 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20431 %}
20432 ins_pipe( pipe_slow );
20433 %}
20434
20435 // Longs vector mul
20436 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20437 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20438 VM_Version::supports_avx512dq()) ||
20439 VM_Version::supports_avx512vldq());
20440 match(Set dst (MulVL src1 src2));
20441 ins_cost(500);
20442 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20443 ins_encode %{
20444 assert(UseAVX > 2, "required");
20445 int vlen_enc = vector_length_encoding(this);
20446 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20447 %}
20448 ins_pipe( pipe_slow );
20449 %}
20450
20451 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20452 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20453 VM_Version::supports_avx512dq()) ||
20454 (Matcher::vector_length_in_bytes(n) > 8 &&
20455 VM_Version::supports_avx512vldq()));
20456 match(Set dst (MulVL src (LoadVector mem)));
20457 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20458 ins_cost(500);
20459 ins_encode %{
20460 assert(UseAVX > 2, "required");
20461 int vlen_enc = vector_length_encoding(this);
20462 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20463 %}
20464 ins_pipe( pipe_slow );
20465 %}
20466
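// 64x64-bit multiply built from 32-bit pieces: with a = a_hi:a_lo and
// b = b_hi:b_lo, a*b mod 2^64 = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32).
// The cross products come from pmulld after a dword swap (pshufd 0xB1), are
// summed and shifted into the high half; pmuludq supplies the full 64-bit
// lo-lo product.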
20467 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20468 predicate(UseAVX == 0);
20469 match(Set dst (MulVL src1 src2));
20470 ins_cost(500);
20471 effect(TEMP dst, TEMP xtmp);
20472 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20473 ins_encode %{
20474 assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only the lower 32 bits of each product are needed
20476 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20477 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20478 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20479 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20480 __ psllq($dst$$XMMRegister, 32);
20481 // Get the lo-lo products
20482 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20483 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20484 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20485 %}
20486 ins_pipe( pipe_slow );
20487 %}
20488
20489 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20490 predicate(UseAVX > 0 &&
20491 ((Matcher::vector_length_in_bytes(n) == 64 &&
20492 !VM_Version::supports_avx512dq()) ||
20493 (Matcher::vector_length_in_bytes(n) < 64 &&
20494 !VM_Version::supports_avx512vldq())));
20495 match(Set dst (MulVL src1 src2));
20496 effect(TEMP xtmp1, TEMP xtmp2);
20497 ins_cost(500);
20498 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20499 ins_encode %{
20500 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only the lower 32 bits of each product are needed
20502 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20503 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20504 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20505 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20506 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20507 // Get the lo-lo products
20508 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20509 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20510 %}
20511 ins_pipe( pipe_slow );
20512 %}
20513
20514 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20515 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20516 match(Set dst (MulVL src1 src2));
20517 ins_cost(100);
20518 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20519 ins_encode %{
20520 int vlen_enc = vector_length_encoding(this);
20521 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20522 %}
20523 ins_pipe( pipe_slow );
20524 %}
20525
20526 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20527 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20528 match(Set dst (MulVL src1 src2));
20529 ins_cost(100);
20530 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20531 ins_encode %{
20532 int vlen_enc = vector_length_encoding(this);
20533 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20534 %}
20535 ins_pipe( pipe_slow );
20536 %}
20537
20538 // Floats vector mul
20539 instruct vmulF(vec dst, vec src) %{
20540 predicate(UseAVX == 0);
20541 match(Set dst (MulVF dst src));
20542 format %{ "mulps $dst,$src\t! mul packedF" %}
20543 ins_encode %{
20544 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20545 %}
20546 ins_pipe( pipe_slow );
20547 %}
20548
20549 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20550 predicate(UseAVX > 0);
20551 match(Set dst (MulVF src1 src2));
20552 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20553 ins_encode %{
20554 int vlen_enc = vector_length_encoding(this);
20555 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20556 %}
20557 ins_pipe( pipe_slow );
20558 %}
20559
20560 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20561 predicate((UseAVX > 0) &&
20562 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20563 match(Set dst (MulVF src (LoadVector mem)));
20564 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20565 ins_encode %{
20566 int vlen_enc = vector_length_encoding(this);
20567 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20568 %}
20569 ins_pipe( pipe_slow );
20570 %}
20571
20572 // Doubles vector mul
20573 instruct vmulD(vec dst, vec src) %{
20574 predicate(UseAVX == 0);
20575 match(Set dst (MulVD dst src));
20576 format %{ "mulpd $dst,$src\t! mul packedD" %}
20577 ins_encode %{
20578 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20579 %}
20580 ins_pipe( pipe_slow );
20581 %}
20582
20583 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20584 predicate(UseAVX > 0);
20585 match(Set dst (MulVD src1 src2));
20586 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20587 ins_encode %{
20588 int vlen_enc = vector_length_encoding(this);
20589 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20590 %}
20591 ins_pipe( pipe_slow );
20592 %}
20593
20594 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20595 predicate((UseAVX > 0) &&
20596 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20597 match(Set dst (MulVD src (LoadVector mem)));
20598 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20599 ins_encode %{
20600 int vlen_enc = vector_length_encoding(this);
20601 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20602 %}
20603 ins_pipe( pipe_slow );
20604 %}
20605
20606 // --------------------------------- DIV --------------------------------------
20607
20608 // Floats vector div
20609 instruct vdivF(vec dst, vec src) %{
20610 predicate(UseAVX == 0);
20611 match(Set dst (DivVF dst src));
20612 format %{ "divps $dst,$src\t! div packedF" %}
20613 ins_encode %{
20614 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20615 %}
20616 ins_pipe( pipe_slow );
20617 %}
20618
20619 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20620 predicate(UseAVX > 0);
20621 match(Set dst (DivVF src1 src2));
20622 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20623 ins_encode %{
20624 int vlen_enc = vector_length_encoding(this);
20625 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20626 %}
20627 ins_pipe( pipe_slow );
20628 %}
20629
20630 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20631 predicate((UseAVX > 0) &&
20632 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20633 match(Set dst (DivVF src (LoadVector mem)));
20634 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20635 ins_encode %{
20636 int vlen_enc = vector_length_encoding(this);
20637 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20638 %}
20639 ins_pipe( pipe_slow );
20640 %}
20641
20642 // Doubles vector div
20643 instruct vdivD(vec dst, vec src) %{
20644 predicate(UseAVX == 0);
20645 match(Set dst (DivVD dst src));
20646 format %{ "divpd $dst,$src\t! div packedD" %}
20647 ins_encode %{
20648 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20649 %}
20650 ins_pipe( pipe_slow );
20651 %}
20652
20653 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20654 predicate(UseAVX > 0);
20655 match(Set dst (DivVD src1 src2));
20656 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20657 ins_encode %{
20658 int vlen_enc = vector_length_encoding(this);
20659 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20660 %}
20661 ins_pipe( pipe_slow );
20662 %}
20663
20664 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20665 predicate((UseAVX > 0) &&
20666 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20667 match(Set dst (DivVD src (LoadVector mem)));
20668 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20669 ins_encode %{
20670 int vlen_enc = vector_length_encoding(this);
20671 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20672 %}
20673 ins_pipe( pipe_slow );
20674 %}
20675
20676 // ------------------------------ MinMax ---------------------------------------
20677
20678 // Byte, Short, Int vector Min/Max
20679 instruct minmax_reg_sse(vec dst, vec src) %{
20680 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20681 UseAVX == 0);
20682 match(Set dst (MinV dst src));
20683 match(Set dst (MaxV dst src));
20684 format %{ "vector_minmax $dst,$src\t! " %}
20685 ins_encode %{
20686 assert(UseSSE >= 4, "required");
20687
20688 int opcode = this->ideal_Opcode();
20689 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20690 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20691 %}
20692 ins_pipe( pipe_slow );
20693 %}
20694
20695 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20696 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20697 UseAVX > 0);
20698 match(Set dst (MinV src1 src2));
20699 match(Set dst (MaxV src1 src2));
20700 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20701 ins_encode %{
20702 int opcode = this->ideal_Opcode();
20703 int vlen_enc = vector_length_encoding(this);
20704 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20705
20706 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20707 %}
20708 ins_pipe( pipe_slow );
20709 %}
20710
20711 // Long vector Min/Max
20712 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20713 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20714 UseAVX == 0);
20715 match(Set dst (MinV dst src));
20716 match(Set dst (MaxV src dst));
20717 effect(TEMP dst, TEMP tmp);
20718 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20719 ins_encode %{
20720 assert(UseSSE >= 4, "required");
20721
20722 int opcode = this->ideal_Opcode();
20723 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20724 assert(elem_bt == T_LONG, "sanity");
20725
20726 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20727 %}
20728 ins_pipe( pipe_slow );
20729 %}
20730
20731 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20732 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20733 UseAVX > 0 && !VM_Version::supports_avx512vl());
20734 match(Set dst (MinV src1 src2));
20735 match(Set dst (MaxV src1 src2));
20736 effect(TEMP dst);
20737 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20738 ins_encode %{
20739 int vlen_enc = vector_length_encoding(this);
20740 int opcode = this->ideal_Opcode();
20741 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20742 assert(elem_bt == T_LONG, "sanity");
20743
20744 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20745 %}
20746 ins_pipe( pipe_slow );
20747 %}
20748
20749 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20750 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20751 Matcher::vector_element_basic_type(n) == T_LONG);
20752 match(Set dst (MinV src1 src2));
20753 match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20755 ins_encode %{
20756 assert(UseAVX > 2, "required");
20757
20758 int vlen_enc = vector_length_encoding(this);
20759 int opcode = this->ideal_Opcode();
20760 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20761 assert(elem_bt == T_LONG, "sanity");
20762
20763 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20764 %}
20765 ins_pipe( pipe_slow );
20766 %}
20767
20768 // Float/Double vector Min/Max
20769 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20770 predicate(VM_Version::supports_avx10_2() &&
20771 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20772 match(Set dst (MinV a b));
20773 match(Set dst (MaxV a b));
20774 format %{ "vector_minmaxFP $dst, $a, $b" %}
20775 ins_encode %{
20776 int vlen_enc = vector_length_encoding(this);
20777 int opcode = this->ideal_Opcode();
20778 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20779 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20780 %}
20781 ins_pipe( pipe_slow );
20782 %}
20783
20784 // Float/Double vector Min/Max
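// Note: the legacy SSE/AVX min/max instructions do not implement Java
// semantics for NaN and -0.0 (e.g. vminps returns the second operand when
// either input is NaN), so the expansions below blend the operands through
// the TEMP registers. The AVX10.2 variant above can instead use a single
// instruction with the required semantics.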
20785 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20786 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20787 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20788 UseAVX > 0);
20789 match(Set dst (MinV a b));
20790 match(Set dst (MaxV a b));
20791 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20792 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20793 ins_encode %{
20794 assert(UseAVX > 0, "required");
20795
20796 int opcode = this->ideal_Opcode();
20797 int vlen_enc = vector_length_encoding(this);
20798 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20799
20800 __ vminmax_fp(opcode, elem_bt,
20801 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20802 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20803 %}
20804 ins_pipe( pipe_slow );
20805 %}
20806
20807 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20808 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20809 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20810 match(Set dst (MinV a b));
20811 match(Set dst (MaxV a b));
20812 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20813 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20814 ins_encode %{
20815 assert(UseAVX > 2, "required");
20816
20817 int opcode = this->ideal_Opcode();
20818 int vlen_enc = vector_length_encoding(this);
20819 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20820
20821 __ evminmax_fp(opcode, elem_bt,
20822 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20823 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20824 %}
20825 ins_pipe( pipe_slow );
20826 %}
20827
20828 // ------------------------------ Unsigned vector Min/Max ----------------------
20829
20830 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20831 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20832 match(Set dst (UMinV a b));
20833 match(Set dst (UMaxV a b));
20834 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20835 ins_encode %{
20836 int opcode = this->ideal_Opcode();
20837 int vlen_enc = vector_length_encoding(this);
20838 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20839 assert(is_integral_type(elem_bt), "");
20840 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20841 %}
20842 ins_pipe( pipe_slow );
20843 %}
20844
20845 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20846 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20847 match(Set dst (UMinV a (LoadVector b)));
20848 match(Set dst (UMaxV a (LoadVector b)));
20849 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20850 ins_encode %{
20851 int opcode = this->ideal_Opcode();
20852 int vlen_enc = vector_length_encoding(this);
20853 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20854 assert(is_integral_type(elem_bt), "");
20855 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20856 %}
20857 ins_pipe( pipe_slow );
20858 %}
20859
20860 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20861 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20862 match(Set dst (UMinV a b));
20863 match(Set dst (UMaxV a b));
20864 effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20866 ins_encode %{
20867 int opcode = this->ideal_Opcode();
20868 int vlen_enc = vector_length_encoding(this);
20869 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20870 %}
20871 ins_pipe( pipe_slow );
20872 %}
20873
20874 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20875 match(Set dst (UMinV (Binary dst src2) mask));
20876 match(Set dst (UMaxV (Binary dst src2) mask));
20877 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20878 ins_encode %{
20879 int vlen_enc = vector_length_encoding(this);
20880 BasicType bt = Matcher::vector_element_basic_type(this);
20881 int opc = this->ideal_Opcode();
20882 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20883 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20884 %}
20885 ins_pipe( pipe_slow );
20886 %}
20887
20888 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20889 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20890 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20891 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20892 ins_encode %{
20893 int vlen_enc = vector_length_encoding(this);
20894 BasicType bt = Matcher::vector_element_basic_type(this);
20895 int opc = this->ideal_Opcode();
20896 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20897 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20898 %}
20899 ins_pipe( pipe_slow );
20900 %}
20901
20902 // --------------------------------- Signum/CopySign ---------------------------
20903
20904 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20905 match(Set dst (SignumF dst (Binary zero one)));
20906 effect(KILL cr);
20907 format %{ "signumF $dst, $dst" %}
20908 ins_encode %{
20909 int opcode = this->ideal_Opcode();
20910 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20911 %}
20912 ins_pipe( pipe_slow );
20913 %}
20914
20915 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20916 match(Set dst (SignumD dst (Binary zero one)));
20917 effect(KILL cr);
20918 format %{ "signumD $dst, $dst" %}
20919 ins_encode %{
20920 int opcode = this->ideal_Opcode();
20921 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20922 %}
20923 ins_pipe( pipe_slow );
20924 %}
20925
20926 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20927 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20928 match(Set dst (SignumVF src (Binary zero one)));
20929 match(Set dst (SignumVD src (Binary zero one)));
20930 effect(TEMP dst, TEMP xtmp1);
20931 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20932 ins_encode %{
20933 int opcode = this->ideal_Opcode();
20934 int vec_enc = vector_length_encoding(this);
20935 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20936 $xtmp1$$XMMRegister, vec_enc);
20937 %}
20938 ins_pipe( pipe_slow );
20939 %}
20940
20941 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20942 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20943 match(Set dst (SignumVF src (Binary zero one)));
20944 match(Set dst (SignumVD src (Binary zero one)));
20945 effect(TEMP dst, TEMP ktmp1);
20946 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20947 ins_encode %{
20948 int opcode = this->ideal_Opcode();
20949 int vec_enc = vector_length_encoding(this);
20950 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20951 $ktmp1$$KRegister, vec_enc);
20952 %}
20953 ins_pipe( pipe_slow );
20954 %}
20955
20956 // ---------------------------------------
20957 // For copySign use 0xE4 as writemask for vpternlog
20958 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20959 // C (xmm2) is set to 0x7FFFFFFF
20960 // Wherever xmm2 is 0, we want to pick from B (sign)
20961 // Wherever xmm2 is 1, we want to pick from A (src)
20962 //
20963 // A B C Result
20964 // 0 0 0 0
20965 // 0 0 1 0
20966 // 0 1 0 1
20967 // 0 1 1 0
20968 // 1 0 0 0
20969 // 1 0 1 1
20970 // 1 1 0 1
20971 // 1 1 1 1
20972 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
20974 // ---------------------------------------
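// Sanity check: the four C==1 rows of the table give Result == A and the four
// C==0 rows give Result == B, i.e. the immediate 0xE4 computes C ? A : B.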
20975
20976 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20977 match(Set dst (CopySignF dst src));
20978 effect(TEMP tmp1, TEMP tmp2);
20979 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20980 ins_encode %{
20981 __ movl($tmp2$$Register, 0x7FFFFFFF);
20982 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20983 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20984 %}
20985 ins_pipe( pipe_slow );
20986 %}
20987
20988 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20989 match(Set dst (CopySignD dst (Binary src zero)));
20990 ins_cost(100);
20991 effect(TEMP tmp1, TEMP tmp2);
20992 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20993 ins_encode %{
20994 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20995 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20996 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20997 %}
20998 ins_pipe( pipe_slow );
20999 %}
21000
21001 //----------------------------- CompressBits/ExpandBits ------------------------
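// BMI2 pext gathers the bits of src selected by the set bits of mask into the
// contiguous low bits of the destination; pdep scatters the contiguous low
// bits of src to the positions of the set bits of mask. For example,
// pext(0b10110100, 0b11001100) = 0b1001 and pdep(0b1001, 0b11001100) = 0b10000100.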
21002
21003 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21004 predicate(n->bottom_type()->isa_int());
21005 match(Set dst (CompressBits src mask));
21006 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21007 ins_encode %{
21008 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21009 %}
21010 ins_pipe( pipe_slow );
21011 %}
21012
21013 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21014 predicate(n->bottom_type()->isa_int());
21015 match(Set dst (ExpandBits src mask));
21016 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21017 ins_encode %{
21018 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21019 %}
21020 ins_pipe( pipe_slow );
21021 %}
21022
21023 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21024 predicate(n->bottom_type()->isa_int());
21025 match(Set dst (CompressBits src (LoadI mask)));
21026 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21027 ins_encode %{
21028 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21029 %}
21030 ins_pipe( pipe_slow );
21031 %}
21032
21033 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21034 predicate(n->bottom_type()->isa_int());
21035 match(Set dst (ExpandBits src (LoadI mask)));
21036 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21037 ins_encode %{
21038 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21039 %}
21040 ins_pipe( pipe_slow );
21041 %}
21042
21043 // --------------------------------- Sqrt --------------------------------------
21044
21045 instruct vsqrtF_reg(vec dst, vec src) %{
21046 match(Set dst (SqrtVF src));
21047 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21048 ins_encode %{
21049 assert(UseAVX > 0, "required");
21050 int vlen_enc = vector_length_encoding(this);
21051 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21052 %}
21053 ins_pipe( pipe_slow );
21054 %}
21055
21056 instruct vsqrtF_mem(vec dst, memory mem) %{
21057 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21058 match(Set dst (SqrtVF (LoadVector mem)));
21059 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21060 ins_encode %{
21061 assert(UseAVX > 0, "required");
21062 int vlen_enc = vector_length_encoding(this);
21063 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21064 %}
21065 ins_pipe( pipe_slow );
21066 %}
21067
21068 // Floating point vector sqrt
21069 instruct vsqrtD_reg(vec dst, vec src) %{
21070 match(Set dst (SqrtVD src));
21071 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21072 ins_encode %{
21073 assert(UseAVX > 0, "required");
21074 int vlen_enc = vector_length_encoding(this);
21075 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21076 %}
21077 ins_pipe( pipe_slow );
21078 %}
21079
21080 instruct vsqrtD_mem(vec dst, memory mem) %{
21081 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21082 match(Set dst (SqrtVD (LoadVector mem)));
21083 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21084 ins_encode %{
21085 assert(UseAVX > 0, "required");
21086 int vlen_enc = vector_length_encoding(this);
21087 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21088 %}
21089 ins_pipe( pipe_slow );
21090 %}
21091
21092 // ------------------------------ Shift ---------------------------------------
21093
21094 // Left and right shift count vectors are the same on x86
21095 // (only lowest bits of xmm reg are used for count).
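// The packed shift instructions take the count from the low 64 bits of the
// xmm operand, so a single movdl of the scalar count serves both shift
// directions.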
21096 instruct vshiftcnt(vec dst, rRegI cnt) %{
21097 match(Set dst (LShiftCntV cnt));
21098 match(Set dst (RShiftCntV cnt));
21099 format %{ "movdl $dst,$cnt\t! load shift count" %}
21100 ins_encode %{
21101 __ movdl($dst$$XMMRegister, $cnt$$Register);
21102 %}
21103 ins_pipe( pipe_slow );
21104 %}
21105
21106 // Byte vector shift
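// There is no per-byte vector shift on x86, so the bytes are widened to
// 16-bit lanes (vextendbw), shifted as words, masked back to the low byte,
// and repacked.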
21107 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21108 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21109 match(Set dst ( LShiftVB src shift));
21110 match(Set dst ( RShiftVB src shift));
21111 match(Set dst (URShiftVB src shift));
21112 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21113 format %{"vector_byte_shift $dst,$src,$shift" %}
21114 ins_encode %{
21115 assert(UseSSE > 3, "required");
21116 int opcode = this->ideal_Opcode();
21117 bool sign = (opcode != Op_URShiftVB);
21118 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21119 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21120 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21121 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21122 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21123 %}
21124 ins_pipe( pipe_slow );
21125 %}
21126
21127 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21128 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21129 UseAVX <= 1);
21130 match(Set dst ( LShiftVB src shift));
21131 match(Set dst ( RShiftVB src shift));
21132 match(Set dst (URShiftVB src shift));
21133 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21134 format %{"vector_byte_shift $dst,$src,$shift" %}
21135 ins_encode %{
21136 assert(UseSSE > 3, "required");
21137 int opcode = this->ideal_Opcode();
21138 bool sign = (opcode != Op_URShiftVB);
21139 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21140 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21141 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21142 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21143 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21144 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21145 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21146 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21147 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21148 %}
21149 ins_pipe( pipe_slow );
21150 %}
21151
21152 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21153 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21154 UseAVX > 1);
21155 match(Set dst ( LShiftVB src shift));
21156 match(Set dst ( RShiftVB src shift));
21157 match(Set dst (URShiftVB src shift));
21158 effect(TEMP dst, TEMP tmp);
21159 format %{"vector_byte_shift $dst,$src,$shift" %}
21160 ins_encode %{
21161 int opcode = this->ideal_Opcode();
21162 bool sign = (opcode != Op_URShiftVB);
21163 int vlen_enc = Assembler::AVX_256bit;
21164 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21165 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21166 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21167 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21168 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21169 %}
21170 ins_pipe( pipe_slow );
21171 %}
21172
21173 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21174 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21175 match(Set dst ( LShiftVB src shift));
21176 match(Set dst ( RShiftVB src shift));
21177 match(Set dst (URShiftVB src shift));
21178 effect(TEMP dst, TEMP tmp);
21179 format %{"vector_byte_shift $dst,$src,$shift" %}
21180 ins_encode %{
21181 assert(UseAVX > 1, "required");
21182 int opcode = this->ideal_Opcode();
21183 bool sign = (opcode != Op_URShiftVB);
21184 int vlen_enc = Assembler::AVX_256bit;
21185 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21186 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21187 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21188 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21189 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21190 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21191 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21192 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21193 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21194 %}
21195 ins_pipe( pipe_slow );
21196 %}
21197
21198 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21199 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21200 match(Set dst ( LShiftVB src shift));
21201 match(Set dst (RShiftVB src shift));
21202 match(Set dst (URShiftVB src shift));
21203 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21204 format %{"vector_byte_shift $dst,$src,$shift" %}
21205 ins_encode %{
21206 assert(UseAVX > 2, "required");
21207 int opcode = this->ideal_Opcode();
21208 bool sign = (opcode != Op_URShiftVB);
21209 int vlen_enc = Assembler::AVX_512bit;
21210 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21211 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21212 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21213 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21214 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21215 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21216 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21217 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21218 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21219 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21220 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21221 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21222 %}
21223 ins_pipe( pipe_slow );
21224 %}
21225
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int with
// sign extension before the shift. But char vectors are fine since chars are
// unsigned values.
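// For example, for a short with value -1 Java computes
// (short)(0xFFFFFFFF >>> 2) = (short)0x3FFFFFFF, which is still -1 (0xFFFF),
// whereas a 16-bit psrlw of 0xFFFF by 2 would produce 0x3FFF.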
// Shorts/Chars vector shift
21231 instruct vshiftS(vec dst, vec src, vec shift) %{
21232 predicate(!n->as_ShiftV()->is_var_shift());
21233 match(Set dst ( LShiftVS src shift));
21234 match(Set dst ( RShiftVS src shift));
21235 match(Set dst (URShiftVS src shift));
21236 effect(TEMP dst, USE src, USE shift);
21237 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21238 ins_encode %{
21239 int opcode = this->ideal_Opcode();
21240 if (UseAVX > 0) {
21241 int vlen_enc = vector_length_encoding(this);
21242 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21243 } else {
21244 int vlen = Matcher::vector_length(this);
21245 if (vlen == 2) {
21246 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21247 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21248 } else if (vlen == 4) {
21249 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21250 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21251 } else {
        assert(vlen == 8, "sanity");
21253 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21254 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21255 }
21256 }
21257 %}
21258 ins_pipe( pipe_slow );
21259 %}
21260
// Integers vector shift
21262 instruct vshiftI(vec dst, vec src, vec shift) %{
21263 predicate(!n->as_ShiftV()->is_var_shift());
21264 match(Set dst ( LShiftVI src shift));
21265 match(Set dst ( RShiftVI src shift));
21266 match(Set dst (URShiftVI src shift));
21267 effect(TEMP dst, USE src, USE shift);
21268 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21269 ins_encode %{
21270 int opcode = this->ideal_Opcode();
21271 if (UseAVX > 0) {
21272 int vlen_enc = vector_length_encoding(this);
21273 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21274 } else {
21275 int vlen = Matcher::vector_length(this);
21276 if (vlen == 2) {
21277 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21278 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21279 } else {
21280 assert(vlen == 4, "sanity");
21281 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21282 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21283 }
21284 }
21285 %}
21286 ins_pipe( pipe_slow );
21287 %}
21288
// Integers vector constant shift
21290 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21291 match(Set dst (LShiftVI src (LShiftCntV shift)));
21292 match(Set dst (RShiftVI src (RShiftCntV shift)));
21293 match(Set dst (URShiftVI src (RShiftCntV shift)));
21294 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21295 ins_encode %{
21296 int opcode = this->ideal_Opcode();
21297 if (UseAVX > 0) {
21298 int vector_len = vector_length_encoding(this);
21299 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21300 } else {
21301 int vlen = Matcher::vector_length(this);
21302 if (vlen == 2) {
21303 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21304 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21305 } else {
21306 assert(vlen == 4, "sanity");
21307 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21308 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21309 }
21310 }
21311 %}
21312 ins_pipe( pipe_slow );
21313 %}
21314
21315 // Longs vector shift
21316 instruct vshiftL(vec dst, vec src, vec shift) %{
21317 predicate(!n->as_ShiftV()->is_var_shift());
21318 match(Set dst ( LShiftVL src shift));
21319 match(Set dst (URShiftVL src shift));
21320 effect(TEMP dst, USE src, USE shift);
21321 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21322 ins_encode %{
21323 int opcode = this->ideal_Opcode();
21324 if (UseAVX > 0) {
21325 int vlen_enc = vector_length_encoding(this);
21326 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21327 } else {
      assert(Matcher::vector_length(this) == 2, "sanity");
21329 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21330 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21331 }
21332 %}
21333 ins_pipe( pipe_slow );
21334 %}
21335
21336 // Longs vector constant shift
21337 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21338 match(Set dst (LShiftVL src (LShiftCntV shift)));
21339 match(Set dst (URShiftVL src (RShiftCntV shift)));
21340 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21341 ins_encode %{
21342 int opcode = this->ideal_Opcode();
21343 if (UseAVX > 0) {
21344 int vector_len = vector_length_encoding(this);
21345 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21346 } else {
      assert(Matcher::vector_length(this) == 2, "sanity");
21348 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21349 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21350 }
21351 %}
21352 ins_pipe( pipe_slow );
21353 %}
21354
// ------------------- Arithmetic Right Shift ---------------------------------
21356 // Long vector arithmetic right shift
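// SSE2/AVX2 provide no packed arithmetic right shift for 64-bit lanes, so it
// is emulated with the identity  x >> s == ((x >>> s) ^ m) - m,  where
// m = (sign-bit mask) >>> s re-extends the sign into the vacated high bits.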
21357 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21358 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21359 match(Set dst (RShiftVL src shift));
21360 effect(TEMP dst, TEMP tmp);
21361 format %{ "vshiftq $dst,$src,$shift" %}
21362 ins_encode %{
21363 uint vlen = Matcher::vector_length(this);
21364 if (vlen == 2) {
21365 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21366 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21367 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21368 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21369 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21370 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21371 } else {
21372 assert(vlen == 4, "sanity");
21373 assert(UseAVX > 1, "required");
21374 int vlen_enc = Assembler::AVX_256bit;
21375 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21376 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21377 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21378 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21379 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21380 }
21381 %}
21382 ins_pipe( pipe_slow );
21383 %}
21384
21385 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21386 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21387 match(Set dst (RShiftVL src shift));
21388 format %{ "vshiftq $dst,$src,$shift" %}
21389 ins_encode %{
21390 int vlen_enc = vector_length_encoding(this);
21391 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21392 %}
21393 ins_pipe( pipe_slow );
21394 %}
21395
21396 // ------------------- Variable Shift -----------------------------
21397 // Byte variable shift
21398 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21399 predicate(Matcher::vector_length(n) <= 8 &&
21400 n->as_ShiftV()->is_var_shift() &&
21401 !VM_Version::supports_avx512bw());
21402 match(Set dst ( LShiftVB src shift));
21403 match(Set dst ( RShiftVB src shift));
21404 match(Set dst (URShiftVB src shift));
21405 effect(TEMP dst, TEMP vtmp);
21406 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21407 ins_encode %{
21408 assert(UseAVX >= 2, "required");
21409
21410 int opcode = this->ideal_Opcode();
21411 int vlen_enc = Assembler::AVX_128bit;
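    // varshiftbw widens each byte to a dword, applies the per-element shift
    // counts, and packs the results back to words; the vpackuswb below
    // finishes the narrowing to bytes.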
21412 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21413 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21414 %}
21415 ins_pipe( pipe_slow );
21416 %}
21417
21418 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21419 predicate(Matcher::vector_length(n) == 16 &&
21420 n->as_ShiftV()->is_var_shift() &&
21421 !VM_Version::supports_avx512bw());
21422 match(Set dst ( LShiftVB src shift));
21423 match(Set dst ( RShiftVB src shift));
21424 match(Set dst (URShiftVB src shift));
21425 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21426 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21427 ins_encode %{
21428 assert(UseAVX >= 2, "required");
21429
21430 int opcode = this->ideal_Opcode();
21431 int vlen_enc = Assembler::AVX_128bit;
21432 // Shift lower half and get word result in dst
21433 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21434
21435 // Shift upper half and get word result in vtmp1
21436 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21437 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21438 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21439
21440 // Merge and down convert the two word results to byte in dst
21441 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21442 %}
21443 ins_pipe( pipe_slow );
21444 %}
21445
21446 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21447 predicate(Matcher::vector_length(n) == 32 &&
21448 n->as_ShiftV()->is_var_shift() &&
21449 !VM_Version::supports_avx512bw());
21450 match(Set dst ( LShiftVB src shift));
21451 match(Set dst ( RShiftVB src shift));
21452 match(Set dst (URShiftVB src shift));
21453 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21455 ins_encode %{
21456 assert(UseAVX >= 2, "required");
21457
21458 int opcode = this->ideal_Opcode();
21459 int vlen_enc = Assembler::AVX_128bit;
21460 // Process lower 128 bits and get result in dst
21461 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21462 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21463 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21464 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21465 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21466
21467 // Process higher 128 bits and get result in vtmp3
21468 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21469 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21470 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21471 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21472 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21473 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21474 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21475
21476 // Merge the two results in dst
21477 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21478 %}
21479 ins_pipe( pipe_slow );
21480 %}
21481
21482 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21483 predicate(Matcher::vector_length(n) <= 32 &&
21484 n->as_ShiftV()->is_var_shift() &&
21485 VM_Version::supports_avx512bw());
21486 match(Set dst ( LShiftVB src shift));
21487 match(Set dst ( RShiftVB src shift));
21488 match(Set dst (URShiftVB src shift));
21489 effect(TEMP dst, TEMP vtmp);
21490 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21491 ins_encode %{
21492 assert(UseAVX > 2, "required");
21493
21494 int opcode = this->ideal_Opcode();
21495 int vlen_enc = vector_length_encoding(this);
21496 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21497 %}
21498 ins_pipe( pipe_slow );
21499 %}
21500
21501 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21502 predicate(Matcher::vector_length(n) == 64 &&
21503 n->as_ShiftV()->is_var_shift() &&
21504 VM_Version::supports_avx512bw());
21505 match(Set dst ( LShiftVB src shift));
21506 match(Set dst ( RShiftVB src shift));
21507 match(Set dst (URShiftVB src shift));
21508 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21509 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21510 ins_encode %{
21511 assert(UseAVX > 2, "required");
21512
21513 int opcode = this->ideal_Opcode();
21514 int vlen_enc = Assembler::AVX_256bit;
21515 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21516 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21517 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21518 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21519 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21520 %}
21521 ins_pipe( pipe_slow );
21522 %}
21523
21524 // Short variable shift
21525 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21526 predicate(Matcher::vector_length(n) <= 8 &&
21527 n->as_ShiftV()->is_var_shift() &&
21528 !VM_Version::supports_avx512bw());
21529 match(Set dst ( LShiftVS src shift));
21530 match(Set dst ( RShiftVS src shift));
21531 match(Set dst (URShiftVS src shift));
21532 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21534 ins_encode %{
21535 assert(UseAVX >= 2, "required");
21536
21537 int opcode = this->ideal_Opcode();
21538 bool sign = (opcode != Op_URShiftVS);
21539 int vlen_enc = Assembler::AVX_256bit;
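    // Widen the shorts (sign- or zero-extending per opcode) and the shift
    // counts to ints, shift with the per-element counts, mask back to
    // 16 bits, and repack the two 128-bit halves with vpackusdw.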
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21542 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21543 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21544 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21545 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21546 %}
21547 ins_pipe( pipe_slow );
21548 %}
21549
21550 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21551 predicate(Matcher::vector_length(n) == 16 &&
21552 n->as_ShiftV()->is_var_shift() &&
21553 !VM_Version::supports_avx512bw());
21554 match(Set dst ( LShiftVS src shift));
21555 match(Set dst ( RShiftVS src shift));
21556 match(Set dst (URShiftVS src shift));
21557 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21559 ins_encode %{
21560 assert(UseAVX >= 2, "required");
21561
21562 int opcode = this->ideal_Opcode();
21563 bool sign = (opcode != Op_URShiftVS);
21564 int vlen_enc = Assembler::AVX_256bit;
21565 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21566 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21567 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21568 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21569 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21570
21571 // Shift upper half, with result in dst using vtmp1 as TEMP
21572 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21573 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21574 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21575 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21576 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21577 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21578
21579 // Merge lower and upper half result into dst
21580 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21581 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21582 %}
21583 ins_pipe( pipe_slow );
21584 %}
21585
21586 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21587 predicate(n->as_ShiftV()->is_var_shift() &&
21588 VM_Version::supports_avx512bw());
21589 match(Set dst ( LShiftVS src shift));
21590 match(Set dst ( RShiftVS src shift));
21591 match(Set dst (URShiftVS src shift));
21592 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21593 ins_encode %{
21594 assert(UseAVX > 2, "required");
21595
21596 int opcode = this->ideal_Opcode();
21597 int vlen_enc = vector_length_encoding(this);
21598 if (!VM_Version::supports_avx512vl()) {
21599 vlen_enc = Assembler::AVX_512bit;
21600 }
21601 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21602 %}
21603 ins_pipe( pipe_slow );
21604 %}
21605
// Integer variable shift
21607 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21608 predicate(n->as_ShiftV()->is_var_shift());
21609 match(Set dst ( LShiftVI src shift));
21610 match(Set dst ( RShiftVI src shift));
21611 match(Set dst (URShiftVI src shift));
21612 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21613 ins_encode %{
21614 assert(UseAVX >= 2, "required");
21615
21616 int opcode = this->ideal_Opcode();
21617 int vlen_enc = vector_length_encoding(this);
21618 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21619 %}
21620 ins_pipe( pipe_slow );
21621 %}
21622
// Long variable shift
21624 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21625 predicate(n->as_ShiftV()->is_var_shift());
21626 match(Set dst ( LShiftVL src shift));
21627 match(Set dst (URShiftVL src shift));
21628 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21629 ins_encode %{
21630 assert(UseAVX >= 2, "required");
21631
21632 int opcode = this->ideal_Opcode();
21633 int vlen_enc = vector_length_encoding(this);
21634 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21635 %}
21636 ins_pipe( pipe_slow );
21637 %}
21638
// Long variable arithmetic right shift
21640 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21641 predicate(Matcher::vector_length(n) <= 4 &&
21642 n->as_ShiftV()->is_var_shift() &&
21643 UseAVX == 2);
21644 match(Set dst (RShiftVL src shift));
21645 effect(TEMP dst, TEMP vtmp);
21646 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21647 ins_encode %{
21648 int opcode = this->ideal_Opcode();
21649 int vlen_enc = vector_length_encoding(this);
21650 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21651 $vtmp$$XMMRegister);
21652 %}
21653 ins_pipe( pipe_slow );
21654 %}
21655
21656 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21657 predicate(n->as_ShiftV()->is_var_shift() &&
21658 UseAVX > 2);
21659 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21661 ins_encode %{
21662 int opcode = this->ideal_Opcode();
21663 int vlen_enc = vector_length_encoding(this);
21664 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21665 %}
21666 ins_pipe( pipe_slow );
21667 %}
21668
21669 // --------------------------------- AND --------------------------------------
21670
21671 instruct vand(vec dst, vec src) %{
21672 predicate(UseAVX == 0);
21673 match(Set dst (AndV dst src));
21674 format %{ "pand $dst,$src\t! and vectors" %}
21675 ins_encode %{
21676 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21677 %}
21678 ins_pipe( pipe_slow );
21679 %}
21680
21681 instruct vand_reg(vec dst, vec src1, vec src2) %{
21682 predicate(UseAVX > 0);
21683 match(Set dst (AndV src1 src2));
21684 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21685 ins_encode %{
21686 int vlen_enc = vector_length_encoding(this);
21687 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21688 %}
21689 ins_pipe( pipe_slow );
21690 %}
21691
21692 instruct vand_mem(vec dst, vec src, memory mem) %{
21693 predicate((UseAVX > 0) &&
21694 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21695 match(Set dst (AndV src (LoadVector mem)));
21696 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21697 ins_encode %{
21698 int vlen_enc = vector_length_encoding(this);
21699 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21700 %}
21701 ins_pipe( pipe_slow );
21702 %}
21703
21704 // --------------------------------- OR ---------------------------------------
21705
21706 instruct vor(vec dst, vec src) %{
21707 predicate(UseAVX == 0);
21708 match(Set dst (OrV dst src));
21709 format %{ "por $dst,$src\t! or vectors" %}
21710 ins_encode %{
21711 __ por($dst$$XMMRegister, $src$$XMMRegister);
21712 %}
21713 ins_pipe( pipe_slow );
21714 %}
21715
21716 instruct vor_reg(vec dst, vec src1, vec src2) %{
21717 predicate(UseAVX > 0);
21718 match(Set dst (OrV src1 src2));
21719 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21720 ins_encode %{
21721 int vlen_enc = vector_length_encoding(this);
21722 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21723 %}
21724 ins_pipe( pipe_slow );
21725 %}
21726
21727 instruct vor_mem(vec dst, vec src, memory mem) %{
21728 predicate((UseAVX > 0) &&
21729 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21730 match(Set dst (OrV src (LoadVector mem)));
21731 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21732 ins_encode %{
21733 int vlen_enc = vector_length_encoding(this);
21734 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21735 %}
21736 ins_pipe( pipe_slow );
21737 %}
21738
21739 // --------------------------------- XOR --------------------------------------
21740
21741 instruct vxor(vec dst, vec src) %{
21742 predicate(UseAVX == 0);
21743 match(Set dst (XorV dst src));
21744 format %{ "pxor $dst,$src\t! xor vectors" %}
21745 ins_encode %{
21746 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21747 %}
21748 ins_pipe( pipe_slow );
21749 %}
21750
21751 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21752 predicate(UseAVX > 0);
21753 match(Set dst (XorV src1 src2));
21754 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21755 ins_encode %{
21756 int vlen_enc = vector_length_encoding(this);
21757 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21758 %}
21759 ins_pipe( pipe_slow );
21760 %}
21761
21762 instruct vxor_mem(vec dst, vec src, memory mem) %{
21763 predicate((UseAVX > 0) &&
21764 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21765 match(Set dst (XorV src (LoadVector mem)));
21766 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21767 ins_encode %{
21768 int vlen_enc = vector_length_encoding(this);
21769 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21770 %}
21771 ins_pipe( pipe_slow );
21772 %}
21773
21774 // --------------------------------- VectorCast --------------------------------------
21775
21776 instruct vcastBtoX(vec dst, vec src) %{
21777 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21778 match(Set dst (VectorCastB2X src));
21779 format %{ "vector_cast_b2x $dst,$src\t!" %}
21780 ins_encode %{
21781 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21782 int vlen_enc = vector_length_encoding(this);
21783 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21784 %}
21785 ins_pipe( pipe_slow );
21786 %}
21787
21788 instruct vcastBtoD(legVec dst, legVec src) %{
21789 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21790 match(Set dst (VectorCastB2X src));
21791 format %{ "vector_cast_b2x $dst,$src\t!" %}
21792 ins_encode %{
21793 int vlen_enc = vector_length_encoding(this);
21794 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21795 %}
21796 ins_pipe( pipe_slow );
21797 %}
21798
21799 instruct castStoX(vec dst, vec src) %{
21800 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21801 Matcher::vector_length(n->in(1)) <= 8 && // src
21802 Matcher::vector_element_basic_type(n) == T_BYTE);
21803 match(Set dst (VectorCastS2X src));
21804 format %{ "vector_cast_s2x $dst,$src" %}
21805 ins_encode %{
21806 assert(UseAVX > 0, "required");
21807
21808 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21809 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21810 %}
21811 ins_pipe( pipe_slow );
21812 %}
21813
21814 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21815 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21816 Matcher::vector_length(n->in(1)) == 16 && // src
21817 Matcher::vector_element_basic_type(n) == T_BYTE);
21818 effect(TEMP dst, TEMP vtmp);
21819 match(Set dst (VectorCastS2X src));
21820 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21821 ins_encode %{
21822 assert(UseAVX > 0, "required");
21823
21824 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21825 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21826 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21827 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21828 %}
21829 ins_pipe( pipe_slow );
21830 %}
21831
21832 instruct vcastStoX_evex(vec dst, vec src) %{
21833 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21834 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21835 match(Set dst (VectorCastS2X src));
21836 format %{ "vector_cast_s2x $dst,$src\t!" %}
21837 ins_encode %{
21838 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21839 int src_vlen_enc = vector_length_encoding(this, $src);
21840 int vlen_enc = vector_length_encoding(this);
21841 switch (to_elem_bt) {
21842 case T_BYTE:
21843 if (!VM_Version::supports_avx512vl()) {
21844 vlen_enc = Assembler::AVX_512bit;
21845 }
21846 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21847 break;
21848 case T_INT:
21849 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21850 break;
21851 case T_FLOAT:
21852 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21853 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21854 break;
21855 case T_LONG:
21856 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21857 break;
21858 case T_DOUBLE: {
21859 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21860 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21861 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21862 break;
21863 }
21864 default:
21865 ShouldNotReachHere();
21866 }
21867 %}
21868 ins_pipe( pipe_slow );
21869 %}
21870
21871 instruct castItoX(vec dst, vec src) %{
21872 predicate(UseAVX <= 2 &&
21873 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21874 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21875 match(Set dst (VectorCastI2X src));
21876 format %{ "vector_cast_i2x $dst,$src" %}
21877 ins_encode %{
21878 assert(UseAVX > 0, "required");
21879
21880 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21881 int vlen_enc = vector_length_encoding(this, $src);
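    // Mask each int lane down to the target width, then narrow with
    // vpackusdw (plus vpackuswb for the byte case); packing a register with
    // itself is fine since only the low half of the result is consumed.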
21882
21883 if (to_elem_bt == T_BYTE) {
21884 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21885 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21886 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21887 } else {
21888 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21889 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21890 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21891 }
21892 %}
21893 ins_pipe( pipe_slow );
21894 %}
21895
21896 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21897 predicate(UseAVX <= 2 &&
21898 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21899 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21900 match(Set dst (VectorCastI2X src));
21901 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21902 effect(TEMP dst, TEMP vtmp);
21903 ins_encode %{
21904 assert(UseAVX > 0, "required");
21905
21906 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21907 int vlen_enc = vector_length_encoding(this, $src);
21908
21909 if (to_elem_bt == T_BYTE) {
21910 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21911 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21912 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21913 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21914 } else {
21915 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21916 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21917 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21918 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21919 }
21920 %}
21921 ins_pipe( pipe_slow );
21922 %}
21923
21924 instruct vcastItoX_evex(vec dst, vec src) %{
21925 predicate(UseAVX > 2 ||
21926 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21927 match(Set dst (VectorCastI2X src));
21928 format %{ "vector_cast_i2x $dst,$src\t!" %}
21929 ins_encode %{
21930 assert(UseAVX > 0, "required");
21931
21932 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21933 int src_vlen_enc = vector_length_encoding(this, $src);
21934 int dst_vlen_enc = vector_length_encoding(this);
21935 switch (dst_elem_bt) {
21936 case T_BYTE:
21937 if (!VM_Version::supports_avx512vl()) {
21938 src_vlen_enc = Assembler::AVX_512bit;
21939 }
21940 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21941 break;
21942 case T_SHORT:
21943 if (!VM_Version::supports_avx512vl()) {
21944 src_vlen_enc = Assembler::AVX_512bit;
21945 }
21946 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21947 break;
21948 case T_FLOAT:
21949 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21950 break;
21951 case T_LONG:
21952 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21953 break;
21954 case T_DOUBLE:
21955 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21956 break;
21957 default:
21958 ShouldNotReachHere();
21959 }
21960 %}
21961 ins_pipe( pipe_slow );
21962 %}
21963
21964 instruct vcastLtoBS(vec dst, vec src) %{
21965 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21966 UseAVX <= 2);
21967 match(Set dst (VectorCastL2X src));
21968 format %{ "vector_cast_l2x $dst,$src" %}
21969 ins_encode %{
21970 assert(UseAVX > 0, "required");
21971
21972 int vlen = Matcher::vector_length_in_bytes(this, $src);
21973 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21974 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21975 : ExternalAddress(vector_int_to_short_mask());
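    // Gather the low dword of every long into the low half of the register,
    // mask to the target width, and narrow with vpackusdw (plus vpackuswb
    // for the byte case).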
21976 if (vlen <= 16) {
21977 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21978 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21979 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21980 } else {
21981 assert(vlen <= 32, "required");
21982 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21983 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21984 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21985 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21986 }
21987 if (to_elem_bt == T_BYTE) {
21988 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21989 }
21990 %}
21991 ins_pipe( pipe_slow );
21992 %}
21993
21994 instruct vcastLtoX_evex(vec dst, vec src) %{
21995 predicate(UseAVX > 2 ||
21996 (Matcher::vector_element_basic_type(n) == T_INT ||
21997 Matcher::vector_element_basic_type(n) == T_FLOAT ||
21998 Matcher::vector_element_basic_type(n) == T_DOUBLE));
21999 match(Set dst (VectorCastL2X src));
22000 format %{ "vector_cast_l2x $dst,$src\t!" %}
22001 ins_encode %{
22002 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22003 int vlen = Matcher::vector_length_in_bytes(this, $src);
22004 int vlen_enc = vector_length_encoding(this, $src);
22005 switch (to_elem_bt) {
22006 case T_BYTE:
22007 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22008 vlen_enc = Assembler::AVX_512bit;
22009 }
22010 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22011 break;
22012 case T_SHORT:
22013 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22014 vlen_enc = Assembler::AVX_512bit;
22015 }
22016 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22017 break;
22018 case T_INT:
22019 if (vlen == 8) {
22020 if ($dst$$XMMRegister != $src$$XMMRegister) {
22021 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22022 }
22023 } else if (vlen == 16) {
22024 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22025 } else if (vlen == 32) {
22026 if (UseAVX > 2) {
22027 if (!VM_Version::supports_avx512vl()) {
22028 vlen_enc = Assembler::AVX_512bit;
22029 }
22030 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22031 } else {
22032 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22033 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22034 }
22035 } else { // vlen == 64
22036 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22037 }
22038 break;
22039 case T_FLOAT:
22040 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22041 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22042 break;
22043 case T_DOUBLE:
22044 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22045 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22046 break;
22047
22048 default: assert(false, "%s", type2name(to_elem_bt));
22049 }
22050 %}
22051 ins_pipe( pipe_slow );
22052 %}
22053
22054 instruct vcastFtoD_reg(vec dst, vec src) %{
22055 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22056 match(Set dst (VectorCastF2X src));
22057 format %{ "vector_cast_f2d $dst,$src\t!" %}
22058 ins_encode %{
22059 int vlen_enc = vector_length_encoding(this);
22060 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22061 %}
22062 ins_pipe( pipe_slow );
22063 %}
22064
22065
22066 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22067 predicate(!VM_Version::supports_avx10_2() &&
22068 !VM_Version::supports_avx512vl() &&
22069 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22070 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22071 is_integral_type(Matcher::vector_element_basic_type(n)));
22072 match(Set dst (VectorCastF2X src));
22073 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22074 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22075 ins_encode %{
22076 int vlen_enc = vector_length_encoding(this, $src);
22077 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load
    // addresses wider than 32 bits for register-indirect addressing, since stub
    // constants live in the code cache and ReservedCodeCacheSize is currently
    // capped at 2G. Targets are free to raise that limit, but a code cache larger
    // than 2G is unrealistic in practice; on the flip side, with the given cap we
    // save a temporary register allocation, which in the limiting case can prevent
    // spilling in blocks with high register pressure.
22085 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22086 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22087 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22088 %}
22089 ins_pipe( pipe_slow );
22090 %}
22091
22092 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22093 predicate(!VM_Version::supports_avx10_2() &&
22094 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22095 is_integral_type(Matcher::vector_element_basic_type(n)));
22096 match(Set dst (VectorCastF2X src));
22097 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22098 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22099 ins_encode %{
22100 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22101 if (to_elem_bt == T_LONG) {
22102 int vlen_enc = vector_length_encoding(this);
22103 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22104 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22105 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22106 } else {
22107 int vlen_enc = vector_length_encoding(this, $src);
22108 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22109 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22110 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22111 }
22112 %}
22113 ins_pipe( pipe_slow );
22114 %}
22115
22116 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22117 predicate(VM_Version::supports_avx10_2() &&
22118 is_integral_type(Matcher::vector_element_basic_type(n)));
22119 match(Set dst (VectorCastF2X src));
22120 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22121 ins_encode %{
22122 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22123 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22124 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22125 %}
22126 ins_pipe( pipe_slow );
22127 %}
22128
22129 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22130 predicate(VM_Version::supports_avx10_2() &&
22131 is_integral_type(Matcher::vector_element_basic_type(n)));
22132 match(Set dst (VectorCastF2X (LoadVector src)));
22133 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22134 ins_encode %{
22135 int vlen = Matcher::vector_length(this);
22136 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22137 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22138 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22139 %}
22140 ins_pipe( pipe_slow );
22141 %}
22142
22143 instruct vcastDtoF_reg(vec dst, vec src) %{
22144 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22145 match(Set dst (VectorCastD2X src));
22146 format %{ "vector_cast_d2x $dst,$src\t!" %}
22147 ins_encode %{
22148 int vlen_enc = vector_length_encoding(this, $src);
22149 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22150 %}
22151 ins_pipe( pipe_slow );
22152 %}
22153
22154 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22155 predicate(!VM_Version::supports_avx10_2() &&
22156 !VM_Version::supports_avx512vl() &&
22157 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22158 is_integral_type(Matcher::vector_element_basic_type(n)));
22159 match(Set dst (VectorCastD2X src));
22160 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22161 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22162 ins_encode %{
22163 int vlen_enc = vector_length_encoding(this, $src);
22164 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22165 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22166 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22167 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22168 %}
22169 ins_pipe( pipe_slow );
22170 %}
22171
22172 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22173 predicate(!VM_Version::supports_avx10_2() &&
22174 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22175 is_integral_type(Matcher::vector_element_basic_type(n)));
22176 match(Set dst (VectorCastD2X src));
22177 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22178 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22179 ins_encode %{
22180 int vlen_enc = vector_length_encoding(this, $src);
22181 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22182 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22183 ExternalAddress(vector_float_signflip());
22184 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22185 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22186 %}
22187 ins_pipe( pipe_slow );
22188 %}
22189
22190 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22191 predicate(VM_Version::supports_avx10_2() &&
22192 is_integral_type(Matcher::vector_element_basic_type(n)));
22193 match(Set dst (VectorCastD2X src));
22194 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22195 ins_encode %{
22196 int vlen_enc = vector_length_encoding(this, $src);
22197 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22198 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22199 %}
22200 ins_pipe( pipe_slow );
22201 %}
22202
22203 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22204 predicate(VM_Version::supports_avx10_2() &&
22205 is_integral_type(Matcher::vector_element_basic_type(n)));
22206 match(Set dst (VectorCastD2X (LoadVector src)));
22207 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22208 ins_encode %{
22209 int vlen = Matcher::vector_length(this);
22210 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22211 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22212 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22213 %}
22214 ins_pipe( pipe_slow );
22215 %}
22216
22217 instruct vucast(vec dst, vec src) %{
22218 match(Set dst (VectorUCastB2X src));
22219 match(Set dst (VectorUCastS2X src));
22220 match(Set dst (VectorUCastI2X src));
22221 format %{ "vector_ucast $dst,$src\t!" %}
22222 ins_encode %{
22223 assert(UseAVX > 0, "required");
22224
22225 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22226 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22227 int vlen_enc = vector_length_encoding(this);
22228 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22229 %}
22230 ins_pipe( pipe_slow );
22231 %}
22232
22233 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22234 predicate(!VM_Version::supports_avx512vl() &&
22235 Matcher::vector_length_in_bytes(n) < 64 &&
22236 Matcher::vector_element_basic_type(n) == T_INT);
22237 match(Set dst (RoundVF src));
22238 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22239 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22240 ins_encode %{
22241 int vlen_enc = vector_length_encoding(this);
22242 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22243 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22244 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22245 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22246 %}
22247 ins_pipe( pipe_slow );
22248 %}
22249
22250 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22251 predicate((VM_Version::supports_avx512vl() ||
22252 Matcher::vector_length_in_bytes(n) == 64) &&
22253 Matcher::vector_element_basic_type(n) == T_INT);
22254 match(Set dst (RoundVF src));
22255 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22256 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22257 ins_encode %{
22258 int vlen_enc = vector_length_encoding(this);
22259 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22260 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22261 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22262 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22263 %}
22264 ins_pipe( pipe_slow );
22265 %}
22266
22267 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22268 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22269 match(Set dst (RoundVD src));
22270 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22271 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22272 ins_encode %{
22273 int vlen_enc = vector_length_encoding(this);
22274 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22275 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22276 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22277 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22278 %}
22279 ins_pipe( pipe_slow );
22280 %}
22281
22282 // --------------------------------- VectorMaskCmp --------------------------------------
22283
22284 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22285 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22286 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22287 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22288 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22289 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22290 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22291 ins_encode %{
22292 int vlen_enc = vector_length_encoding(this, $src1);
22293 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22294 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22295 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22296 } else {
22297 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22298 }
22299 %}
22300 ins_pipe( pipe_slow );
22301 %}
22302
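// 512-bit FP compare that must produce a vector rather than a mask result:
// compare into a temporary k-register, then expand it to all-ones/all-zeros
// lanes with a masked load of vector_all_bits_set.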
22303 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22304 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22305 n->bottom_type()->isa_vectmask() == nullptr &&
22306 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22307 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22308 effect(TEMP ktmp);
22309 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22310 ins_encode %{
22311 int vlen_enc = Assembler::AVX_512bit;
22312 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22313 KRegister mask = k0; // The comparison itself is not being masked.
22314 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22315 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22316 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22317 } else {
22318 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22319 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22320 }
22321 %}
22322 ins_pipe( pipe_slow );
22323 %}
22324
22325 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22326 predicate(n->bottom_type()->isa_vectmask() &&
22327 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22328 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22329 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22330 ins_encode %{
22331 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22332 int vlen_enc = vector_length_encoding(this, $src1);
22333 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22334 KRegister mask = k0; // The comparison itself is not being masked.
22335 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22336 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22337 } else {
22338 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22339 }
22340 %}
22341 ins_pipe( pipe_slow );
22342 %}
22343
22344 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22345 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22346 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22347 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22348 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22349 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22350 (n->in(2)->get_int() == BoolTest::eq ||
22351 n->in(2)->get_int() == BoolTest::lt ||
22352 n->in(2)->get_int() == BoolTest::gt)); // cond
22353 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22354 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22355 ins_encode %{
22356 int vlen_enc = vector_length_encoding(this, $src1);
22357 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22358 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22359 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22360 %}
22361 ins_pipe( pipe_slow );
22362 %}
22363
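// ne/le/ge have no direct packed-compare encoding on SSE/AVX; vpcmpCCW emits
// the complementary eq/gt/lt compare and negates the result, using xtmp for
// the negation.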
22364 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22365 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22366 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22367 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22368 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22369 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22370 (n->in(2)->get_int() == BoolTest::ne ||
22371 n->in(2)->get_int() == BoolTest::le ||
22372 n->in(2)->get_int() == BoolTest::ge)); // cond
22373 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22374 effect(TEMP dst, TEMP xtmp);
22375 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22376 ins_encode %{
22377 int vlen_enc = vector_length_encoding(this, $src1);
22378 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22379 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22380 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22381 %}
22382 ins_pipe( pipe_slow );
22383 %}
22384
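// Unsigned integral comparison without AVX-512: xor both operands with a
// broadcast per-lane sign bit so that an ordinary signed compare yields the
// unsigned ordering.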
22385 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22386 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22387 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22388 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22389 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22390 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22391 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22392 effect(TEMP dst, TEMP xtmp);
22393 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22394 ins_encode %{
22395 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22396 int vlen_enc = vector_length_encoding(this, $src1);
22397 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22398 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22399
22400 if (vlen_enc == Assembler::AVX_128bit) {
22401 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22402 } else {
22403 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22404 }
22405 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22406 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22407 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22408 %}
22409 ins_pipe( pipe_slow );
22410 %}
22411
22412 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22413 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22414 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22415 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22416 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22417 effect(TEMP ktmp);
22418 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22419 ins_encode %{
22420 assert(UseAVX > 2, "required");
22421
22422 int vlen_enc = vector_length_encoding(this, $src1);
22423 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22424 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22425 KRegister mask = k0; // The comparison itself is not being masked.
22426 bool merge = false;
22427 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22428
22429 switch (src1_elem_bt) {
22430 case T_INT: {
22431 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22432 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22433 break;
22434 }
22435 case T_LONG: {
22436 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22437 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22438 break;
22439 }
22440 default: assert(false, "%s", type2name(src1_elem_bt));
22441 }
22442 %}
22443 ins_pipe( pipe_slow );
22444 %}
22445
22446
22447 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22448 predicate(n->bottom_type()->isa_vectmask() &&
22449 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22450 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22451 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22452 ins_encode %{
22453 assert(UseAVX > 2, "required");
22454 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22455
22456 int vlen_enc = vector_length_encoding(this, $src1);
22457 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22458 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22459 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22460
    // Compare directly into the destination mask register.
22462 switch (src1_elem_bt) {
22463 case T_BYTE: {
22464 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22465 break;
22466 }
22467 case T_SHORT: {
22468 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22469 break;
22470 }
22471 case T_INT: {
22472 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22473 break;
22474 }
22475 case T_LONG: {
22476 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22477 break;
22478 }
22479 default: assert(false, "%s", type2name(src1_elem_bt));
22480 }
22481 %}
22482 ins_pipe( pipe_slow );
22483 %}
22484
22485 // Extract
22486
22487 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22488 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22489 match(Set dst (ExtractI src idx));
22490 match(Set dst (ExtractS src idx));
22491 match(Set dst (ExtractB src idx));
22492 format %{ "extractI $dst,$src,$idx\t!" %}
22493 ins_encode %{
22494 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22495
22496 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22497 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22498 %}
22499 ins_pipe( pipe_slow );
22500 %}
22501
22502 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22503 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22504 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22505 match(Set dst (ExtractI src idx));
22506 match(Set dst (ExtractS src idx));
22507 match(Set dst (ExtractB src idx));
22508 effect(TEMP vtmp);
22509 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22510 ins_encode %{
22511 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22512
22513 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
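    // Two-step extraction for vectors wider than 128 bits: isolate the
    // 128-bit lane holding the element into $vtmp, then extract from there.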
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
            Matcher::vector_length(n->in(1)) == 16);  // src
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Blend --------------------------------------

instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
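    // A vector Boolean mask holds -1 in true lanes, so an equality compare
    // against the all-bits-set constant recreates it as a k-register mask;
    // the merging blend then takes $src2 where the mask is set and $src1
    // elsewhere.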
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_vectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
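    // evpabsq needs AVX512VL for 128/256-bit operands; without VL it is
    // issued at 512-bit width and the untouched upper lanes are don't-care.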
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------

instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- VectorTest --------------------------------------------

instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
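    // Move the k-mask into a GPR, keep only the masklen live bits, and
    // compare against the all-true pattern: ZF ends up set iff every lane
    // under test is true.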
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
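    // Keep only the masklen live bits; any surviving bit clears ZF, which
    // signals that at least one lane is true.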
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- LoadMask --------------------------------------------

instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- StoreMask --------------------------------------------

instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
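    // Boolean shorts hold 0/-1; normalize to 0/1 and pack down to one byte
    // per lane (the abs runs before or after the pack depending on the path).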
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
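    // Gather the even dwords of each 256-bit half (one dword per long lane),
    // merge both halves into the low 128 bits, then pack down to bytes and
    // normalize -1 to 1 with vpabsb.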
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst,$src1,$src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
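    // dst[i] = $src1 + i: broadcast the start value, then add the iota
    // constant loaded from the constant table.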
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst,$src1,$src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}


instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
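    // e.g. a short shuffle value of 3 expands to the byte pair {6,7},
    // selecting both bytes of source lane 3.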
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
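    // e.g. an int shuffle value of 2 expands to the byte quad {8,9,10,11}.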

    // Duplicate and multiply each shuffle by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.
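    // e.g. a long shuffle value of 1 expands to the dword pair {2,3},
    // which vpermd then consumes.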

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
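    // k0 selects the unmasked form; the helper dispatches on the element
    // type (bt) to pick the right popcount flavor.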
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
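    // Preload $dst with $src so merge-masking leaves unselected lanes equal
    // to the corresponding source lanes.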
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Trailing Zeros Count --------------------------------------

instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Rotation Operations ----------------------------------
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Masked Operations ------------------------------------
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
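    // $ktmp2 marks the lanes outside $mask. After the masked equality
    // compare, $ktmp2|$ktmp1 is all ones iff every masked lane compared
    // equal; kortest then sets CF and $dst keeps the preloaded -1.
    // Otherwise the trailing-zero count of ~$ktmp1 gives the index of the
    // first mismatching lane ($mask is typically a prefix mask produced by
    // VectorMaskGen).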
23765
    // ktmp2 = lanes outside the mask; they are treated as trivially equal.
    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    // Optimistically assume all selected lanes match: result is -1.
    __ mov64($dst$$Register, -1L);
    // ktmp1 = selected lanes where src1 == src2 (unselected lanes stay 0).
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    // kortest sets CF iff ktmp1 | ktmp2 is all ones, i.e. every selected lane matched.
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    // Otherwise return the index of the lowest lane not proven equal.
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
23775 %}
23776 ins_pipe( pipe_slow );
23777 %}
23778
23779
23780 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23781 match(Set dst (VectorMaskGen len));
23782 effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23784 ins_encode %{
23785 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23786 %}
23787 ins_pipe( pipe_slow );
23788 %}
23789
23790 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23791 match(Set dst (VectorMaskGen len));
23792 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23793 effect(TEMP temp);
23794 ins_encode %{
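    // Build a mask with the low $len bits set: shifting all-ones right by (64 - len)
    // leaves exactly len ones, e.g. len = 5 yields 0x1F (assumes 1 <= len <= 64).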
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23796 __ kmovql($dst$$KRegister, $temp$$Register);
23797 %}
23798 ins_pipe( pipe_slow );
23799 %}
23800
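// VectorMaskToLong, VectorMaskTrueCount and VectorMaskFirst/LastTrue come in three
// shapes below: an EVEX form that consumes a kReg mask directly, a form that consumes
// a boolean vector, and an AVX form that matches through VectorStoreMask.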
23801 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23802 predicate(n->in(1)->bottom_type()->isa_vectmask());
23803 match(Set dst (VectorMaskToLong mask));
23804 effect(TEMP dst, KILL cr);
23805 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23806 ins_encode %{
23807 int opcode = this->ideal_Opcode();
23808 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23809 int mask_len = Matcher::vector_length(this, $mask);
23810 int mask_size = mask_len * type2aelembytes(mbt);
23811 int vlen_enc = vector_length_encoding(this, $mask);
23812 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23813 $dst$$Register, mask_len, mask_size, vlen_enc);
23814 %}
23815 ins_pipe( pipe_slow );
23816 %}
23817
23818 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23819 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23820 match(Set dst (VectorMaskToLong mask));
23821 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23822 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23823 ins_encode %{
23824 int opcode = this->ideal_Opcode();
23825 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23826 int mask_len = Matcher::vector_length(this, $mask);
23827 int vlen_enc = vector_length_encoding(this, $mask);
23828 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23829 $dst$$Register, mask_len, mbt, vlen_enc);
23830 %}
23831 ins_pipe( pipe_slow );
23832 %}
23833
23834 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23835 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23836 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23837 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23838 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23839 ins_encode %{
23840 int opcode = this->ideal_Opcode();
23841 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23842 int mask_len = Matcher::vector_length(this, $mask);
23843 int vlen_enc = vector_length_encoding(this, $mask);
23844 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23845 $dst$$Register, mask_len, mbt, vlen_enc);
23846 %}
23847 ins_pipe( pipe_slow );
23848 %}
23849
23850 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23851 predicate(n->in(1)->bottom_type()->isa_vectmask());
23852 match(Set dst (VectorMaskTrueCount mask));
23853 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23854 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23855 ins_encode %{
23856 int opcode = this->ideal_Opcode();
23857 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23858 int mask_len = Matcher::vector_length(this, $mask);
23859 int mask_size = mask_len * type2aelembytes(mbt);
23860 int vlen_enc = vector_length_encoding(this, $mask);
23861 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23862 $tmp$$Register, mask_len, mask_size, vlen_enc);
23863 %}
23864 ins_pipe( pipe_slow );
23865 %}
23866
23867 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23868 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23869 match(Set dst (VectorMaskTrueCount mask));
23870 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23871 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23872 ins_encode %{
23873 int opcode = this->ideal_Opcode();
23874 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23875 int mask_len = Matcher::vector_length(this, $mask);
23876 int vlen_enc = vector_length_encoding(this, $mask);
23877 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23878 $tmp$$Register, mask_len, mbt, vlen_enc);
23879 %}
23880 ins_pipe( pipe_slow );
23881 %}
23882
23883 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23884 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23885 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23886 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23887 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23888 ins_encode %{
23889 int opcode = this->ideal_Opcode();
23890 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23891 int mask_len = Matcher::vector_length(this, $mask);
23892 int vlen_enc = vector_length_encoding(this, $mask);
23893 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23894 $tmp$$Register, mask_len, mbt, vlen_enc);
23895 %}
23896 ins_pipe( pipe_slow );
23897 %}
23898
23899 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23900 predicate(n->in(1)->bottom_type()->isa_vectmask());
23901 match(Set dst (VectorMaskFirstTrue mask));
23902 match(Set dst (VectorMaskLastTrue mask));
23903 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23904 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23905 ins_encode %{
23906 int opcode = this->ideal_Opcode();
23907 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23908 int mask_len = Matcher::vector_length(this, $mask);
23909 int mask_size = mask_len * type2aelembytes(mbt);
23910 int vlen_enc = vector_length_encoding(this, $mask);
23911 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23912 $tmp$$Register, mask_len, mask_size, vlen_enc);
23913 %}
23914 ins_pipe( pipe_slow );
23915 %}
23916
23917 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23918 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23919 match(Set dst (VectorMaskFirstTrue mask));
23920 match(Set dst (VectorMaskLastTrue mask));
23921 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23922 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23923 ins_encode %{
23924 int opcode = this->ideal_Opcode();
23925 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23926 int mask_len = Matcher::vector_length(this, $mask);
23927 int vlen_enc = vector_length_encoding(this, $mask);
23928 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23929 $tmp$$Register, mask_len, mbt, vlen_enc);
23930 %}
23931 ins_pipe( pipe_slow );
23932 %}
23933
23934 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23935 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23936 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23937 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23938 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23939 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23940 ins_encode %{
23941 int opcode = this->ideal_Opcode();
23942 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23943 int mask_len = Matcher::vector_length(this, $mask);
23944 int vlen_enc = vector_length_encoding(this, $mask);
23945 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23946 $tmp$$Register, mask_len, mbt, vlen_enc);
23947 %}
23948 ins_pipe( pipe_slow );
23949 %}
23950
23951 // --------------------------------- Compress/Expand Operations ---------------------------
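// CompressV packs the lanes selected by the mask into contiguous low lanes, e.g.
// src = {a,b,c,d} with lanes 1 and 3 active compresses to {b,d,0,0} (the EVEX form
// zeroes the unselected tail); ExpandV performs the inverse scatter.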
23952 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23953 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23954 match(Set dst (CompressV src mask));
23955 match(Set dst (ExpandV src mask));
23956 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23958 ins_encode %{
23959 int opcode = this->ideal_Opcode();
23960 int vlen_enc = vector_length_encoding(this);
23961 BasicType bt = Matcher::vector_element_basic_type(this);
23962 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23963 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23964 %}
23965 ins_pipe( pipe_slow );
23966 %}
23967
23968 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23969 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23970 match(Set dst (CompressV src mask));
23971 match(Set dst (ExpandV src mask));
23972 format %{ "vector_compress_expand $dst, $src, $mask" %}
23973 ins_encode %{
23974 int opcode = this->ideal_Opcode();
23975 int vector_len = vector_length_encoding(this);
23976 BasicType bt = Matcher::vector_element_basic_type(this);
23977 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
23978 %}
23979 ins_pipe( pipe_slow );
23980 %}
23981
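// Compressing a mask yields a prefix mask: the result has popcount($mask) low bits set.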
23982 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23983 match(Set dst (CompressM mask));
23984 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23985 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23986 ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "mask input must be a vector mask");
23988 int mask_len = Matcher::vector_length(this);
23989 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23990 %}
23991 ins_pipe( pipe_slow );
23992 %}
23993
23994 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
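// ReverseV reverses the bit order within each lane; ReverseBytesV swaps the byte
// order within each lane.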
23995
23996 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23997 predicate(!VM_Version::supports_gfni());
23998 match(Set dst (ReverseV src));
23999 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24001 ins_encode %{
24002 int vec_enc = vector_length_encoding(this);
24003 BasicType bt = Matcher::vector_element_basic_type(this);
24004 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24005 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24006 %}
24007 ins_pipe( pipe_slow );
24008 %}
24009
24010 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24011 predicate(VM_Version::supports_gfni());
24012 match(Set dst (ReverseV src));
24013 effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24015 ins_encode %{
24016 int vec_enc = vector_length_encoding(this);
24017 BasicType bt = Matcher::vector_element_basic_type(this);
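    // 0x8040201008040201 is the GF(2) affine matrix that makes vgf2p8affineqb reverse the bit order of each byte.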
24018 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24019 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24020 $xtmp$$XMMRegister);
24021 %}
24022 ins_pipe( pipe_slow );
24023 %}
24024
24025 instruct vreverse_byte_reg(vec dst, vec src) %{
24026 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24027 match(Set dst (ReverseBytesV src));
24028 effect(TEMP dst);
24029 format %{ "vector_reverse_byte $dst, $src" %}
24030 ins_encode %{
24031 int vec_enc = vector_length_encoding(this);
24032 BasicType bt = Matcher::vector_element_basic_type(this);
24033 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24034 %}
24035 ins_pipe( pipe_slow );
24036 %}
24037
24038 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24039 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24040 match(Set dst (ReverseBytesV src));
24041 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24043 ins_encode %{
24044 int vec_enc = vector_length_encoding(this);
24045 BasicType bt = Matcher::vector_element_basic_type(this);
24046 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24047 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24048 %}
24049 ins_pipe( pipe_slow );
24050 %}
24051
24052 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
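// The EVEX forms lean on the AVX512CD vplzcnt instructions where they apply; the
// remaining forms synthesize the count from narrower operations.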
24053
24054 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24055 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24056 Matcher::vector_length_in_bytes(n->in(1))));
24057 match(Set dst (CountLeadingZerosV src));
24058 format %{ "vector_count_leading_zeros $dst, $src" %}
24059 ins_encode %{
24060 int vlen_enc = vector_length_encoding(this, $src);
24061 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24062 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24063 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24064 %}
24065 ins_pipe( pipe_slow );
24066 %}
24067
24068 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24069 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24070 Matcher::vector_length_in_bytes(n->in(1))));
24071 match(Set dst (CountLeadingZerosV src mask));
24072 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24073 ins_encode %{
24074 int vlen_enc = vector_length_encoding(this, $src);
24075 BasicType bt = Matcher::vector_element_basic_type(this, $src);
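    // Masked-off lanes must keep the src value, so seed dst with src; the merge-masked count then overwrites only the active lanes.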
24076 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24077 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24078 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24079 %}
24080 ins_pipe( pipe_slow );
24081 %}
24082
24083 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24084 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24085 VM_Version::supports_avx512cd() &&
24086 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24087 match(Set dst (CountLeadingZerosV src));
24088 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24090 ins_encode %{
24091 int vlen_enc = vector_length_encoding(this, $src);
24092 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24093 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24094 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24095 %}
24096 ins_pipe( pipe_slow );
24097 %}
24098
24099 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24100 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24101 match(Set dst (CountLeadingZerosV src));
24102 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24104 ins_encode %{
24105 int vlen_enc = vector_length_encoding(this, $src);
24106 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24107 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24108 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24109 $rtmp$$Register, true, vlen_enc);
24110 %}
24111 ins_pipe( pipe_slow );
24112 %}
24113
24114 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24115 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24116 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24117 match(Set dst (CountLeadingZerosV src));
24118 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24119 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24120 ins_encode %{
24121 int vlen_enc = vector_length_encoding(this, $src);
24122 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24123 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24124 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24125 %}
24126 ins_pipe( pipe_slow );
24127 %}
24128
24129 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24130 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24131 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24132 match(Set dst (CountLeadingZerosV src));
24133 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24134 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24135 ins_encode %{
24136 int vlen_enc = vector_length_encoding(this, $src);
24137 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24138 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24139 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24140 %}
24141 ins_pipe( pipe_slow );
24142 %}
24143
24144 // ---------------------------------- Vector Masked Operations ------------------------------------
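// The rules below implement predicated (merge-masked) arithmetic: dst doubles as the
// first source, and most call evmasked_op with merge = true so lanes with a clear
// mask bit keep their previous dst value (vrearrangev_reg_masked instead passes
// false to zero them).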
24145
24146 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24147 match(Set dst (AddVB (Binary dst src2) mask));
24148 match(Set dst (AddVS (Binary dst src2) mask));
24149 match(Set dst (AddVI (Binary dst src2) mask));
24150 match(Set dst (AddVL (Binary dst src2) mask));
24151 match(Set dst (AddVF (Binary dst src2) mask));
24152 match(Set dst (AddVD (Binary dst src2) mask));
24153 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24154 ins_encode %{
24155 int vlen_enc = vector_length_encoding(this);
24156 BasicType bt = Matcher::vector_element_basic_type(this);
24157 int opc = this->ideal_Opcode();
24158 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24159 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24160 %}
24161 ins_pipe( pipe_slow );
24162 %}
24163
24164 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24165 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24166 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24167 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24168 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24169 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24170 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24171 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24172 ins_encode %{
24173 int vlen_enc = vector_length_encoding(this);
24174 BasicType bt = Matcher::vector_element_basic_type(this);
24175 int opc = this->ideal_Opcode();
24176 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24177 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24178 %}
24179 ins_pipe( pipe_slow );
24180 %}
24181
24182 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24183 match(Set dst (XorV (Binary dst src2) mask));
24184 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24185 ins_encode %{
24186 int vlen_enc = vector_length_encoding(this);
24187 BasicType bt = Matcher::vector_element_basic_type(this);
24188 int opc = this->ideal_Opcode();
24189 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24190 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24191 %}
24192 ins_pipe( pipe_slow );
24193 %}
24194
24195 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24196 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24197 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24198 ins_encode %{
24199 int vlen_enc = vector_length_encoding(this);
24200 BasicType bt = Matcher::vector_element_basic_type(this);
24201 int opc = this->ideal_Opcode();
24202 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24203 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24204 %}
24205 ins_pipe( pipe_slow );
24206 %}
24207
24208 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24209 match(Set dst (OrV (Binary dst src2) mask));
24210 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24211 ins_encode %{
24212 int vlen_enc = vector_length_encoding(this);
24213 BasicType bt = Matcher::vector_element_basic_type(this);
24214 int opc = this->ideal_Opcode();
24215 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24216 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24217 %}
24218 ins_pipe( pipe_slow );
24219 %}
24220
24221 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24222 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24223 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24224 ins_encode %{
24225 int vlen_enc = vector_length_encoding(this);
24226 BasicType bt = Matcher::vector_element_basic_type(this);
24227 int opc = this->ideal_Opcode();
24228 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24229 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24230 %}
24231 ins_pipe( pipe_slow );
24232 %}
24233
24234 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24235 match(Set dst (AndV (Binary dst src2) mask));
24236 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24237 ins_encode %{
24238 int vlen_enc = vector_length_encoding(this);
24239 BasicType bt = Matcher::vector_element_basic_type(this);
24240 int opc = this->ideal_Opcode();
24241 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24242 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24243 %}
24244 ins_pipe( pipe_slow );
24245 %}
24246
24247 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24248 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24249 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24250 ins_encode %{
24251 int vlen_enc = vector_length_encoding(this);
24252 BasicType bt = Matcher::vector_element_basic_type(this);
24253 int opc = this->ideal_Opcode();
24254 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24255 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24256 %}
24257 ins_pipe( pipe_slow );
24258 %}
24259
24260 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24261 match(Set dst (SubVB (Binary dst src2) mask));
24262 match(Set dst (SubVS (Binary dst src2) mask));
24263 match(Set dst (SubVI (Binary dst src2) mask));
24264 match(Set dst (SubVL (Binary dst src2) mask));
24265 match(Set dst (SubVF (Binary dst src2) mask));
24266 match(Set dst (SubVD (Binary dst src2) mask));
24267 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24268 ins_encode %{
24269 int vlen_enc = vector_length_encoding(this);
24270 BasicType bt = Matcher::vector_element_basic_type(this);
24271 int opc = this->ideal_Opcode();
24272 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24273 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24274 %}
24275 ins_pipe( pipe_slow );
24276 %}
24277
24278 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24279 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24280 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24281 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24282 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24283 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24284 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24285 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24286 ins_encode %{
24287 int vlen_enc = vector_length_encoding(this);
24288 BasicType bt = Matcher::vector_element_basic_type(this);
24289 int opc = this->ideal_Opcode();
24290 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24291 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24292 %}
24293 ins_pipe( pipe_slow );
24294 %}
24295
24296 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24297 match(Set dst (MulVS (Binary dst src2) mask));
24298 match(Set dst (MulVI (Binary dst src2) mask));
24299 match(Set dst (MulVL (Binary dst src2) mask));
24300 match(Set dst (MulVF (Binary dst src2) mask));
24301 match(Set dst (MulVD (Binary dst src2) mask));
24302 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24303 ins_encode %{
24304 int vlen_enc = vector_length_encoding(this);
24305 BasicType bt = Matcher::vector_element_basic_type(this);
24306 int opc = this->ideal_Opcode();
24307 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24308 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24309 %}
24310 ins_pipe( pipe_slow );
24311 %}
24312
24313 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24314 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24315 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24316 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24317 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24318 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24319 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24320 ins_encode %{
24321 int vlen_enc = vector_length_encoding(this);
24322 BasicType bt = Matcher::vector_element_basic_type(this);
24323 int opc = this->ideal_Opcode();
24324 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24325 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24326 %}
24327 ins_pipe( pipe_slow );
24328 %}
24329
24330 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24331 match(Set dst (SqrtVF dst mask));
24332 match(Set dst (SqrtVD dst mask));
24333 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24334 ins_encode %{
24335 int vlen_enc = vector_length_encoding(this);
24336 BasicType bt = Matcher::vector_element_basic_type(this);
24337 int opc = this->ideal_Opcode();
24338 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24339 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24340 %}
24341 ins_pipe( pipe_slow );
24342 %}
24343
24344 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24345 match(Set dst (DivVF (Binary dst src2) mask));
24346 match(Set dst (DivVD (Binary dst src2) mask));
24347 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24348 ins_encode %{
24349 int vlen_enc = vector_length_encoding(this);
24350 BasicType bt = Matcher::vector_element_basic_type(this);
24351 int opc = this->ideal_Opcode();
24352 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24353 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24354 %}
24355 ins_pipe( pipe_slow );
24356 %}
24357
24358 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24359 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24360 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24361 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24362 ins_encode %{
24363 int vlen_enc = vector_length_encoding(this);
24364 BasicType bt = Matcher::vector_element_basic_type(this);
24365 int opc = this->ideal_Opcode();
24366 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24367 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24368 %}
24369 ins_pipe( pipe_slow );
24370 %}
24371
24372
24373 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24374 match(Set dst (RotateLeftV (Binary dst shift) mask));
24375 match(Set dst (RotateRightV (Binary dst shift) mask));
24376 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24377 ins_encode %{
24378 int vlen_enc = vector_length_encoding(this);
24379 BasicType bt = Matcher::vector_element_basic_type(this);
24380 int opc = this->ideal_Opcode();
24381 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24382 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24383 %}
24384 ins_pipe( pipe_slow );
24385 %}
24386
24387 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24388 match(Set dst (RotateLeftV (Binary dst src2) mask));
24389 match(Set dst (RotateRightV (Binary dst src2) mask));
24390 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24391 ins_encode %{
24392 int vlen_enc = vector_length_encoding(this);
24393 BasicType bt = Matcher::vector_element_basic_type(this);
24394 int opc = this->ideal_Opcode();
24395 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24396 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24397 %}
24398 ins_pipe( pipe_slow );
24399 %}
24400
24401 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24402 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24403 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24404 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24405 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24406 ins_encode %{
24407 int vlen_enc = vector_length_encoding(this);
24408 BasicType bt = Matcher::vector_element_basic_type(this);
24409 int opc = this->ideal_Opcode();
24410 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24411 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24412 %}
24413 ins_pipe( pipe_slow );
24414 %}
24415
24416 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24417 predicate(!n->as_ShiftV()->is_var_shift());
24418 match(Set dst (LShiftVS (Binary dst src2) mask));
24419 match(Set dst (LShiftVI (Binary dst src2) mask));
24420 match(Set dst (LShiftVL (Binary dst src2) mask));
24421 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24422 ins_encode %{
24423 int vlen_enc = vector_length_encoding(this);
24424 BasicType bt = Matcher::vector_element_basic_type(this);
24425 int opc = this->ideal_Opcode();
24426 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24427 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24428 %}
24429 ins_pipe( pipe_slow );
24430 %}
24431
24432 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24433 predicate(n->as_ShiftV()->is_var_shift());
24434 match(Set dst (LShiftVS (Binary dst src2) mask));
24435 match(Set dst (LShiftVI (Binary dst src2) mask));
24436 match(Set dst (LShiftVL (Binary dst src2) mask));
24437 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24438 ins_encode %{
24439 int vlen_enc = vector_length_encoding(this);
24440 BasicType bt = Matcher::vector_element_basic_type(this);
24441 int opc = this->ideal_Opcode();
24442 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24443 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24444 %}
24445 ins_pipe( pipe_slow );
24446 %}
24447
24448 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24449 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24450 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24451 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24452 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24453 ins_encode %{
24454 int vlen_enc = vector_length_encoding(this);
24455 BasicType bt = Matcher::vector_element_basic_type(this);
24456 int opc = this->ideal_Opcode();
24457 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24458 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24459 %}
24460 ins_pipe( pipe_slow );
24461 %}
24462
24463 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24464 predicate(!n->as_ShiftV()->is_var_shift());
24465 match(Set dst (RShiftVS (Binary dst src2) mask));
24466 match(Set dst (RShiftVI (Binary dst src2) mask));
24467 match(Set dst (RShiftVL (Binary dst src2) mask));
24468 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24469 ins_encode %{
24470 int vlen_enc = vector_length_encoding(this);
24471 BasicType bt = Matcher::vector_element_basic_type(this);
24472 int opc = this->ideal_Opcode();
24473 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24474 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24475 %}
24476 ins_pipe( pipe_slow );
24477 %}
24478
24479 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24480 predicate(n->as_ShiftV()->is_var_shift());
24481 match(Set dst (RShiftVS (Binary dst src2) mask));
24482 match(Set dst (RShiftVI (Binary dst src2) mask));
24483 match(Set dst (RShiftVL (Binary dst src2) mask));
24484 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24485 ins_encode %{
24486 int vlen_enc = vector_length_encoding(this);
24487 BasicType bt = Matcher::vector_element_basic_type(this);
24488 int opc = this->ideal_Opcode();
24489 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24490 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24491 %}
24492 ins_pipe( pipe_slow );
24493 %}
24494
24495 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24496 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24497 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24498 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24499 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24500 ins_encode %{
24501 int vlen_enc = vector_length_encoding(this);
24502 BasicType bt = Matcher::vector_element_basic_type(this);
24503 int opc = this->ideal_Opcode();
24504 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24505 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24506 %}
24507 ins_pipe( pipe_slow );
24508 %}
24509
24510 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24511 predicate(!n->as_ShiftV()->is_var_shift());
24512 match(Set dst (URShiftVS (Binary dst src2) mask));
24513 match(Set dst (URShiftVI (Binary dst src2) mask));
24514 match(Set dst (URShiftVL (Binary dst src2) mask));
24515 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24516 ins_encode %{
24517 int vlen_enc = vector_length_encoding(this);
24518 BasicType bt = Matcher::vector_element_basic_type(this);
24519 int opc = this->ideal_Opcode();
24520 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24521 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24522 %}
24523 ins_pipe( pipe_slow );
24524 %}
24525
24526 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24527 predicate(n->as_ShiftV()->is_var_shift());
24528 match(Set dst (URShiftVS (Binary dst src2) mask));
24529 match(Set dst (URShiftVI (Binary dst src2) mask));
24530 match(Set dst (URShiftVL (Binary dst src2) mask));
24531 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24532 ins_encode %{
24533 int vlen_enc = vector_length_encoding(this);
24534 BasicType bt = Matcher::vector_element_basic_type(this);
24535 int opc = this->ideal_Opcode();
24536 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24537 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24538 %}
24539 ins_pipe( pipe_slow );
24540 %}
24541
24542 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24543 match(Set dst (MaxV (Binary dst src2) mask));
24544 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24545 ins_encode %{
24546 int vlen_enc = vector_length_encoding(this);
24547 BasicType bt = Matcher::vector_element_basic_type(this);
24548 int opc = this->ideal_Opcode();
24549 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24550 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24551 %}
24552 ins_pipe( pipe_slow );
24553 %}
24554
24555 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24556 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24557 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24558 ins_encode %{
24559 int vlen_enc = vector_length_encoding(this);
24560 BasicType bt = Matcher::vector_element_basic_type(this);
24561 int opc = this->ideal_Opcode();
24562 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24563 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24564 %}
24565 ins_pipe( pipe_slow );
24566 %}
24567
24568 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24569 match(Set dst (MinV (Binary dst src2) mask));
24570 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24571 ins_encode %{
24572 int vlen_enc = vector_length_encoding(this);
24573 BasicType bt = Matcher::vector_element_basic_type(this);
24574 int opc = this->ideal_Opcode();
24575 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24576 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24577 %}
24578 ins_pipe( pipe_slow );
24579 %}
24580
24581 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24582 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24583 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24584 ins_encode %{
24585 int vlen_enc = vector_length_encoding(this);
24586 BasicType bt = Matcher::vector_element_basic_type(this);
24587 int opc = this->ideal_Opcode();
24588 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24589 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24590 %}
24591 ins_pipe( pipe_slow );
24592 %}
24593
24594 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24595 match(Set dst (VectorRearrange (Binary dst src2) mask));
24596 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24597 ins_encode %{
24598 int vlen_enc = vector_length_encoding(this);
24599 BasicType bt = Matcher::vector_element_basic_type(this);
24600 int opc = this->ideal_Opcode();
24601 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24602 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24603 %}
24604 ins_pipe( pipe_slow );
24605 %}
24606
24607 instruct vabs_masked(vec dst, kReg mask) %{
24608 match(Set dst (AbsVB dst mask));
24609 match(Set dst (AbsVS dst mask));
24610 match(Set dst (AbsVI dst mask));
24611 match(Set dst (AbsVL dst mask));
24612 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24613 ins_encode %{
24614 int vlen_enc = vector_length_encoding(this);
24615 BasicType bt = Matcher::vector_element_basic_type(this);
24616 int opc = this->ideal_Opcode();
24617 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24618 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24619 %}
24620 ins_pipe( pipe_slow );
24621 %}
24622
24623 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24624 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24625 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24626 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24627 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24629 int vlen_enc = vector_length_encoding(this);
24630 BasicType bt = Matcher::vector_element_basic_type(this);
24631 int opc = this->ideal_Opcode();
24632 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24633 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24634 %}
24635 ins_pipe( pipe_slow );
24636 %}
24637
24638 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24639 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24640 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24641 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24642 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24644 int vlen_enc = vector_length_encoding(this);
24645 BasicType bt = Matcher::vector_element_basic_type(this);
24646 int opc = this->ideal_Opcode();
24647 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24648 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24649 %}
24650 ins_pipe( pipe_slow );
24651 %}
24652
24653 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24654 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24655 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24656 ins_encode %{
24657 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24658 int vlen_enc = vector_length_encoding(this, $src1);
24659 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24660
    // Dispatch on the element type: integral comparisons derive a signed or
    // unsigned predicate from the bool test; FP comparisons use an FP predicate.
24662 switch (src1_elem_bt) {
24663 case T_BYTE: {
24664 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24665 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24666 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24667 break;
24668 }
24669 case T_SHORT: {
24670 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24671 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24672 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24673 break;
24674 }
24675 case T_INT: {
24676 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24677 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24678 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24679 break;
24680 }
24681 case T_LONG: {
24682 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24683 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24684 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24685 break;
24686 }
24687 case T_FLOAT: {
24688 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24689 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24690 break;
24691 }
24692 case T_DOUBLE: {
24693 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24694 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24695 break;
24696 }
24697 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24698 }
24699 %}
24700 ins_pipe( pipe_slow );
24701 %}
24702
24703 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24704 predicate(Matcher::vector_length(n) <= 32);
24705 match(Set dst (MaskAll src));
24706 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24707 ins_encode %{
24708 int mask_len = Matcher::vector_length(this);
24709 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24710 %}
24711 ins_pipe( pipe_slow );
24712 %}
24713
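// XorVMask with (MaskAll -1), i.e. an all-ones mask, is C2's encoding of mask
// negation; the rules below lower it to knot.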
24714 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24715 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24716 match(Set dst (XorVMask src (MaskAll cnt)));
24717 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24719 ins_encode %{
24720 uint masklen = Matcher::vector_length(this);
24721 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24722 %}
24723 ins_pipe( pipe_slow );
24724 %}
24725
24726 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24727 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24728 (Matcher::vector_length(n) == 16) ||
24729 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24730 match(Set dst (XorVMask src (MaskAll cnt)));
24731 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24732 ins_encode %{
24733 uint masklen = Matcher::vector_length(this);
24734 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24735 %}
24736 ins_pipe( pipe_slow );
24737 %}
24738
24739 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24740 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24741 match(Set dst (VectorLongToMask src));
24742 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24743 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24744 ins_encode %{
24745 int mask_len = Matcher::vector_length(this);
24746 int vec_enc = vector_length_encoding(mask_len);
24747 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24748 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24749 %}
24750 ins_pipe( pipe_slow );
24751 %}
24752
24753
24754 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24755 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24756 match(Set dst (VectorLongToMask src));
24757 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
24759 ins_encode %{
24760 int mask_len = Matcher::vector_length(this);
24761 assert(mask_len <= 32, "invalid mask length");
24762 int vec_enc = vector_length_encoding(mask_len);
24763 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24764 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24765 %}
24766 ins_pipe( pipe_slow );
24767 %}
24768
24769 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24770 predicate(n->bottom_type()->isa_vectmask());
24771 match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src" %}
24773 ins_encode %{
24774 __ kmov($dst$$KRegister, $src$$Register);
24775 %}
24776 ins_pipe( pipe_slow );
24777 %}
24778
24779 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24780 match(Set dst (AndVMask src1 src2));
24781 match(Set dst (OrVMask src1 src2));
24782 match(Set dst (XorVMask src1 src2));
24783 effect(TEMP kscratch);
24784 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24785 ins_encode %{
24786 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24787 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24788 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24789 uint masklen = Matcher::vector_length(this);
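    // Without AVX512DQ there are no byte-width k-register instructions, so widen sub-16-bit mask operations to 16 bits.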
24790 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24791 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24792 %}
24793 ins_pipe( pipe_slow );
24794 %}
24795
24796 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24797 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24798 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24799 ins_encode %{
24800 int vlen_enc = vector_length_encoding(this);
24801 BasicType bt = Matcher::vector_element_basic_type(this);
24802 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24803 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24804 %}
24805 ins_pipe( pipe_slow );
24806 %}
24807
instruct vternlog_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24809 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24810 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24811 ins_encode %{
24812 int vlen_enc = vector_length_encoding(this);
24813 BasicType bt = Matcher::vector_element_basic_type(this);
24814 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24815 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24816 %}
24817 ins_pipe( pipe_slow );
24818 %}
24819
24820 instruct castMM(kReg dst)
24821 %{
24822 match(Set dst (CastVV dst));
24823
24824 size(0);
24825 format %{ "# castVV of $dst" %}
24826 ins_encode(/* empty encoding */);
24827 ins_cost(0);
24828 ins_pipe(empty);
24829 %}
24830
24831 instruct castVV(vec dst)
24832 %{
24833 match(Set dst (CastVV dst));
24834
24835 size(0);
24836 format %{ "# castVV of $dst" %}
24837 ins_encode(/* empty encoding */);
24838 ins_cost(0);
24839 ins_pipe(empty);
24840 %}
24841
24842 instruct castVVLeg(legVec dst)
24843 %{
24844 match(Set dst (CastVV dst));
24845
24846 size(0);
24847 format %{ "# castVV of $dst" %}
24848 ins_encode(/* empty encoding */);
24849 ins_cost(0);
24850 ins_pipe(empty);
24851 %}
24852
24853 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24854 %{
24855 match(Set dst (IsInfiniteF src));
24856 effect(TEMP ktmp, KILL cr);
24857 format %{ "float_class_check $dst, $src" %}
24858 ins_encode %{
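    // Immediate 0x18 selects the +Inf (bit 3) and -Inf (bit 4) classes, so the mask bit is set iff the input is infinite.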
24859 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24860 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24861 %}
24862 ins_pipe(pipe_slow);
24863 %}
24864
24865 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24866 %{
24867 match(Set dst (IsInfiniteD src));
24868 effect(TEMP ktmp, KILL cr);
24869 format %{ "double_class_check $dst, $src" %}
24870 ins_encode %{
24871 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24872 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24873 %}
24874 ins_pipe(pipe_slow);
24875 %}
24876
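// --------------------------------- Saturating Vector Operations -------------------------------
// Byte and short forms map directly to the packed saturating add/subtract instructions;
// int and long forms are synthesized, with separate signed and unsigned flavors.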
24877 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24878 %{
24879 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24880 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24881 match(Set dst (SaturatingAddV src1 src2));
24882 match(Set dst (SaturatingSubV src1 src2));
24883 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24884 ins_encode %{
24885 int vlen_enc = vector_length_encoding(this);
24886 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24887 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24888 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24889 %}
24890 ins_pipe(pipe_slow);
24891 %}
24892
24893 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24894 %{
24895 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24896 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24897 match(Set dst (SaturatingAddV src1 src2));
24898 match(Set dst (SaturatingSubV src1 src2));
24899 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24900 ins_encode %{
24901 int vlen_enc = vector_length_encoding(this);
24902 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24903 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24904 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24905 %}
24906 ins_pipe(pipe_slow);
24907 %}
24908
24909 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24910 %{
24911 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24912 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24913 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24914 match(Set dst (SaturatingAddV src1 src2));
24915 match(Set dst (SaturatingSubV src1 src2));
24916 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24917 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24918 ins_encode %{
24919 int vlen_enc = vector_length_encoding(this);
24920 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24921 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24922 $src1$$XMMRegister, $src2$$XMMRegister,
24923 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24924 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24925 %}
24926 ins_pipe(pipe_slow);
24927 %}
24928
24929 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24930 %{
24931 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24932 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24933 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24934 match(Set dst (SaturatingAddV src1 src2));
24935 match(Set dst (SaturatingSubV src1 src2));
24936 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24937 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24938 ins_encode %{
24939 int vlen_enc = vector_length_encoding(this);
24940 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24941 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24942 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24943 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24944 %}
24945 ins_pipe(pipe_slow);
24946 %}
24947
24948 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24949 %{
24950 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24951 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24952 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24953 match(Set dst (SaturatingAddV src1 src2));
24954 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24955 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24956 ins_encode %{
24957 int vlen_enc = vector_length_encoding(this);
24958 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24959 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24960 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24961 %}
24962 ins_pipe(pipe_slow);
24963 %}
24964
24965 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24966 %{
24967 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24968 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24969 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24970 match(Set dst (SaturatingAddV src1 src2));
24971 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24972 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24973 ins_encode %{
24974 int vlen_enc = vector_length_encoding(this);
24975 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24976 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24977 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24978 %}
24979 ins_pipe(pipe_slow);
24980 %}
24981
24982 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24983 %{
24984 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24985 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24986 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24987 match(Set dst (SaturatingSubV src1 src2));
24988 effect(TEMP ktmp);
24989 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24990 ins_encode %{
24991 int vlen_enc = vector_length_encoding(this);
24992 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24993 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24994 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24995 %}
24996 ins_pipe(pipe_slow);
24997 %}
24998
24999 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25000 %{
25001 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25002 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25003 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25004 match(Set dst (SaturatingSubV src1 src2));
25005 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25006 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25007 ins_encode %{
25008 int vlen_enc = vector_length_encoding(this);
25009 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25010 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25011 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25012 %}
25013 ins_pipe(pipe_slow);
25014 %}
25015
25016 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25017 %{
25018 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25019 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25020 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25021 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25022 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25023 ins_encode %{
25024 int vlen_enc = vector_length_encoding(this);
25025 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25026 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25027 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25028 %}
25029 ins_pipe(pipe_slow);
25030 %}
25031
25032 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25033 %{
25034 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25035 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25036 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25037 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25038 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25039 ins_encode %{
25040 int vlen_enc = vector_length_encoding(this);
25041 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25042 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25043 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25044 %}
25045 ins_pipe(pipe_slow);
25046 %}
25047
25048 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25049 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25050 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25051 match(Set dst (SaturatingAddV (Binary dst src) mask));
25052 match(Set dst (SaturatingSubV (Binary dst src) mask));
25053 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25054 ins_encode %{
25055 int vlen_enc = vector_length_encoding(this);
25056 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25057 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25058 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25059 %}
25060 ins_pipe( pipe_slow );
25061 %}
25062
25063 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25064 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25065 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25066 match(Set dst (SaturatingAddV (Binary dst src) mask));
25067 match(Set dst (SaturatingSubV (Binary dst src) mask));
25068 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25069 ins_encode %{
25070 int vlen_enc = vector_length_encoding(this);
25071 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25072 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25073 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25074 %}
25075 ins_pipe( pipe_slow );
25076 %}
25077
25078 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25079 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25080 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25081 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25082 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25083 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25084 ins_encode %{
25085 int vlen_enc = vector_length_encoding(this);
25086 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25087 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25088 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25089 %}
25090 ins_pipe( pipe_slow );
25091 %}
25092
25093 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25094 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25095 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25096 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25097 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25098 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25099 ins_encode %{
25100 int vlen_enc = vector_length_encoding(this);
25101 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25102 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25103 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25104 %}
25105 ins_pipe( pipe_slow );
25106 %}
25107
25108 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25109 %{
25110 match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vectors $index, $src1, $src2" %}
25112 ins_encode %{
25113 int vlen_enc = vector_length_encoding(this);
25114 BasicType bt = Matcher::vector_element_basic_type(this);
25115 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25116 %}
25117 ins_pipe(pipe_slow);
25118 %}
25119
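// Scalar half-precision (FP16) operations. The two reinterpret rules below
// move the raw 16-bit payload between a general-purpose register and an XMM
// register via vmovw, without any value conversion; the arithmetic rules that
// follow assume the matcher only enables them when the CPU's FP16 support
// (e.g. AVX-512 FP16) is available.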
25120 instruct reinterpretS2HF(regF dst, rRegI src)
25121 %{
25122 match(Set dst (ReinterpretS2HF src));
25123 format %{ "vmovw $dst, $src" %}
25124 ins_encode %{
25125 __ vmovw($dst$$XMMRegister, $src$$Register);
25126 %}
25127 ins_pipe(pipe_slow);
25128 %}
25129
25130 instruct reinterpretHF2S(rRegI dst, regF src)
25131 %{
25132 match(Set dst (ReinterpretHF2S src));
25133 format %{ "vmovw $dst, $src" %}
25134 ins_encode %{
25135 __ vmovw($dst$$Register, $src$$XMMRegister);
25136 %}
25137 ins_pipe(pipe_slow);
25138 %}
25139
25140 instruct convF2HFAndS2HF(regF dst, regF src)
25141 %{
25142 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25143 format %{ "convF2HFAndS2HF $dst, $src" %}
25144 ins_encode %{
25145 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25146 %}
25147 ins_pipe(pipe_slow);
25148 %}
25149
25150 instruct convHF2SAndHF2F(regF dst, regF src)
25151 %{
25152 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25153 format %{ "convHF2SAndHF2F $dst, $src" %}
25154 ins_encode %{
25155 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25156 %}
25157 ins_pipe(pipe_slow);
25158 %}
25159
25160 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25161 %{
25162 match(Set dst (SqrtHF src));
25163 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25164 ins_encode %{
25165 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25166 %}
25167 ins_pipe(pipe_slow);
25168 %}
25169
25170 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25171 %{
25172 match(Set dst (AddHF src1 src2));
25173 match(Set dst (DivHF src1 src2));
25174 match(Set dst (MulHF src1 src2));
25175 match(Set dst (SubHF src1 src2));
25176 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25177 ins_encode %{
25178 int opcode = this->ideal_Opcode();
25179 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25180 %}
25181 ins_pipe(pipe_slow);
25182 %}
25183
25184 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25185 %{
25186 predicate(VM_Version::supports_avx10_2());
25187 match(Set dst (MaxHF src1 src2));
25188 match(Set dst (MinHF src1 src2));
25189 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25190 ins_encode %{
25191 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25192 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25193 %}
25194 ins_pipe( pipe_slow );
25195 %}
25196
25197 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25198 %{
25199 predicate(!VM_Version::supports_avx10_2());
25200 match(Set dst (MaxHF src1 src2));
25201 match(Set dst (MinHF src1 src2));
25202 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t! using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25204 ins_encode %{
25205 int opcode = this->ideal_Opcode();
25206 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25207 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25208 %}
25209 ins_pipe( pipe_slow );
25210 %}
25211
25212 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25213 %{
25214 match(Set dst (FmaHF src2 (Binary dst src1)));
25215 effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma scalarH" %}
25217 ins_encode %{
25218 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25219 %}
25220 ins_pipe( pipe_slow );
25221 %}
25222
25223
25224 instruct vector_sqrt_HF_reg(vec dst, vec src)
25225 %{
25226 match(Set dst (SqrtVHF src));
25227 format %{ "vector_sqrt_fp16 $dst, $src" %}
25228 ins_encode %{
25229 int vlen_enc = vector_length_encoding(this);
25230 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25231 %}
25232 ins_pipe(pipe_slow);
25233 %}
25234
25235 instruct vector_sqrt_HF_mem(vec dst, memory src)
25236 %{
25237 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25238 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25239 ins_encode %{
25240 int vlen_enc = vector_length_encoding(this);
25241 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25242 %}
25243 ins_pipe(pipe_slow);
25244 %}
25245
25246 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25247 %{
25248 match(Set dst (AddVHF src1 src2));
25249 match(Set dst (DivVHF src1 src2));
25250 match(Set dst (MulVHF src1 src2));
25251 match(Set dst (SubVHF src1 src2));
25252 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25253 ins_encode %{
25254 int vlen_enc = vector_length_encoding(this);
25255 int opcode = this->ideal_Opcode();
25256 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25257 %}
25258 ins_pipe(pipe_slow);
25259 %}
25260
25261
25262 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25263 %{
25264 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25265 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25266 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25267 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25268 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25269 ins_encode %{
25270 int vlen_enc = vector_length_encoding(this);
25271 int opcode = this->ideal_Opcode();
25272 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25273 %}
25274 ins_pipe(pipe_slow);
25275 %}
25276
25277 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25278 %{
25279 match(Set dst (FmaVHF src2 (Binary dst src1)));
25280 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25281 ins_encode %{
25282 int vlen_enc = vector_length_encoding(this);
25283 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25284 %}
25285 ins_pipe( pipe_slow );
25286 %}
25287
25288 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25289 %{
25290 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25291 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25292 ins_encode %{
25293 int vlen_enc = vector_length_encoding(this);
25294 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25295 %}
25296 ins_pipe( pipe_slow );
25297 %}
25298
25299 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25300 %{
25301 predicate(VM_Version::supports_avx10_2());
25302 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25303 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25304 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25305 ins_encode %{
25306 int vlen_enc = vector_length_encoding(this);
25307 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25308 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25309 %}
25310 ins_pipe( pipe_slow );
25311 %}
25312
25313 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25314 %{
25315 predicate(VM_Version::supports_avx10_2());
25316 match(Set dst (MinVHF src1 src2));
25317 match(Set dst (MaxVHF src1 src2));
25318 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25319 ins_encode %{
25320 int vlen_enc = vector_length_encoding(this);
25321 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25322 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25323 %}
25324 ins_pipe( pipe_slow );
25325 %}
25326
25327 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25328 %{
25329 predicate(!VM_Version::supports_avx10_2());
25330 match(Set dst (MinVHF src1 src2));
25331 match(Set dst (MaxVHF src1 src2));
25332 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t! using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25334 ins_encode %{
25335 int vlen_enc = vector_length_encoding(this);
25336 int opcode = this->ideal_Opcode();
25337 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25338 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25339 %}
25340 ins_pipe( pipe_slow );
25341 %}
25342
25343 //----------PEEPHOLE RULES-----------------------------------------------------
25344 // These must follow all instruction definitions as they use the names
25345 // defined in the instructions definitions.
25346 //
25347 // peeppredicate ( rule_predicate );
// // the rule is applied only when this predicate evaluates to true
25349 //
25350 // peepmatch ( root_instr_name [preceding_instruction]* );
25351 //
25352 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...). The
// // arguments are the basic block, the current node index inside the block,
// // the register allocator, functions that, when invoked, return a new node
// // as defined in peepreplace, and the rule numbers of the nodes appearing in
// // the corresponding peepmatch. The procedure returns true if the
// // transformation succeeded, else false.
25361 //
25362 // peepconstraint %{
25363 // (instruction_number.operand_name relational_op instruction_number.operand_name
25364 // [, ...] );
// // instruction numbers are zero-based, using left-to-right order in peepmatch
25366 //
25367 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25368 // // provide an instruction_number.operand_name for each operand that appears
25369 // // in the replacement instruction's match rule
25370 //
25371 // ---------VM FLAGS---------------------------------------------------------
25372 //
25373 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25374 //
25375 // Each peephole rule is given an identifying number starting with zero and
25376 // increasing by one in the order seen by the parser. An individual peephole
25377 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25378 // on the command-line.
25379 //
25380 // ---------CURRENT LIMITATIONS----------------------------------------------
25381 //
// Only transformations inside a basic block (do we need more for peephole?)
25383 //
25384 // ---------EXAMPLE----------------------------------------------------------
25385 //
25386 // // pertinent parts of existing instructions in architecture description
25387 // instruct movI(rRegI dst, rRegI src)
25388 // %{
25389 // match(Set dst (CopyI src));
25390 // %}
25391 //
25392 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25393 // %{
25394 // match(Set dst (AddI dst src));
25395 // effect(KILL cr);
25396 // %}
25397 //
25398 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25399 // %{
25400 // match(Set dst (AddI dst src));
25401 // %}
25402 //
25403 // 1. Simple replacement
25404 // - Only match adjacent instructions in same basic block
25405 // - Only equality constraints
25406 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25407 // - Only one replacement instruction
25408 //
25409 // // Change (inc mov) to lea
25410 // peephole %{
25411 // // lea should only be emitted when beneficial
25412 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25413 // // increment preceded by register-register move
25414 // peepmatch ( incI_rReg movI );
25415 // // require that the destination register of the increment
25416 // // match the destination register of the move
25417 // peepconstraint ( 0.dst == 1.dst );
25418 // // construct a replacement instruction that sets
25419 // // the destination to ( move's source register + one )
25420 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25421 // %}
25422 //
25423 // 2. Procedural replacement
// - More flexible finding of relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilize the architecture-dependent API more effectively
25428 // - Currently only one replacement instruction due to adlc parsing capabilities
25429 //
25430 // // Change (inc mov) to lea
25431 // peephole %{
25432 // // lea should only be emitted when beneficial
25433 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25434 // // the rule numbers of these nodes inside are passed into the function below
25435 // peepmatch ( incI_rReg movI );
25436 // // the method that takes the responsibility of transformation
25437 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
// // this node is passed into the procedure above
25440 // peepreplace ( leaI_rReg_immI() );
25441 // %}
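
// As an illustration only, a procedure such as inc_mov_to_lea would reside in
// the architecture-dependent peephole file (peephole_x86_64.cpp). A minimal
// sketch following the signature documented above; the name, parameters and
// body here are purely hypothetical:
//
// bool Peephole::inc_mov_to_lea(Block* block, int block_index,
//                               PhaseRegAlloc* ra_, MachNode* (*new_root)(),
//                               int inc_rule, int mov_rule) {
//   // Locate the matched nodes around block->get_node(block_index), check
//   // that the allocated registers satisfy the intended constraints, then
//   // build the replacement via new_root(), wire up its operands, and
//   // substitute it into the block.
//   return false; // returning false leaves the original instructions intact
// }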
25442
// These instructions are not matched by the matcher but are used by the peephole rules below
25444 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25445 %{
25446 predicate(false);
25447 match(Set dst (AddI src1 src2));
25448 format %{ "leal $dst, [$src1 + $src2]" %}
25449 ins_encode %{
25450 Register dst = $dst$$Register;
25451 Register src1 = $src1$$Register;
25452 Register src2 = $src2$$Register;
25453 if (src1 != rbp && src1 != r13) {
25454 __ leal(dst, Address(src1, src2, Address::times_1));
25455 } else {
25456 assert(src2 != rbp && src2 != r13, "");
25457 __ leal(dst, Address(src2, src1, Address::times_1));
25458 }
25459 %}
25460 ins_pipe(ialu_reg_reg);
25461 %}
25462
25463 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25464 %{
25465 predicate(false);
25466 match(Set dst (AddI src1 src2));
25467 format %{ "leal $dst, [$src1 + $src2]" %}
25468 ins_encode %{
25469 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25470 %}
25471 ins_pipe(ialu_reg_reg);
25472 %}
25473
25474 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25475 %{
25476 predicate(false);
25477 match(Set dst (LShiftI src shift));
25478 format %{ "leal $dst, [$src << $shift]" %}
25479 ins_encode %{
25480 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25481 Register src = $src$$Register;
25482 if (scale == Address::times_2 && src != rbp && src != r13) {
25483 __ leal($dst$$Register, Address(src, src, Address::times_1));
25484 } else {
25485 __ leal($dst$$Register, Address(noreg, src, scale));
25486 }
25487 %}
25488 ins_pipe(ialu_reg_reg);
25489 %}
25490
25491 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25492 %{
25493 predicate(false);
25494 match(Set dst (AddL src1 src2));
25495 format %{ "leaq $dst, [$src1 + $src2]" %}
25496 ins_encode %{
25497 Register dst = $dst$$Register;
25498 Register src1 = $src1$$Register;
25499 Register src2 = $src2$$Register;
25500 if (src1 != rbp && src1 != r13) {
25501 __ leaq(dst, Address(src1, src2, Address::times_1));
25502 } else {
25503 assert(src2 != rbp && src2 != r13, "");
25504 __ leaq(dst, Address(src2, src1, Address::times_1));
25505 }
25506 %}
25507 ins_pipe(ialu_reg_reg);
25508 %}
25509
25510 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25511 %{
25512 predicate(false);
25513 match(Set dst (AddL src1 src2));
25514 format %{ "leaq $dst, [$src1 + $src2]" %}
25515 ins_encode %{
25516 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25517 %}
25518 ins_pipe(ialu_reg_reg);
25519 %}
25520
25521 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25522 %{
25523 predicate(false);
25524 match(Set dst (LShiftL src shift));
25525 format %{ "leaq $dst, [$src << $shift]" %}
25526 ins_encode %{
25527 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25528 Register src = $src$$Register;
25529 if (scale == Address::times_2 && src != rbp && src != r13) {
25530 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25531 } else {
25532 __ leaq($dst$$Register, Address(noreg, src, scale));
25533 }
25534 %}
25535 ins_pipe(ialu_reg_reg);
25536 %}
25537
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
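//
// For example (register choice purely illustrative), a register-register move
// followed by an add
//
//   movl %edi, %eax
//   addl %esi, %eax
//
// may be coalesced by these rules into a single
//
//   leal (%rdi, %rsi), %eax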
25544
25545 peephole
25546 %{
25547 peeppredicate(VM_Version::supports_fast_2op_lea());
25548 peepmatch (addI_rReg);
25549 peepprocedure (lea_coalesce_reg);
25550 peepreplace (leaI_rReg_rReg_peep());
25551 %}
25552
25553 peephole
25554 %{
25555 peeppredicate(VM_Version::supports_fast_2op_lea());
25556 peepmatch (addI_rReg_imm);
25557 peepprocedure (lea_coalesce_imm);
25558 peepreplace (leaI_rReg_immI_peep());
25559 %}
25560
25561 peephole
25562 %{
25563 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25564 VM_Version::is_intel_cascade_lake());
25565 peepmatch (incI_rReg);
25566 peepprocedure (lea_coalesce_imm);
25567 peepreplace (leaI_rReg_immI_peep());
25568 %}
25569
25570 peephole
25571 %{
25572 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25573 VM_Version::is_intel_cascade_lake());
25574 peepmatch (decI_rReg);
25575 peepprocedure (lea_coalesce_imm);
25576 peepreplace (leaI_rReg_immI_peep());
25577 %}
25578
25579 peephole
25580 %{
25581 peeppredicate(VM_Version::supports_fast_2op_lea());
25582 peepmatch (salI_rReg_immI2);
25583 peepprocedure (lea_coalesce_imm);
25584 peepreplace (leaI_rReg_immI2_peep());
25585 %}
25586
25587 peephole
25588 %{
25589 peeppredicate(VM_Version::supports_fast_2op_lea());
25590 peepmatch (addL_rReg);
25591 peepprocedure (lea_coalesce_reg);
25592 peepreplace (leaL_rReg_rReg_peep());
25593 %}
25594
25595 peephole
25596 %{
25597 peeppredicate(VM_Version::supports_fast_2op_lea());
25598 peepmatch (addL_rReg_imm);
25599 peepprocedure (lea_coalesce_imm);
25600 peepreplace (leaL_rReg_immL32_peep());
25601 %}
25602
25603 peephole
25604 %{
25605 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25606 VM_Version::is_intel_cascade_lake());
25607 peepmatch (incL_rReg);
25608 peepprocedure (lea_coalesce_imm);
25609 peepreplace (leaL_rReg_immL32_peep());
25610 %}
25611
25612 peephole
25613 %{
25614 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25615 VM_Version::is_intel_cascade_lake());
25616 peepmatch (decL_rReg);
25617 peepprocedure (lea_coalesce_imm);
25618 peepreplace (leaL_rReg_immL32_peep());
25619 %}
25620
25621 peephole
25622 %{
25623 peeppredicate(VM_Version::supports_fast_2op_lea());
25624 peepmatch (salL_rReg_immI2);
25625 peepprocedure (lea_coalesce_imm);
25626 peepreplace (leaL_rReg_immI2_peep());
25627 %}
25628
25629 peephole
25630 %{
25631 peepmatch (leaPCompressedOopOffset);
25632 peepprocedure (lea_remove_redundant);
25633 %}
25634
25635 peephole
25636 %{
25637 peepmatch (leaP8Narrow);
25638 peepprocedure (lea_remove_redundant);
25639 %}
25640
25641 peephole
25642 %{
25643 peepmatch (leaP32Narrow);
25644 peepprocedure (lea_remove_redundant);
25645 %}
25646
// These peephole rules match instructions which set flags and are followed by
// a testI/L_reg. The test instruction is redundant if the downstream
// instructions (like JCC or CMOV) only use flags that are already set by the
// previous instruction.
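//
// For example (register choice purely illustrative), in a sequence such as
//
//   andl  %esi, %edi    // already sets ZF and SF from the result
//   testl %edi, %edi    // recomputes the same flags from the same value
//   je    done
//
// the test can be removed, since je only consumes flags that the and has
// already produced.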
25649
// int variant
25651 peephole
25652 %{
25653 peepmatch (testI_reg);
25654 peepprocedure (test_may_remove);
25655 %}
25656
// long variant
25658 peephole
25659 %{
25660 peepmatch (testL_reg);
25661 peepprocedure (test_may_remove);
25662 %}
25663
25664
25665 //----------SMARTSPILL RULES---------------------------------------------------
25666 // These must follow all instruction definitions as they use the names
25667 // defined in the instructions definitions.