1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding, VM register (VMReg) );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // History: RBX, RSI, and RDI were once save-on-entry for java code; SOE was
67 // turned off in java-code due to frequent use of uncommon-traps. As defined
68 // below, SOE now appears only as a C convention save type, never for java code.
69
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers: none are defined here; see the XMM registers below.
180
181 // chunk0: allocation priority order for the general registers defined above.
182 // Each register is listed together with its _H half; the highest priority
183 // comes first. A useful heuristic is to give registers a low priority when
184 // they are required by machine instructions (like EAX and EDX on I486), and
185 // to choose no-save registers before save-on-call, and save-on-call before
186 // save-on-entry. Registers which participate in fixed calling sequences
187 // should come last. Registers used as pairs must fall on an even boundary.
188
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM15 as above (REX, or VEX/EVEX for UseAVX); XMM16-XMM31 require EVEX.
231 //
232 // Linux ABI: No register preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
236
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
// XMM20 (encoding 20): sixteen Op_RegF slots, XMM20 plus suffixes b-p, mapped to
// xmm20->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
// XMM21 (encoding 21): sixteen Op_RegF slots, XMM21 plus suffixes b-p, mapped to
// xmm21->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
// XMM22 (encoding 22): sixteen Op_RegF slots, XMM22 plus suffixes b-p, mapped to
// xmm22->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
// XMM23 (encoding 23): sixteen Op_RegF slots, XMM23 plus suffixes b-p, mapped to
// xmm23->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
// XMM24 (encoding 24): sixteen Op_RegF slots, XMM24 plus suffixes b-p, mapped to
// xmm24->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
// XMM25 (encoding 25): sixteen Op_RegF slots, XMM25 plus suffixes b-p, mapped to
// xmm25->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
// XMM26 (encoding 26): sixteen Op_RegF slots, XMM26 plus suffixes b-p, mapped to
// xmm26->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
// XMM27 (encoding 27): sixteen Op_RegF slots, XMM27 plus suffixes b-p, mapped to
// xmm27->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
// XMM28 (encoding 28): sixteen Op_RegF slots, XMM28 plus suffixes b-p, mapped to
// xmm28->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
// XMM29 (encoding 29): sixteen Op_RegF slots, XMM29 plus suffixes b-p, mapped to
// xmm29->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
// XMM30 (encoding 30): sixteen Op_RegF slots, XMM30 plus suffixes b-p, mapped to
// xmm30->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
// XMM31 (encoding 31): sixteen Op_RegF slots, XMM31 plus suffixes b-p, mapped to
// xmm31->as_VMReg() and its next(1)..next(15) successors. Every slot is SOC
// (save-on-call) for both the register save type and the C convention save type.
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
// Flags register: SOC in both save conventions, ideal register type 0,
// encoding 16, and VMRegImpl::Bad() in place of a concrete VMReg.
// NOTE(review): presumably Bad() marks it as having no VMReg slot of its own; confirm.
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
// K1 through K7 are each described by two Op_RegI slots: Kn maps to
// kn->as_VMReg() and Kn_H to that VMReg's next(). All slots are SOC in both
// save conventions and carry the register number n as their encoding.
// No K0 reg_def appears in this list.
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
813 // Empty register class: its register list contains no entries.
814 reg_class no_reg();
815
816 // Class for all pointer/long registers including APX extended GPRs.
// Every register is listed as a pair of slots (name and name_H): the sixteen
// legacy GPRs, RSP and R15 included, followed by the extended GPRs R16-R31.
817 reg_class all_reg(RAX, RAX_H,
818 RDX, RDX_H,
819 RBP, RBP_H,
820 RDI, RDI_H,
821 RSI, RSI_H,
822 RCX, RCX_H,
823 RBX, RBX_H,
824 RSP, RSP_H,
825 R8, R8_H,
826 R9, R9_H,
827 R10, R10_H,
828 R11, R11_H,
829 R12, R12_H,
830 R13, R13_H,
831 R14, R14_H,
832 R15, R15_H,
833 R16, R16_H,
834 R17, R17_H,
835 R18, R18_H,
836 R19, R19_H,
837 R20, R20_H,
838 R21, R21_H,
839 R22, R22_H,
840 R23, R23_H,
841 R24, R24_H,
842 R25, R25_H,
843 R26, R26_H,
844 R27, R27_H,
845 R28, R28_H,
846 R29, R29_H,
847 R30, R30_H,
848 R31, R31_H);
849
850 // Class for all int registers including APX extended GPRs.
// Only the first slot of each register is listed (no _H halves).
// RSP and R15 are not listed here, although all_reg includes them.
// NOTE(review): presumably intentional (stack pointer and TLS pointer, see
// ptr_rsp_reg / ptr_r15_reg); confirm before adding either register.
851 reg_class all_int_reg(RAX,
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
881
// The pointer classes below have no explicit register list; each is defined by
// a code body that returns a named mask, and none of those masks is defined in
// this part of the file.
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
// The long classes below have no explicit register list; each is defined by a
// code body that returns a named mask, and none of those masks is defined in
// this part of the file.
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
// NOTE(review): unlike the sibling comments, RSP is not mentioned here; confirm
// against the definition of _LONG_NO_RBP_R13_REG_mask.
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
// The int classes below have no explicit register list; each is defined by a
// code body that returns a named mask, and none of those masks is defined in
// this part of the file.
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
// NOTE(review): unlike the sibling comments, RSP is not mentioned here; confirm
// against the definition of _INT_NO_RBP_R13_REG_mask.
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
// Pointer singleton classes: each holds both slots (name and name_H) of exactly
// one general register.
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer (RSP)
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer (R15)
966 reg_class ptr_r15_reg(R15, R15_H);
967
// Long singleton classes: each holds both slots (name and name_H) of exactly
// one general register.
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
// Int singleton classes: each holds only the first slot of one general
// register (no _H half).
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
// Allocation class chunk1: all sixteen slots (base name plus suffixes b-p) of
// XMM0 through XMM31, listed in ascending register order.
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
// Allocation class chunk2: both slots of every mask register, listed from K7
// down to K1.
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
// Class for all mask registers: both slots of K1 through K7.
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
// Singleton classes, one per mask register (both slots of that register).
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // Flags allocation class (contains only RFLAGS); it should be the last allocation class.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes (RFLAGS)
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre-EVEX float registers: the first slot of XMM0 through XMM15.
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for EVEX float registers: the first slot of XMM0 through XMM31.
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
// Dynamic float classes built from float_reg_evex and float_reg_legacy.
// float_reg is keyed on VM_Version::supports_evex(); float_reg_vl additionally
// requires VM_Version::supports_avx512vl().
// NOTE(review): reg_class_dynamic is understood to use the first class when the
// predicate holds and the second otherwise; confirm against the ADLC.
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre-EVEX double registers: the first two slots of XMM0 through XMM15.
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for EVEX double registers: the first two slots of XMM0 through XMM31.
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
// Dynamic double classes built from double_reg_evex and double_reg_legacy.
// double_reg is keyed on VM_Version::supports_evex(); double_reg_vl additionally
// requires VM_Version::supports_avx512vl().
// NOTE(review): reg_class_dynamic is understood to use the first class when the
// predicate holds and the second otherwise; confirm against the ADLC.
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre-EVEX 32bit vector registers: the first slot of XMM0 through XMM15.
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for EVEX 32bit vector registers: the first slot of XMM0 through XMM31.
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
// Dynamic 32bit vector classes built from vectors_reg_evex and vectors_reg_legacy.
// vectors_reg is keyed on VM_Version::supports_evex(); vectors_reg_vlbwdq is
// keyed on VM_Version::supports_avx512vlbwdq().
// NOTE(review): reg_class_dynamic is understood to use the first class when the
// predicate holds and the second otherwise; confirm against the ADLC.
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
1226 // Class for pre-EVEX 64bit vector registers: the first two slots of XMM0 through XMM15.
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
1244 // Class for EVEX 64bit vector registers: the first two slots of XMM0 through XMM31.
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
// Dynamic 64bit vector classes built from vectord_reg_evex and vectord_reg_legacy.
// vectord_reg is keyed on VM_Version::supports_evex(); vectord_reg_vlbwdq is
// keyed on VM_Version::supports_avx512vlbwdq().
// NOTE(review): reg_class_dynamic is understood to use the first class when the
// predicate holds and the second otherwise; confirm against the ADLC.
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
1281 // Class for pre-EVEX 128bit vector registers: the first four slots of XMM0 through XMM15.
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
1299 // Class for EVEX 128bit vector registers: the first four slots of XMM0 through XMM31.
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
// Dynamic 128bit vector classes built from vectorx_reg_evex and vectorx_reg_legacy.
// vectorx_reg is keyed on VM_Version::supports_evex(); vectorx_reg_vlbwdq is
// keyed on VM_Version::supports_avx512vlbwdq().
// NOTE(review): reg_class_dynamic is understood to use the first class when the
// predicate holds and the second otherwise; confirm against the ADLC.
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
1336 // Class for pre-EVEX 256bit vector registers: the first eight slots (base name plus b-h) of XMM0 through XMM15.
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
1354 // Class for EVEX 256bit vector registers: the first eight slots (base name plus b-h) of XMM0 through XMM31.
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
1627 return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 // If any floating point comparison instruction is used, unordered case always triggers jump
1703 // for below condition, CF=1 is true when at least one input is NaN
1704 Label done;
1705 __ movl(dst, -1);
1706 __ jcc(Assembler::below, done);
1707 __ setcc(Assembler::notEqual, dst);
1708 __ bind(done);
1709 }
1710
// Floating point precision selector used by the emit_fp_* / movfp helpers.
enum FP_PREC {
  fp_prec_hlf = 0,  // half precision
  fp_prec_flt = 1,  // single precision
  fp_prec_dbl = 2   // double precision
};
1716
1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1718 XMMRegister p, XMMRegister q) {
1719 if (pt == fp_prec_hlf) {
1720 __ evucomish(p, q);
1721 } else if (pt == fp_prec_flt) {
1722 __ ucomiss(p, q);
1723 } else {
1724 __ ucomisd(p, q);
1725 }
1726 }
1727
1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1729 XMMRegister dst, XMMRegister src, Register scratch) {
1730 if (pt == fp_prec_hlf) {
1731 __ movhlf(dst, src, scratch);
1732 } else if (pt == fp_prec_flt) {
1733 __ movflt(dst, src);
1734 } else {
1735 __ movdbl(dst, src);
1736 }
1737 }
1738
1739 // Math.min() # Math.max()
1740 // -----------------------------
1741 // (v)ucomis[h/s/d] #
1742 // ja -> b # a
1743 // jp -> NaN # NaN
1744 // jb -> a # b
1745 // je #
1746 // |-jz -> a | b # a & b
1747 // | -> a #
1748 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1749 XMMRegister a, XMMRegister b,
1750 XMMRegister xmmt, Register rt,
1751 bool min, enum FP_PREC pt) {
1752
1753 Label nan, zero, below, above, done;
1754
1755 emit_fp_ucom(masm, pt, a, b);
1756
1757 if (dst->encoding() != (min ? b : a)->encoding()) {
1758 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1759 } else {
1760 __ jccb(Assembler::above, done);
1761 }
1762
1763 __ jccb(Assembler::parity, nan); // PF=1
1764 __ jccb(Assembler::below, below); // CF=1
1765
1766 // equal
1767 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1768 emit_fp_ucom(masm, pt, a, xmmt);
1769
1770 __ jccb(Assembler::equal, zero);
1771 movfp(masm, pt, dst, a, rt);
1772
1773 __ jmp(done);
1774
1775 __ bind(zero);
1776 if (min) {
1777 __ vpor(dst, a, b, Assembler::AVX_128bit);
1778 } else {
1779 __ vpand(dst, a, b, Assembler::AVX_128bit);
1780 }
1781
1782 __ jmp(done);
1783
1784 __ bind(above);
1785 movfp(masm, pt, dst, min ? b : a, rt);
1786
1787 __ jmp(done);
1788
1789 __ bind(nan);
1790 if (pt == fp_prec_hlf) {
1791 __ movl(rt, 0x00007e00); // Float16.NaN
1792 __ evmovw(dst, rt);
1793 } else if (pt == fp_prec_flt) {
1794 __ movl(rt, 0x7fc00000); // Float.NaN
1795 __ movdl(dst, rt);
1796 } else {
1797 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1798 __ movdq(dst, rt);
1799 }
1800 __ jmp(done);
1801
1802 __ bind(below);
1803 movfp(masm, pt, dst, min ? a : b, rt);
1804
1805 __ bind(done);
1806 }
1807
1808 //=============================================================================
1809 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1810
1811 int ConstantTable::calculate_table_base_offset() const {
1812 return 0; // absolute addressing, no offset
1813 }
1814
1815 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1816 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1817 ShouldNotReachHere();
1818 }
1819
1820 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1821 // Empty encoding
1822 }
1823
1824 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1825 return 0;
1826 }
1827
1828 #ifndef PRODUCT
1829 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1830 st->print("# MachConstantBaseNode (empty encoding)");
1831 }
1832 #endif
1833
1834
1835 //=============================================================================
1836 #ifndef PRODUCT
1837 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1838 Compile* C = ra_->C;
1839
1840 int framesize = C->output()->frame_size_in_bytes();
1841 int bangsize = C->output()->bang_size_in_bytes();
1842 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1843 // Remove wordSize for return addr which is already pushed.
1844 framesize -= wordSize;
1845
1846 if (C->output()->need_stack_bang(bangsize)) {
1847 framesize -= wordSize;
1848 st->print("# stack bang (%d bytes)", bangsize);
1849 st->print("\n\t");
1850 st->print("pushq rbp\t# Save rbp");
1851 if (PreserveFramePointer) {
1852 st->print("\n\t");
1853 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1854 }
1855 if (framesize) {
1856 st->print("\n\t");
1857 st->print("subq rsp, #%d\t# Create frame",framesize);
1858 }
1859 } else {
1860 st->print("subq rsp, #%d\t# Create frame",framesize);
1861 st->print("\n\t");
1862 framesize -= wordSize;
1863 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1864 if (PreserveFramePointer) {
1865 st->print("\n\t");
1866 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1867 if (framesize > 0) {
1868 st->print("\n\t");
1869 st->print("addq rbp, #%d", framesize);
1870 }
1871 }
1872 }
1873
1874 if (VerifyStackAtCalls) {
1875 st->print("\n\t");
1876 framesize -= wordSize;
1877 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1878 #ifdef ASSERT
1879 st->print("\n\t");
1880 st->print("# stack alignment check");
1881 #endif
1882 }
1883 if (C->stub_function() != nullptr) {
1884 st->print("\n\t");
1885 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1886 st->print("\n\t");
1887 st->print("je fast_entry\t");
1888 st->print("\n\t");
1889 st->print("call #nmethod_entry_barrier_stub\t");
1890 st->print("\n\tfast_entry:");
1891 }
1892 st->cr();
1893 }
1894 #endif
1895
1896 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1897 Compile* C = ra_->C;
1898
1899 int framesize = C->output()->frame_size_in_bytes();
1900 int bangsize = C->output()->bang_size_in_bytes();
1901
1902 if (C->clinit_barrier_on_entry()) {
1903 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1904 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1905
1906 Label L_skip_barrier;
1907 Register klass = rscratch1;
1908
1909 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1910 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1911
1912 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1913
1914 __ bind(L_skip_barrier);
1915 }
1916
1917 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1918
1919 C->output()->set_frame_complete(__ offset());
1920
1921 if (C->has_mach_constant_base_node()) {
1922 // NOTE: We set the table base offset here because users might be
1923 // emitted before MachConstantBaseNode.
1924 ConstantTable& constant_table = C->output()->constant_table();
1925 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1926 }
1927 }
1928
1929 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1930 {
1931 return MachNode::size(ra_); // too many variables; just compute it
1932 // the hard way
1933 }
1934
1935 int MachPrologNode::reloc() const
1936 {
1937 return 0; // a large enough number
1938 }
1939
1940 //=============================================================================
1941 #ifndef PRODUCT
1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1943 {
1944 Compile* C = ra_->C;
1945 if (generate_vzeroupper(C)) {
1946 st->print("vzeroupper");
1947 st->cr(); st->print("\t");
1948 }
1949
1950 int framesize = C->output()->frame_size_in_bytes();
1951 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1952 // Remove word for return adr already pushed
1953 // and RBP
1954 framesize -= 2*wordSize;
1955
1956 if (framesize) {
1957 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1958 st->print("\t");
1959 }
1960
1961 st->print_cr("popq rbp");
1962 if (do_polling() && C->is_method_compilation()) {
1963 st->print("\t");
1964 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1965 "ja #safepoint_stub\t"
1966 "# Safepoint: poll for GC");
1967 }
1968 }
1969 #endif
1970
1971 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1972 {
1973 Compile* C = ra_->C;
1974
1975 if (generate_vzeroupper(C)) {
1976 // Clear upper bits of YMM registers when current compiled code uses
1977 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1978 __ vzeroupper();
1979 }
1980
1981 int framesize = C->output()->frame_size_in_bytes();
1982 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1983 // Remove word for return adr already pushed
1984 // and RBP
1985 framesize -= 2*wordSize;
1986
1987 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1988
1989 if (framesize) {
1990 __ addq(rsp, framesize);
1991 }
1992
1993 __ popq(rbp);
1994
1995 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1996 __ reserved_stack_check();
1997 }
1998
1999 if (do_polling() && C->is_method_compilation()) {
2000 Label dummy_label;
2001 Label* code_stub = &dummy_label;
2002 if (!C->output()->in_scratch_emit_size()) {
2003 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
2004 C->output()->add_stub(stub);
2005 code_stub = &stub->entry();
2006 }
2007 __ relocate(relocInfo::poll_return_type);
2008 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
2009 }
2010 }
2011
2012 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
2013 {
2014 return MachNode::size(ra_); // too many variables; just compute it
2015 // the hard way
2016 }
2017
2018 int MachEpilogNode::reloc() const
2019 {
2020 return 2; // a large enough number
2021 }
2022
2023 const Pipeline* MachEpilogNode::pipeline() const
2024 {
2025 return MachNode::pipeline_class();
2026 }
2027
2028 //=============================================================================
2029
// Register class of an OptoReg as determined by rc_class().
enum RC {
  rc_bad   = 0,  // not a valid OptoReg
  rc_int   = 1,  // general purpose register
  rc_kreg  = 2,  // K (mask) register
  rc_float = 3,  // XMM register
  rc_stack = 4   // stack slot
};
2037
2038 static enum RC rc_class(OptoReg::Name reg)
2039 {
2040 if( !OptoReg::is_valid(reg) ) return rc_bad;
2041
2042 if (OptoReg::is_stack(reg)) return rc_stack;
2043
2044 VMReg r = OptoReg::as_VMReg(reg);
2045
2046 if (r->is_Register()) return rc_int;
2047
2048 if (r->is_KRegister()) return rc_kreg;
2049
2050 assert(r->is_XMMRegister(), "must be");
2051 return rc_float;
2052 }
2053
2054 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2055 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2056 int src_hi, int dst_hi, uint ireg, outputStream* st);
2057
2058 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2059 int stack_offset, int reg, uint ireg, outputStream* st);
2060
2061 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2062 int dst_offset, uint ireg, outputStream* st) {
2063 if (masm) {
2064 switch (ireg) {
2065 case Op_VecS:
2066 __ movq(Address(rsp, -8), rax);
2067 __ movl(rax, Address(rsp, src_offset));
2068 __ movl(Address(rsp, dst_offset), rax);
2069 __ movq(rax, Address(rsp, -8));
2070 break;
2071 case Op_VecD:
2072 __ pushq(Address(rsp, src_offset));
2073 __ popq (Address(rsp, dst_offset));
2074 break;
2075 case Op_VecX:
2076 __ pushq(Address(rsp, src_offset));
2077 __ popq (Address(rsp, dst_offset));
2078 __ pushq(Address(rsp, src_offset+8));
2079 __ popq (Address(rsp, dst_offset+8));
2080 break;
2081 case Op_VecY:
2082 __ vmovdqu(Address(rsp, -32), xmm0);
2083 __ vmovdqu(xmm0, Address(rsp, src_offset));
2084 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2085 __ vmovdqu(xmm0, Address(rsp, -32));
2086 break;
2087 case Op_VecZ:
2088 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2089 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2090 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2091 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2092 break;
2093 default:
2094 ShouldNotReachHere();
2095 }
2096 #ifndef PRODUCT
2097 } else {
2098 switch (ireg) {
2099 case Op_VecS:
2100 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2101 "movl rax, [rsp + #%d]\n\t"
2102 "movl [rsp + #%d], rax\n\t"
2103 "movq rax, [rsp - #8]",
2104 src_offset, dst_offset);
2105 break;
2106 case Op_VecD:
2107 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2108 "popq [rsp + #%d]",
2109 src_offset, dst_offset);
2110 break;
2111 case Op_VecX:
2112 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2113 "popq [rsp + #%d]\n\t"
2114 "pushq [rsp + #%d]\n\t"
2115 "popq [rsp + #%d]",
2116 src_offset, dst_offset, src_offset+8, dst_offset+8);
2117 break;
2118 case Op_VecY:
2119 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2120 "vmovdqu xmm0, [rsp + #%d]\n\t"
2121 "vmovdqu [rsp + #%d], xmm0\n\t"
2122 "vmovdqu xmm0, [rsp - #32]",
2123 src_offset, dst_offset);
2124 break;
2125 case Op_VecZ:
2126 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2127 "vmovdqu xmm0, [rsp + #%d]\n\t"
2128 "vmovdqu [rsp + #%d], xmm0\n\t"
2129 "vmovdqu xmm0, [rsp - #64]",
2130 src_offset, dst_offset);
2131 break;
2132 default:
2133 ShouldNotReachHere();
2134 }
2135 #endif
2136 }
2137 }
2138
// Shared worker for spill copies: either emits the move through 'masm' or,
// when 'masm' is null, prints a textual form of it to 'st'.
//
//   masm    - assembler to emit into; null selects print mode.
//   ra_     - register allocator, queried for the source (in(1)) and
//             destination (this) registers and for stack slot offsets.
//   do_size - not read anywhere in this body.
//   st      - stream used in print mode; at least one of masm/st must be
//             non-null (asserted below).
//
// Every return statement in this function returns 0.
//
// The printing branches sit inside '#ifndef PRODUCT', so the 'else' arm of
// each 'if (masm)' only exists in non-product builds.
//
// A move is treated as 64-bit when both the source and the destination are an
// aligned, adjacent (first, second) register pair; otherwise it is handled as
// a 32-bit move.
uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(masm != nullptr || st != nullptr, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  // Classify each half by register class (stack slot, gpr, xmm, kreg).
  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector values (but not predicate/mask vectors) are delegated to the
  // vector helpers, keyed by the ideal register kind.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (masm) {
          // Copy through the stack with a push/pop pair; no scratch register.
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq [rsp + #%d]",
                    src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // save rax at [rsp - 8], copy through rax, then restore rax.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl rax, [rsp + #%d]\n\t"
                    "movl [rsp + #%d], rax\n\t"
                    "movq rax, [rsp - #8]",
                    src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          // The printed mnemonic follows the UseXmmLoadAndClearUpper flag.
          st->print("%s %s, [rsp + #%d]\t# spill",
                    UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      // Only the 64-bit pair case is handled; anything else emits nothing.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): Unimplemented() is reached on every gpr -> kreg copy,
      // including right after the 64-bit kmov above was emitted or printed.
      // Presumably this path is never expected to be taken - confirm.
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          // The printed mnemonic follows the UseXmmRegToRegMoveAll flag.
          st->print("%s %s, %s\t# spill",
                    UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s %s, %s\t# spill",
                    UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // xmm -> kreg is rejected
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    // kreg ->
    if (dst_first_rc == rc_stack) {
      // kreg -> mem
      // Only the 64-bit pair case is handled; anything else emits nothing.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq [rsp + #%d] , %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): as with gpr -> kreg, Unimplemented() is reached on every
      // kreg -> gpr copy, even after the kmov above. Presumably this path is
      // never expected to be taken - confirm.
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // kreg -> xmm is rejected
      assert(false, "Illegal spill");
      return 0;
    }
  }

  // No source/destination class combination matched.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
2575
2576 #ifndef PRODUCT
2577 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2578 implementation(nullptr, ra_, false, st);
2579 }
2580 #endif
2581
2582 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2583 implementation(masm, ra_, false, nullptr);
2584 }
2585
2586 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2587 return MachNode::size(ra_);
2588 }
2589
2590 //=============================================================================
2591 #ifndef PRODUCT
2592 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2593 {
2594 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2595 int reg = ra_->get_reg_first(this);
2596 st->print("leaq %s, [rsp + #%d]\t# box lock",
2597 Matcher::regName[reg], offset);
2598 }
2599 #endif
2600
2601 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2602 {
2603 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2604 int reg = ra_->get_encode(this);
2605
2606 __ lea(as_Register(reg), Address(rsp, offset));
2607 }
2608
2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2610 {
2611 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2612 if (ra_->get_encode(this) > 15) {
2613 return (offset < 0x80) ? 6 : 9; // REX2
2614 } else {
2615 return (offset < 0x80) ? 5 : 8; // REX
2616 }
2617 }
2618
2619 //=============================================================================
2620 #ifndef PRODUCT
2621 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2622 {
2623 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2624 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2625 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2626 }
2627 #endif
2628
2629 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2630 {
2631 __ ic_check(InteriorEntryAlignment);
2632 }
2633
2634 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2635 {
2636 return MachNode::size(ra_); // too many variables; just compute it
2637 // the hard way
2638 }
2639
2640
2641 //=============================================================================
2642
2643 bool Matcher::supports_vector_calling_convention(void) {
2644 return EnableVectorSupport;
2645 }
2646
2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2648 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2649 }
2650
2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2652 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2653 }
2654
#ifdef ASSERT
// Debug-only: true when either NDD demotion flag is set on the node.
static bool is_ndd_demotable(const MachNode* mdef) {
  if (is_ndd_demotable_opr1(mdef)) {
    return true;
  }
  return is_ndd_demotable_opr2(mdef);
}
#endif
2660
2661 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2662 int oper_index) {
2663 if (mdef == nullptr) {
2664 return false;
2665 }
2666
2667 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2668 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2669 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2670 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2671 return false;
2672 }
2673
2674 // Complex memory operand covers multiple incoming edges needed for
2675 // address computation. Biasing def towards any address component will not
2676 // result in NDD demotion by assembler.
2677 if (mdef->operand_num_edges(oper_index) != 1) {
2678 return false;
2679 }
2680
2681 // Demotion candidate must be register mask compatible with definition.
2682 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2683 if (!oper_mask.overlap(mdef->out_RegMask())) {
2684 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2685 return false;
2686 }
2687
2688 switch (oper_index) {
2689 // First operand of MachNode corresponding to Intel APX NDD selection
2690 // pattern can share its assigned register with definition operand if
2691 // their live ranges do not overlap. In such a scenario we can demote
2692 // it to legacy map0/map1 instruction by replacing its 4-byte extended
2693 // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
2694 // are decorated with a special flag by instruction selector.
2695 case 1:
2696 return is_ndd_demotable_opr1(mdef);
2697
2698 // Definition operand of commutative operation can be biased towards second
2699 // operand.
2700 case 2:
2701 return is_ndd_demotable_opr2(mdef);
2702
2703 // Current scheme only selects up to two biasing candidates
2704 default:
2705 assert(false, "unhandled operand index: %s", mdef->Name());
2706 break;
2707 }
2708
2709 return false;
2710 }
2711
2712 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2713 assert(EnableVectorSupport, "sanity");
2714 int lo = XMM0_num;
2715 int hi = XMM0b_num;
2716 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2717 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2718 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2719 return OptoRegPair(hi, lo);
2720 }
2721
2722 // Is this branch offset short enough that a short branch can be used?
2723 //
2724 // NOTE: If the platform does not provide any short branch variants, then
2725 // this method should return false for offset 0.
2726 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2727 // The passed offset is relative to address of the branch.
2728 // On 86 a branch displacement is calculated relative to address
2729 // of a next instruction.
2730 offset -= br_size;
2731
2732 // the short version of jmpConUCF2 contains multiple branches,
2733 // making the reach slightly less
2734 if (rule == jmpConUCF2_rule)
2735 return (-126 <= offset && offset <= 125);
2736 return (-128 <= offset && offset <= 127);
2737 }
2738
#ifdef ASSERT
// Return whether or not this register is ever used as an argument.
// The accepted set is both halves of RDI, RSI, RDX, RCX, R8, R9 and R12,
// plus the first two slots of XMM0 through XMM7.
bool Matcher::can_be_java_arg(int reg)
{
  switch (reg) {
    case RDI_num:  case RDI_H_num:
    case RSI_num:  case RSI_H_num:
    case RDX_num:  case RDX_H_num:
    case RCX_num:  case RCX_H_num:
    case R8_num:   case R8_H_num:
    case R9_num:   case R9_H_num:
    case R12_num:  case R12_H_num:
    case XMM0_num: case XMM0b_num:
    case XMM1_num: case XMM1b_num:
    case XMM2_num: case XMM2b_num:
    case XMM3_num: case XMM3b_num:
    case XMM4_num: case XMM4b_num:
    case XMM5_num: case XMM5b_num:
    case XMM6_num: case XMM6b_num:
    case XMM7_num: case XMM7b_num:
      return true;
    default:
      return false;
  }
}
#endif
2761
2762 uint Matcher::int_pressure_limit()
2763 {
2764 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2765 }
2766
2767 uint Matcher::float_pressure_limit()
2768 {
2769 // After experiment around with different values, the following default threshold
2770 // works best for LCM's register pressure scheduling on x64.
2771 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2772 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2773 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2774 }
2775
2776 // Register for DIVI projection of divmodI
2777 const RegMask& Matcher::divI_proj_mask() {
2778 return INT_RAX_REG_mask();
2779 }
2780
2781 // Register for MODI projection of divmodI
2782 const RegMask& Matcher::modI_proj_mask() {
2783 return INT_RDX_REG_mask();
2784 }
2785
2786 // Register for DIVL projection of divmodL
2787 const RegMask& Matcher::divL_proj_mask() {
2788 return LONG_RAX_REG_mask();
2789 }
2790
2791 // Register for MODL projection of divmodL
2792 const RegMask& Matcher::modL_proj_mask() {
2793 return LONG_RDX_REG_mask();
2794 }
2795
2796 %}
2797
2798 source_hpp %{
2799 // Header information of the source block.
2800 // Method declarations/definitions which are used outside
2801 // the ad-scope can conveniently be defined here.
2802 //
// To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
2805
2806 #include "runtime/vm_version.hpp"
2807
2808 class NativeJump;
2809
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Byte size of a call trampoline stub. This platform has no call
  // trampolines, so the size is zero.
  static uint size_call_trampoline() {
    const uint trampoline_bytes = 0;
    return trampoline_bytes;
  }

  // Number of relocations a call trampoline stub needs. This platform has no
  // call trampolines, so none are needed.
  static uint reloc_call_trampoline() {
    const uint trampoline_relocs = 0;
    return trampoline_relocs;
  }
};
2827
2828 class HandlerImpl {
2829
2830 public:
2831
2832 static int emit_deopt_handler(C2_MacroAssembler* masm);
2833
2834 static uint size_deopt_handler() {
2835 // one call and one jmp.
2836 return 7;
2837 }
2838 };
2839
2840 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2841 switch(bytes) {
2842 case 4: // fall-through
2843 case 8: // fall-through
2844 case 16: return Assembler::AVX_128bit;
2845 case 32: return Assembler::AVX_256bit;
2846 case 64: return Assembler::AVX_512bit;
2847
2848 default: {
2849 ShouldNotReachHere();
2850 return Assembler::AVX_NoVec;
2851 }
2852 }
2853 }
2854
2855 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2856 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2857 }
2858
2859 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2860 uint def_idx = use->operand_index(opnd);
2861 Node* def = use->in(def_idx);
2862 return vector_length_encoding(def);
2863 }
2864
2865 static inline bool is_vector_popcount_predicate(BasicType bt) {
2866 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2867 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2868 }
2869
2870 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2871 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2872 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2873 }
2874
// Platform-dependent node flags. Each flag takes the next bit above
// Node::_last_flag, in declaration order (shift counts 1 through 13).
class Node::PD {
public:
  enum NodeFlags : uint64_t {
    // Tested by MachNode::compute_padding() to decide whether padding applies.
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    // Condition-flag effects, one "sets" and one "clears" bit per flag.
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    // NDD demotion candidates for operand 1 and operand 2; consulted by
    // Matcher::is_register_biasing_candidate().
    Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
    Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
    // Highest platform flag; must name the last entry above.
    _last_flag                = Flag_ndd_demotable_opr2
  };
};
2894
2895 %} // end source_hpp
2896
2897 source %{
2898
2899 #include "opto/addnode.hpp"
2900 #include "c2_intelJccErratum_x86.hpp"
2901
2902 void PhaseOutput::pd_perform_mach_node_analysis() {
2903 if (VM_Version::has_intel_jcc_erratum()) {
2904 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2905 _buf_sizes._code += extra_padding;
2906 }
2907 }
2908
2909 int MachNode::pd_alignment_required() const {
2910 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2911 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2912 return IntelJccErratum::largest_jcc_size() + 1;
2913 } else {
2914 return 1;
2915 }
2916 }
2917
2918 int MachNode::compute_padding(int current_offset) const {
2919 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2920 Compile* C = Compile::current();
2921 PhaseOutput* output = C->output();
2922 Block* block = output->block();
2923 int index = output->index();
2924 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2925 } else {
2926 return 0;
2927 }
2928 }
2929
// Emit deopt handler code.
//
// Layout of the stub: a call to the deopt blob's unpack entry, followed by a
// jmp back to the start of the stub.
//
// Returns the code offset immediately after the call (the position of the
// jmp), or 0 when the stub could not be started; in that case a
// "CodeCache is full" failure is recorded on the current ciEnv.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  address base = __ start_a_stub(size_deopt_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  // Offset of the stub's first byte; used for the size check below.
  int offset = __ offset();

  Label start;
  __ bind(start);

  __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  // The returned entry point sits right after the call.
  int entry_offset = __ offset();

  __ jmp(start);

  // The stub must fit the size reserved by size_deopt_handler().
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  // At least first_check_size bytes must follow the entry point.
  assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
         "out of bounds read in post-call NOP check");
  __ end_a_stub();
  return entry_offset;
}
2957
2958 static Assembler::Width widthForType(BasicType bt) {
2959 if (bt == T_BYTE) {
2960 return Assembler::B;
2961 } else if (bt == T_SHORT) {
2962 return Assembler::W;
2963 } else if (bt == T_INT) {
2964 return Assembler::D;
2965 } else {
2966 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2967 return Assembler::Q;
2968 }
2969 }
2970
2971 //=============================================================================
2972
2973 // Float masks come from different places depending on platform.
2974 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2975 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2976 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2977 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2978 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2979 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2980 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2981 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2982 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2983 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2984 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2985 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2986 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2987 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2988 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2989 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2990 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2991 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2992 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2993
2994 //=============================================================================
2995 bool Matcher::match_rule_supported(int opcode) {
2996 if (!has_match_rule(opcode)) {
2997 return false; // no match rule present
2998 }
2999 switch (opcode) {
3000 case Op_AbsVL:
3001 case Op_StoreVectorScatter:
3002 if (UseAVX < 3) {
3003 return false;
3004 }
3005 break;
3006 case Op_PopCountI:
3007 case Op_PopCountL:
3008 if (!UsePopCountInstruction) {
3009 return false;
3010 }
3011 break;
3012 case Op_PopCountVI:
3013 if (UseAVX < 2) {
3014 return false;
3015 }
3016 break;
3017 case Op_CompressV:
3018 case Op_ExpandV:
3019 case Op_PopCountVL:
3020 if (UseAVX < 2) {
3021 return false;
3022 }
3023 break;
3024 case Op_MulVI:
3025 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3026 return false;
3027 }
3028 break;
3029 case Op_MulVL:
3030 if (UseSSE < 4) { // only with SSE4_1 or AVX
3031 return false;
3032 }
3033 break;
3034 case Op_MulReductionVL:
3035 if (VM_Version::supports_avx512dq() == false) {
3036 return false;
3037 }
3038 break;
3039 case Op_AbsVB:
3040 case Op_AbsVS:
3041 case Op_AbsVI:
3042 case Op_AddReductionVI:
3043 case Op_AndReductionV:
3044 case Op_OrReductionV:
3045 case Op_XorReductionV:
3046 if (UseSSE < 3) { // requires at least SSSE3
3047 return false;
3048 }
3049 break;
3050 case Op_MaxHF:
3051 case Op_MinHF:
3052 if (!VM_Version::supports_avx512vlbw()) {
3053 return false;
3054 } // fallthrough
3055 case Op_AddHF:
3056 case Op_DivHF:
3057 case Op_FmaHF:
3058 case Op_MulHF:
3059 case Op_ReinterpretS2HF:
3060 case Op_ReinterpretHF2S:
3061 case Op_SubHF:
3062 case Op_SqrtHF:
3063 if (!VM_Version::supports_avx512_fp16()) {
3064 return false;
3065 }
3066 break;
3067 case Op_VectorLoadShuffle:
3068 case Op_VectorRearrange:
3069 case Op_MulReductionVI:
3070 if (UseSSE < 4) { // requires at least SSE4
3071 return false;
3072 }
3073 break;
3074 case Op_IsInfiniteF:
3075 case Op_IsInfiniteD:
3076 if (!VM_Version::supports_avx512dq()) {
3077 return false;
3078 }
3079 break;
3080 case Op_SqrtVD:
3081 case Op_SqrtVF:
3082 case Op_VectorMaskCmp:
3083 case Op_VectorCastB2X:
3084 case Op_VectorCastS2X:
3085 case Op_VectorCastI2X:
3086 case Op_VectorCastL2X:
3087 case Op_VectorCastF2X:
3088 case Op_VectorCastD2X:
3089 case Op_VectorUCastB2X:
3090 case Op_VectorUCastS2X:
3091 case Op_VectorUCastI2X:
3092 case Op_VectorMaskCast:
3093 if (UseAVX < 1) { // enabled for AVX only
3094 return false;
3095 }
3096 break;
3097 case Op_PopulateIndex:
3098 if (UseAVX < 2) {
3099 return false;
3100 }
3101 break;
3102 case Op_RoundVF:
3103 if (UseAVX < 2) { // enabled for AVX2 only
3104 return false;
3105 }
3106 break;
3107 case Op_RoundVD:
3108 if (UseAVX < 3) {
3109 return false; // enabled for AVX3 only
3110 }
3111 break;
3112 case Op_CompareAndSwapL:
3113 case Op_CompareAndSwapP:
3114 break;
3115 case Op_StrIndexOf:
3116 if (!UseSSE42Intrinsics) {
3117 return false;
3118 }
3119 break;
3120 case Op_StrIndexOfChar:
3121 if (!UseSSE42Intrinsics) {
3122 return false;
3123 }
3124 break;
3125 case Op_OnSpinWait:
3126 if (VM_Version::supports_on_spin_wait() == false) {
3127 return false;
3128 }
3129 break;
3130 case Op_MulVB:
3131 case Op_LShiftVB:
3132 case Op_RShiftVB:
3133 case Op_URShiftVB:
3134 case Op_VectorInsert:
3135 case Op_VectorLoadMask:
3136 case Op_VectorStoreMask:
3137 case Op_VectorBlend:
3138 if (UseSSE < 4) {
3139 return false;
3140 }
3141 break;
3142 case Op_MaxD:
3143 case Op_MaxF:
3144 case Op_MinD:
3145 case Op_MinF:
3146 if (UseAVX < 1) { // enabled for AVX only
3147 return false;
3148 }
3149 break;
3150 case Op_CacheWB:
3151 case Op_CacheWBPreSync:
3152 case Op_CacheWBPostSync:
3153 if (!VM_Version::supports_data_cache_line_flush()) {
3154 return false;
3155 }
3156 break;
3157 case Op_ExtractB:
3158 case Op_ExtractL:
3159 case Op_ExtractI:
3160 case Op_RoundDoubleMode:
3161 if (UseSSE < 4) {
3162 return false;
3163 }
3164 break;
3165 case Op_RoundDoubleModeV:
3166 if (VM_Version::supports_avx() == false) {
3167 return false; // 128bit vroundpd is not available
3168 }
3169 break;
3170 case Op_LoadVectorGather:
3171 case Op_LoadVectorGatherMasked:
3172 if (UseAVX < 2) {
3173 return false;
3174 }
3175 break;
3176 case Op_FmaF:
3177 case Op_FmaD:
3178 case Op_FmaVD:
3179 case Op_FmaVF:
3180 if (!UseFMA) {
3181 return false;
3182 }
3183 break;
3184 case Op_MacroLogicV:
3185 if (UseAVX < 3 || !UseVectorMacroLogic) {
3186 return false;
3187 }
3188 break;
3189
3190 case Op_VectorCmpMasked:
3191 case Op_VectorMaskGen:
3192 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3193 return false;
3194 }
3195 break;
3196 case Op_VectorMaskFirstTrue:
3197 case Op_VectorMaskLastTrue:
3198 case Op_VectorMaskTrueCount:
3199 case Op_VectorMaskToLong:
3200 if (UseAVX < 1) {
3201 return false;
3202 }
3203 break;
3204 case Op_RoundF:
3205 case Op_RoundD:
3206 break;
3207 case Op_CopySignD:
3208 case Op_CopySignF:
3209 if (UseAVX < 3) {
3210 return false;
3211 }
3212 if (!VM_Version::supports_avx512vl()) {
3213 return false;
3214 }
3215 break;
3216 case Op_CompressBits:
3217 case Op_ExpandBits:
3218 if (!VM_Version::supports_bmi2()) {
3219 return false;
3220 }
3221 break;
3222 case Op_CompressM:
3223 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3224 return false;
3225 }
3226 break;
3227 case Op_ConvF2HF:
3228 case Op_ConvHF2F:
3229 if (!VM_Version::supports_float16()) {
3230 return false;
3231 }
3232 break;
3233 case Op_VectorCastF2HF:
3234 case Op_VectorCastHF2F:
3235 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3236 return false;
3237 }
3238 break;
3239 }
3240 return true; // Match rules are supported by default.
3241 }
3242
3243 //------------------------------------------------------------------------
3244
3245 static inline bool is_pop_count_instr_target(BasicType bt) {
3246 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3247 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3248 }
3249
3250 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3251 return match_rule_supported_vector(opcode, vlen, bt);
3252 }
3253
// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
//
// The check is layered:
//   1. match_rule_supported(opcode) must hold;
//   2. vector_size_supported(bt, vlen) must hold;
//   3. the per-opcode switch below may reject specific (vlen, bt) combinations.
// Opcodes not listed in the switch are supported once steps 1 and 2 pass.
//
// opcode - ideal opcode being queried
// vlen   - number of vector elements
// bt     - element basic type
// Returns true if the combination is supported.
bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  if (!match_rule_supported(opcode)) {
    return false;
  }
  // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
  //   * SSE2 supports 128bit vectors for all types;
  //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
  //   * AVX2 supports 256bit vectors for all types;
  //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
  //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
  // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
  // And MaxVectorSize is taken into account as well.
  if (!vector_size_supported(bt, vlen)) {
    return false;
  }
  // Special cases which require vector length follow:
  //   * implementation limitations
  //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
  //   * 128bit vroundpd instruction is present only in AVX1
  // Total vector width in bits, derived from the element count and element size.
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  switch (opcode) {
    case Op_MaxVHF:
    case Op_MinVHF:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      // NOTE(review): no break here, so min/max also run the common
      // half-float vector checks below; presumably intentional - confirm.
    case Op_AddVHF:
    case Op_DivVHF:
    case Op_FmaVHF:
    case Op_MulVHF:
    case Op_SubVHF:
    case Op_SqrtVHF:
      // Vectors narrower than 512 bits additionally need AVX512VL.
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (!VM_Version::supports_avx512_fp16()) {
        return false;
      }
      break;
    case Op_AbsVF:
    case Op_NegVF:
      if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vandps and vxorps are not available
      }
      break;
    case Op_AbsVD:
    case Op_NegVD:
      if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vpmullq, vandpd and vxorpd are not available
      }
      break;
    case Op_RotateRightV:
    case Op_RotateLeftV:
      // Rotates are only accepted for INT and LONG elements.
      if (bt != T_INT && bt != T_LONG) {
        return false;
      } // fallthrough
    case Op_MacroLogicV:
      if (!VM_Version::supports_evex() ||
          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
        return false;
      }
      break;
    case Op_ClearArray:
    case Op_VectorMaskGen:
    case Op_VectorCmpMasked:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      // Without AVX512BW, masked load/store is rejected for sub-word types
      // and whenever AVX is disabled.
      if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
        return false;
      }
      break;
    case Op_UMinV:
    case Op_UMaxV:
      if (UseAVX == 0) {
        return false;
      }
      break;
    case Op_UMinReductionV:
    case Op_UMaxReductionV:
      if (UseAVX == 0) {
        return false;
      }
      if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_MaxV:
    case Op_MinV:
      if (UseSSE < 4 && is_integral_type(bt)) {
        return false;
      }
      if ((bt == T_FLOAT || bt == T_DOUBLE)) {
          // Float/Double intrinsics are enabled for AVX family currently.
          if (UseAVX == 0) {
            return false;
          }
          if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
            return false;
          }
      }
      break;
    case Op_CallLeafVector:
      if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
        return false;
      }
      break;
    case Op_AddReductionVI:
      if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
        return false;
      }
      // fallthrough
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      // Sub-word reductions need at least SSE4.
      if (is_subword_type(bt) && (UseSSE < 4)) {
        return false;
      }
      break;
    case Op_MinReductionV:
    case Op_MaxReductionV:
      if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
        return false;
      } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
        return false;
      }
      // Float/Double intrinsics enabled for AVX family.
      if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
        return false;
      }
      // This 512-bit restriction is applied regardless of element type.
      if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
        return false;
      }
      break;
    case Op_VectorBlend:
      if (UseAVX == 0 && size_in_bits < 128) {
        return false;
      }
      break;
    case Op_VectorTest:
      if (UseSSE < 4) {
        return false; // Implementation limitation
      } else if (size_in_bits < 32) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if(vlen == 2) {
        return false; // Implementation limitation due to how shuffle is loaded
      } else if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadMask:
    case Op_VectorMaskCast:
      if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      // fallthrough
    case Op_VectorStoreMask:
      if (vlen == 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_PopulateIndex:
      if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
      if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
      break;
    case Op_VectorCastL2X:
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2X: {
        // As per JLS section 5.1.3 narrowing conversion to sub-word types
        // happen after intermediate conversion to integer and special handling
        // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
        // The source width is computed from the FLOAT element size rather
        // than from 'bt', which is used here as the destination type.
        int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
        if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
          return false;
        }
      }
      // fallthrough
    case Op_VectorCastD2X:
      if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      // Accepted with F16C, or with EVEX (plus AVX512VL when not 512 bits).
      if (!VM_Version::supports_f16c() &&
         ((!VM_Version::supports_evex() ||
         ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
        return false;
      }
      break;
    case Op_RoundVD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_MulReductionVI:
      if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_LoadVectorGatherMasked:
      if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (is_subword_type(bt) &&
         ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
          (size_in_bits < 64)                                      ||
          (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
        return false;
      }
      break;
    case Op_StoreVectorScatterMasked:
    case Op_StoreVectorScatter:
      // Scatter stores are rejected for sub-word element types.
      if (is_subword_type(bt)) {
        return false;
      } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // fallthrough
    case Op_LoadVectorGather:
      if (!is_subword_type(bt) && size_in_bits == 64) {
        return false;
      }
      if (is_subword_type(bt) && size_in_bits < 64) {
        return false;
      }
      break;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      if (UseAVX < 1) {
        return false; // Implementation limitation
      }
      if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_SelectFromTwoVector:
       if (size_in_bits < 128) {
         return false;
       }
       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
         return false;
       }
       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
         return false;
       }
       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
         return false;
       }
       if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
         return false;
       }
       break;
    case Op_MaskAll:
      if (!VM_Version::supports_evex()) {
        return false;
      }
      if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_VectorMaskCmp:
      if (vlen < 2 || size_in_bits < 32) {
        return false;
      }
      break;
    case Op_CompressM:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
        return false;
      }
      if (size_in_bits < 128 ) {
        return false;
      }
      // NOTE(review): no break here, so CompressV/ExpandV also run the
      // Op_VectorLongToMask checks below - confirm this is intentional.
    case Op_VectorLongToMask:
      if (UseAVX < 1) {
        return false;
      }
      if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_SignumVD:
    case Op_SignumVF:
      if (UseAVX < 1) {
        return false;
      }
      break;
    case Op_PopCountVI:
    case Op_PopCountVL: {
        // When is_pop_count_instr_target(bt) is false, 512-bit vectors
        // are only accepted with AVX512BW.
        if (!is_pop_count_instr_target(bt) &&
            (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
          return false;
        }
      }
      break;
    case Op_ReverseV:
    case Op_ReverseBytesV:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CountTrailingZerosV:
    case Op_CountLeadingZerosV:
      if (UseAVX < 2) {
        return false;
      }
      break;
  }
  return true;  // Per default match rules are supported.
}
3601
3602 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3603 // ADLC based match_rule_supported routine checks for the existence of pattern based
3604 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
3605 // of their non-masked counterpart with mask edge being the differentiator.
3606 // This routine does a strict check on the existence of masked operation patterns
3607 // by returning a default false value for all the other opcodes apart from the
3608 // ones whose masked instruction patterns are defined in this file.
3609 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3610 return false;
3611 }
3612
3613 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3614 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3615 return false;
3616 }
3617 switch(opcode) {
3618 // Unary masked operations
3619 case Op_AbsVB:
3620 case Op_AbsVS:
3621 if(!VM_Version::supports_avx512bw()) {
3622 return false; // Implementation limitation
3623 }
3624 case Op_AbsVI:
3625 case Op_AbsVL:
3626 return true;
3627
3628 // Ternary masked operations
3629 case Op_FmaVF:
3630 case Op_FmaVD:
3631 return true;
3632
3633 case Op_MacroLogicV:
3634 if(bt != T_INT && bt != T_LONG) {
3635 return false;
3636 }
3637 return true;
3638
3639 // Binary masked operations
3640 case Op_AddVB:
3641 case Op_AddVS:
3642 case Op_SubVB:
3643 case Op_SubVS:
3644 case Op_MulVS:
3645 case Op_LShiftVS:
3646 case Op_RShiftVS:
3647 case Op_URShiftVS:
3648 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3649 if (!VM_Version::supports_avx512bw()) {
3650 return false; // Implementation limitation
3651 }
3652 return true;
3653
3654 case Op_MulVL:
3655 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3656 if (!VM_Version::supports_avx512dq()) {
3657 return false; // Implementation limitation
3658 }
3659 return true;
3660
3661 case Op_AndV:
3662 case Op_OrV:
3663 case Op_XorV:
3664 case Op_RotateRightV:
3665 case Op_RotateLeftV:
3666 if (bt != T_INT && bt != T_LONG) {
3667 return false; // Implementation limitation
3668 }
3669 return true;
3670
3671 case Op_VectorLoadMask:
3672 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3673 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3674 return false;
3675 }
3676 return true;
3677
3678 case Op_AddVI:
3679 case Op_AddVL:
3680 case Op_AddVF:
3681 case Op_AddVD:
3682 case Op_SubVI:
3683 case Op_SubVL:
3684 case Op_SubVF:
3685 case Op_SubVD:
3686 case Op_MulVI:
3687 case Op_MulVF:
3688 case Op_MulVD:
3689 case Op_DivVF:
3690 case Op_DivVD:
3691 case Op_SqrtVF:
3692 case Op_SqrtVD:
3693 case Op_LShiftVI:
3694 case Op_LShiftVL:
3695 case Op_RShiftVI:
3696 case Op_RShiftVL:
3697 case Op_URShiftVI:
3698 case Op_URShiftVL:
3699 case Op_LoadVectorMasked:
3700 case Op_StoreVectorMasked:
3701 case Op_LoadVectorGatherMasked:
3702 case Op_StoreVectorScatterMasked:
3703 return true;
3704
3705 case Op_UMinV:
3706 case Op_UMaxV:
3707 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3708 return false;
3709 } // fallthrough
3710 case Op_MaxV:
3711 case Op_MinV:
3712 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3713 return false; // Implementation limitation
3714 }
3715 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3716 return false; // Implementation limitation
3717 }
3718 return true;
3719 case Op_SaturatingAddV:
3720 case Op_SaturatingSubV:
3721 if (!is_subword_type(bt)) {
3722 return false;
3723 }
3724 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3725 return false; // Implementation limitation
3726 }
3727 return true;
3728
3729 case Op_VectorMaskCmp:
3730 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3731 return false; // Implementation limitation
3732 }
3733 return true;
3734
3735 case Op_VectorRearrange:
3736 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3737 return false; // Implementation limitation
3738 }
3739 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3740 return false; // Implementation limitation
3741 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3742 return false; // Implementation limitation
3743 }
3744 return true;
3745
3746 // Binary Logical operations
3747 case Op_AndVMask:
3748 case Op_OrVMask:
3749 case Op_XorVMask:
3750 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3751 return false; // Implementation limitation
3752 }
3753 return true;
3754
3755 case Op_PopCountVI:
3756 case Op_PopCountVL:
3757 if (!is_pop_count_instr_target(bt)) {
3758 return false;
3759 }
3760 return true;
3761
3762 case Op_MaskAll:
3763 return true;
3764
3765 case Op_CountLeadingZerosV:
3766 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3767 return true;
3768 }
3769 default:
3770 return false;
3771 }
3772 }
3773
// Always returns false here; neither 'node' nor 'vt' is inspected.
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}
3777
3778 // Return true if Vector::rearrange needs preparation of the shuffle argument
3779 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3780 switch (elem_bt) {
3781 case T_BYTE: return false;
3782 case T_SHORT: return !VM_Version::supports_avx512bw();
3783 case T_INT: return !VM_Version::supports_avx();
3784 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3785 default:
3786 ShouldNotReachHere();
3787 return false;
3788 }
3789 }
3790
3791 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3792 // Prefer predicate if the mask type is "TypePVectMask".
3793 return vt->isa_pvectmask() != nullptr;
3794 }
3795
3796 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3797 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3798 bool legacy = (generic_opnd->opcode() == LEGVEC);
3799 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3800 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3801 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3802 return new legVecZOper();
3803 }
3804 if (legacy) {
3805 switch (ideal_reg) {
3806 case Op_VecS: return new legVecSOper();
3807 case Op_VecD: return new legVecDOper();
3808 case Op_VecX: return new legVecXOper();
3809 case Op_VecY: return new legVecYOper();
3810 case Op_VecZ: return new legVecZOper();
3811 }
3812 } else {
3813 switch (ideal_reg) {
3814 case Op_VecS: return new vecSOper();
3815 case Op_VecD: return new vecDOper();
3816 case Op_VecX: return new vecXOper();
3817 case Op_VecY: return new vecYOper();
3818 case Op_VecZ: return new vecZOper();
3819 }
3820 }
3821 ShouldNotReachHere();
3822 return nullptr;
3823 }
3824
3825 bool Matcher::is_reg2reg_move(MachNode* m) {
3826 switch (m->rule()) {
3827 case MoveVec2Leg_rule:
3828 case MoveLeg2Vec_rule:
3829 case MoveF2VL_rule:
3830 case MoveF2LEG_rule:
3831 case MoveVL2F_rule:
3832 case MoveLEG2F_rule:
3833 case MoveD2VL_rule:
3834 case MoveD2LEG_rule:
3835 case MoveVL2D_rule:
3836 case MoveLEG2D_rule:
3837 return true;
3838 default:
3839 return false;
3840 }
3841 }
3842
3843 bool Matcher::is_generic_vector(MachOper* opnd) {
3844 switch (opnd->opcode()) {
3845 case VEC:
3846 case LEGVEC:
3847 return true;
3848 default:
3849 return false;
3850 }
3851 }
3852
3853 //------------------------------------------------------------------------
3854
3855 const RegMask* Matcher::predicate_reg_mask(void) {
3856 return &_VECTMASK_REG_mask;
3857 }
3858
3859 // Max vector size in bytes. 0 if not supported.
3860 int Matcher::vector_width_in_bytes(BasicType bt) {
3861 assert(is_java_primitive(bt), "only primitive type vectors");
3862 // SSE2 supports 128bit vectors for all types.
3863 // AVX2 supports 256bit vectors for all types.
3864 // AVX2/EVEX supports 512bit vectors for all types.
3865 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3866 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3867 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3868 size = (UseAVX > 2) ? 64 : 32;
3869 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3870 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3871 // Use flag to limit vector size.
3872 size = MIN2(size,(int)MaxVectorSize);
3873 // Minimum 2 values in vector (or 4 for bytes).
3874 switch (bt) {
3875 case T_DOUBLE:
3876 case T_LONG:
3877 if (size < 16) return 0;
3878 break;
3879 case T_FLOAT:
3880 case T_INT:
3881 if (size < 8) return 0;
3882 break;
3883 case T_BOOLEAN:
3884 if (size < 4) return 0;
3885 break;
3886 case T_CHAR:
3887 if (size < 4) return 0;
3888 break;
3889 case T_BYTE:
3890 if (size < 4) return 0;
3891 break;
3892 case T_SHORT:
3893 if (size < 4) return 0;
3894 break;
3895 default:
3896 ShouldNotReachHere();
3897 }
3898 return size;
3899 }
3900
3901 // Limits on vector size (number of elements) loaded into vector.
3902 int Matcher::max_vector_size(const BasicType bt) {
3903 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3904 }
3905 int Matcher::min_vector_size(const BasicType bt) {
3906 int max_size = max_vector_size(bt);
3907 // Min size which can be loaded into vector is 4 bytes.
3908 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3909 // Support for calling svml double64 vectors
3910 if (bt == T_DOUBLE) {
3911 size = 1;
3912 }
3913 return MIN2(size,max_size);
3914 }
3915
3916 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3917 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3918 // by default on Cascade Lake
3919 if (VM_Version::is_default_intel_cascade_lake()) {
3920 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3921 }
3922 return Matcher::max_vector_size(bt);
3923 }
3924
// Always returns -1 here; 'bt' is not inspected.
int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}
3928
3929 // Vector ideal reg corresponding to specified size in bytes
3930 uint Matcher::vector_ideal_reg(int size) {
3931 assert(MaxVectorSize >= size, "");
3932 switch(size) {
3933 case 4: return Op_VecS;
3934 case 8: return Op_VecD;
3935 case 16: return Op_VecX;
3936 case 32: return Op_VecY;
3937 case 64: return Op_VecZ;
3938 }
3939 ShouldNotReachHere();
3940 return 0;
3941 }
3942
3943 // Check for shift by small constant as well
3944 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3945 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3946 shift->in(2)->get_int() <= 3 &&
3947 // Are there other uses besides address expressions?
3948 !matcher->is_visited(shift)) {
3949 address_visited.set(shift->_idx); // Flag as address_visited
3950 mstack.push(shift->in(2), Matcher::Visit);
3951 Node *conv = shift->in(1);
3952 // Allow Matcher to match the rule which bypass
3953 // ConvI2L operation for an array index on LP64
3954 // if the index value is positive.
3955 if (conv->Opcode() == Op_ConvI2L &&
3956 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3957 // Are there other uses besides address expressions?
3958 !matcher->is_visited(conv)) {
3959 address_visited.set(conv->_idx); // Flag as address_visited
3960 mstack.push(conv->in(1), Matcher::Pre_Visit);
3961 } else {
3962 mstack.push(conv, Matcher::Pre_Visit);
3963 }
3964 return true;
3965 }
3966 return false;
3967 }
3968
3969 // This function identifies sub-graphs in which a 'load' node is
3970 // input to two different nodes, and such that it can be matched
3971 // with BMI instructions like blsi, blsr, etc.
3972 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
3973 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3974 // refers to the same node.
3975 //
3976 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3977 // This is a temporary solution until we make DAGs expressible in ADL.
3978 template<typename ConType>
3979 class FusedPatternMatcher {
3980 Node* _op1_node;
3981 Node* _mop_node;
3982 int _con_op;
3983
3984 static int match_next(Node* n, int next_op, int next_op_idx) {
3985 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3986 return -1;
3987 }
3988
3989 if (next_op_idx == -1) { // n is commutative, try rotations
3990 if (n->in(1)->Opcode() == next_op) {
3991 return 1;
3992 } else if (n->in(2)->Opcode() == next_op) {
3993 return 2;
3994 }
3995 } else {
3996 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3997 if (n->in(next_op_idx)->Opcode() == next_op) {
3998 return next_op_idx;
3999 }
4000 }
4001 return -1;
4002 }
4003
4004 public:
4005 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4006 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4007
4008 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4009 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4010 typename ConType::NativeType con_value) {
4011 if (_op1_node->Opcode() != op1) {
4012 return false;
4013 }
4014 if (_mop_node->outcnt() > 2) {
4015 return false;
4016 }
4017 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4018 if (op1_op2_idx == -1) {
4019 return false;
4020 }
4021 // Memory operation must be the other edge
4022 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4023
4024 // Check that the mop node is really what we want
4025 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4026 Node* op2_node = _op1_node->in(op1_op2_idx);
4027 if (op2_node->outcnt() > 1) {
4028 return false;
4029 }
4030 assert(op2_node->Opcode() == op2, "Should be");
4031 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4032 if (op2_con_idx == -1) {
4033 return false;
4034 }
4035 // Memory operation must be the other edge
4036 int op2_mop_idx = (op2_con_idx & 1) + 1;
4037 // Check that the memory operation is the same node
4038 if (op2_node->in(op2_mop_idx) == _mop_node) {
4039 // Now check the constant
4040 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4041 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4042 return true;
4043 }
4044 }
4045 }
4046 return false;
4047 }
4048 };
4049
4050 static bool is_bmi_pattern(Node* n, Node* m) {
4051 assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
4052 if (n != nullptr && m != nullptr) {
4053 if (m->Opcode() == Op_LoadI) {
4054 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4055 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4056 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4057 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4058 } else if (m->Opcode() == Op_LoadL) {
4059 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4060 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4061 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4062 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4063 }
4064 }
4065 return false;
4066 }
4067
4068 // Should the matcher clone input 'm' of node 'n'?
4069 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4070 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4071 if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
4072 mstack.push(m, Visit);
4073 return true;
4074 }
4075 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4076 mstack.push(m, Visit); // m = ShiftCntV
4077 return true;
4078 }
4079 if (is_encode_and_store_pattern(n, m)) {
4080 mstack.push(m, Visit);
4081 return true;
4082 }
4083 return false;
4084 }
4085
4086 // Should the Matcher clone shifts on addressing modes, expecting them
4087 // to be subsumed into complex addressing expressions or compute them
4088 // into registers?
4089 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4090 Node *off = m->in(AddPNode::Offset);
4091 if (off->is_Con()) {
4092 address_visited.test_set(m->_idx); // Flag as address_visited
4093 Node *adr = m->in(AddPNode::Address);
4094
4095 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4096 // AtomicAdd is not an addressing expression.
4097 // Cheap to find it by looking for screwy base.
4098 if (adr->is_AddP() &&
4099 !adr->in(AddPNode::Base)->is_top() &&
4100 !adr->in(AddPNode::Offset)->is_Con() &&
4101 off->get_long() == (int) (off->get_long()) && // immL32
4102 // Are there other uses besides address expressions?
4103 !is_visited(adr)) {
4104 address_visited.set(adr->_idx); // Flag as address_visited
4105 Node *shift = adr->in(AddPNode::Offset);
4106 if (!clone_shift(shift, this, mstack, address_visited)) {
4107 mstack.push(shift, Pre_Visit);
4108 }
4109 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4110 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4111 } else {
4112 mstack.push(adr, Pre_Visit);
4113 }
4114
4115 // Clone X+offset as it also folds into most addressing expressions
4116 mstack.push(off, Visit);
4117 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4118 return true;
4119 } else if (clone_shift(off, this, mstack, address_visited)) {
4120 address_visited.test_set(m->_idx); // Flag as address_visited
4121 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4122 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4123 return true;
4124 }
4125 return false;
4126 }
4127
4128 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4129 switch (bt) {
4130 case BoolTest::eq:
4131 return Assembler::eq;
4132 case BoolTest::ne:
4133 return Assembler::neq;
4134 case BoolTest::le:
4135 case BoolTest::ule:
4136 return Assembler::le;
4137 case BoolTest::ge:
4138 case BoolTest::uge:
4139 return Assembler::nlt;
4140 case BoolTest::lt:
4141 case BoolTest::ult:
4142 return Assembler::lt;
4143 case BoolTest::gt:
4144 case BoolTest::ugt:
4145 return Assembler::nle;
4146 default : ShouldNotReachHere(); return Assembler::_false;
4147 }
4148 }
4149
4150 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4151 switch (bt) {
4152 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4153 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4154 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4155 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4156 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4157 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4158 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4159 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4160 }
4161 }
4162
4163 // Helper methods for MachSpillCopyNode::implementation().
4164 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4165 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4166 assert(ireg == Op_VecS || // 32bit vector
4167 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4168 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4169 "no non-adjacent vector moves" );
4170 if (masm) {
4171 switch (ireg) {
4172 case Op_VecS: // copy whole register
4173 case Op_VecD:
4174 case Op_VecX:
4175 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4176 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4177 } else {
4178 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4179 }
4180 break;
4181 case Op_VecY:
4182 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4183 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4184 } else {
4185 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4186 }
4187 break;
4188 case Op_VecZ:
4189 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4190 break;
4191 default:
4192 ShouldNotReachHere();
4193 }
4194 #ifndef PRODUCT
4195 } else {
4196 switch (ireg) {
4197 case Op_VecS:
4198 case Op_VecD:
4199 case Op_VecX:
4200 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4201 break;
4202 case Op_VecY:
4203 case Op_VecZ:
4204 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4205 break;
4206 default:
4207 ShouldNotReachHere();
4208 }
4209 #endif
4210 }
4211 }
4212
4213 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4214 int stack_offset, int reg, uint ireg, outputStream* st) {
4215 if (masm) {
4216 if (is_load) {
4217 switch (ireg) {
4218 case Op_VecS:
4219 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4220 break;
4221 case Op_VecD:
4222 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4223 break;
4224 case Op_VecX:
4225 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4226 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4227 } else {
4228 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4229 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4230 }
4231 break;
4232 case Op_VecY:
4233 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4234 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4235 } else {
4236 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4237 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4238 }
4239 break;
4240 case Op_VecZ:
4241 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4242 break;
4243 default:
4244 ShouldNotReachHere();
4245 }
4246 } else { // store
4247 switch (ireg) {
4248 case Op_VecS:
4249 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4250 break;
4251 case Op_VecD:
4252 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4253 break;
4254 case Op_VecX:
4255 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4256 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4257 }
4258 else {
4259 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4260 }
4261 break;
4262 case Op_VecY:
4263 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4264 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4265 }
4266 else {
4267 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4268 }
4269 break;
4270 case Op_VecZ:
4271 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4272 break;
4273 default:
4274 ShouldNotReachHere();
4275 }
4276 }
4277 #ifndef PRODUCT
4278 } else {
4279 if (is_load) {
4280 switch (ireg) {
4281 case Op_VecS:
4282 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4283 break;
4284 case Op_VecD:
4285 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4286 break;
4287 case Op_VecX:
4288 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4289 break;
4290 case Op_VecY:
4291 case Op_VecZ:
4292 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4293 break;
4294 default:
4295 ShouldNotReachHere();
4296 }
4297 } else { // store
4298 switch (ireg) {
4299 case Op_VecS:
4300 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4301 break;
4302 case Op_VecD:
4303 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4304 break;
4305 case Op_VecX:
4306 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4307 break;
4308 case Op_VecY:
4309 case Op_VecZ:
4310 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4311 break;
4312 default:
4313 ShouldNotReachHere();
4314 }
4315 }
4316 #endif
4317 }
4318 }
4319
4320 template <class T>
4321 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4322 int size = type2aelembytes(bt) * len;
4323 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4324 for (int i = 0; i < len; i++) {
4325 int offset = i * type2aelembytes(bt);
4326 switch (bt) {
4327 case T_BYTE: val->at(i) = con; break;
4328 case T_SHORT: {
4329 jshort c = con;
4330 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4331 break;
4332 }
4333 case T_INT: {
4334 jint c = con;
4335 memcpy(val->adr_at(offset), &c, sizeof(jint));
4336 break;
4337 }
4338 case T_LONG: {
4339 jlong c = con;
4340 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4341 break;
4342 }
4343 case T_FLOAT: {
4344 jfloat c = con;
4345 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4346 break;
4347 }
4348 case T_DOUBLE: {
4349 jdouble c = con;
4350 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4351 break;
4352 }
4353 default: assert(false, "%s", type2name(bt));
4354 }
4355 }
4356 return val;
4357 }
4358
4359 static inline jlong high_bit_set(BasicType bt) {
4360 switch (bt) {
4361 case T_BYTE: return 0x8080808080808080;
4362 case T_SHORT: return 0x8000800080008000;
4363 case T_INT: return 0x8000000080000000;
4364 case T_LONG: return 0x8000000000000000;
4365 default:
4366 ShouldNotReachHere();
4367 return 0;
4368 }
4369 }
4370
4371 #ifndef PRODUCT
// Non-PRODUCT listing text for the padding node; prints _count as the pad size in bytes.
4372 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4373 st->print("nop \t# %d bytes pad for loops and calls", _count);
4374 }
4375 #endif
4376
// Emits the padding by passing _count to the assembler's nop().
4377 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4378 __ nop(_count);
4379 }
4380
// Reports _count as the node size, i.e. the same value handed to nop() in emit().
4381 uint MachNopNode::size(PhaseRegAlloc*) const {
4382 return _count;
4383 }
4384
4385 #ifndef PRODUCT
// Non-PRODUCT listing text for the breakpoint node.
4386 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4387 st->print("# breakpoint");
4388 }
4389 #endif
4390
// Emits a single int3 instruction.
4391 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4392 __ int3();
4393 }
4394
// Size is not hard-coded here; it is delegated to the generic MachNode::size().
4395 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4396 return MachNode::size(ra_);
4397 }
4398
4399 %}
4400
4401 //----------ENCODING BLOCK-----------------------------------------------------
4402 // This block specifies the encoding classes used by the compiler to
4403 // output byte streams. Encoding classes are parameterized macros
4404 // used by Machine Instruction Nodes in order to generate the bit
4405 // encoding of the instruction. Operands specify their base encoding
4406 // interface with the interface keyword. Four interfaces are currently
4407 // supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
4408 // COND_INTER. REG_INTER causes an operand to generate a function
4409 // which returns its register number when queried. CONST_INTER causes
4410 // an operand to generate a function which returns the value of the
4411 // constant when queried. MEMORY_INTER causes an operand to generate
4412 // four functions which return the Base Register, the Index Register,
4413 // the Scale Value, and the Offset Value of the operand when queried.
4414 // COND_INTER causes an operand to generate six functions which return
4415 // the encoding code (i.e., the encoding bits for the instruction)
4416 // associated with each basic boolean condition for a conditional
4417 // instruction.
4418 //
4419 // Instructions specify two basic values for encoding. Again, a
4420 // function is available to check if the constant displacement is an
4421 // oop. They use the ins_encode keyword to specify their encoding
4422 // classes (which must be a sequence of enc_class names, and their
4423 // parameters, specified in the encoding block), and they use the
4424 // opcode keyword to specify, in order, their primary, secondary, and
4425 // tertiary opcode. Only the opcode sections which a particular
4426 // instruction needs for encoding need to be specified.
4427 encode %{
// Encoding class for 32-bit signed division. Emits the whole sequence:
// the min_int / -1 check, the sign extension of EAX into EDX (cdql) and the
// idivl by $div. In the special case the idivl is skipped entirely and
// EDX is cleared, leaving min_int in RAX as the quotient and 0 as remainder.
4428 enc_class cdql_enc(no_rax_rdx_RegI div)
4429 %{
4430 // Full implementation of Java idiv and irem; checks for
4431 // special case as described in JVM spec., p.243 & p.271.
4432 //
4433 // normal case special case
4434 //
4435 // input : rax: dividend min_int
4436 // reg: divisor -1
4437 //
4438 // output: rax: quotient (= rax idiv reg) min_int
4439 // rdx: remainder (= rax irem reg) 0
4440 //
4441 // Code sequence:
4442 //
4443 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4444 // 5: 75 07/08 jne e <normal>
4445 // 7: 33 d2 xor %edx,%edx
4446 // [div >= 8 -> offset + 1]
4447 // [REX_B]
4448 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4449 // c: 74 03/04 je 11 <done>
4450 // 000000000000000e <normal>:
4451 // e: 99 cltd
4452 // [div >= 8 -> offset + 1]
4453 // [REX_B]
4454 // f: f7 f9 idiv $div
4455 // 0000000000000011 <done>:
4456 Label normal;
4457 Label done;
4458
4459 // cmp $0x80000000,%eax
4460 __ cmpl(as_Register(RAX_enc), 0x80000000);
4461
4462 // jne e <normal>
4463 __ jccb(Assembler::notEqual, normal);
4464
4465 // xor %edx,%edx
4466 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4467
4468 // cmp $0xffffffffffffffff,$div
4469 __ cmpl($div$$Register, -1);
4470
4471 // je 11 <done>
4472 __ jccb(Assembler::equal, done);
4473
4474 // <normal>
4475 // cltd
4476 __ bind(normal);
4477 __ cdql();
4478
4479 // idivl
4480 // <done>
4481 __ idivl($div$$Register);
4482 __ bind(done);
4483 %}
4484
// Encoding class for 64-bit signed division. Emits the whole sequence:
// the min_long / -1 check, the sign extension of RAX into RDX (cdqq) and the
// idivq by $div. RDX is used as a scratch register for the min_long
// constant. In the special case the idivq is skipped entirely and RDX is
// cleared, leaving min_long in RAX as the quotient and 0 as remainder.
4485 enc_class cdqq_enc(no_rax_rdx_RegL div)
4486 %{
4487 // Full implementation of Java ldiv and lrem; checks for
4488 // special case as described in JVM spec., p.243 & p.271.
4489 //
4490 // normal case special case
4491 //
4492 // input : rax: dividend min_long
4493 // reg: divisor -1
4494 //
4495 // output: rax: quotient (= rax idiv reg) min_long
4496 // rdx: remainder (= rax irem reg) 0
4497 //
4498 // Code sequence:
4499 //
4500 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4501 // 7: 00 00 80
4502 // a: 48 39 d0 cmp %rdx,%rax
4503 // d: 75 08 jne 17 <normal>
4504 // f: 33 d2 xor %edx,%edx
4505 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4506 // 15: 74 05 je 1c <done>
4507 // 0000000000000017 <normal>:
4508 // 17: 48 99 cqto
4509 // 19: 48 f7 f9 idiv $div
4510 // 000000000000001c <done>:
4511 Label normal;
4512 Label done;
4513
4514 // mov $0x8000000000000000,%rdx
4515 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4516
4517 // cmp %rdx,%rax
4518 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4519
4520 // jne 17 <normal>
4521 __ jccb(Assembler::notEqual, normal);
4522
4523 // xor %edx,%edx
4524 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4525
4526 // cmp $0xffffffffffffffff,$div
4527 __ cmpq($div$$Register, -1);
4528
4529 // je 1c <done>
4530 __ jccb(Assembler::equal, done);
4531
4532 // <normal>
4533 // cqto
4534 __ bind(normal);
4535 __ cdqq();
4536
4537 // idivq (emitted here as part of this encoding class)
4538 // <done>
4539 __ idivq($div$$Register);
4540 __ bind(done);
4541 %}
4542
// Conditionally emits vzeroupper, and in debug builds verifies that the
// number of bytes emitted equals clear_avx_size().
4543 enc_class clear_avx %{
4544 DEBUG_ONLY(int off0 = __ offset());
4545 if (generate_vzeroupper(Compile::current())) {
4546 // Clear the upper bits of the YMM registers when the current compiled
4547 // code uses wide vectors, to avoid the AVX <-> SSE transition penalty
4548 // during the call.
4549 __ vzeroupper();
4550 }
4551 DEBUG_ONLY(int off1 = __ offset());
4552 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4553 %}
4554
// Call from compiled Java code into a runtime routine: the target address is
// materialized in r10, called indirectly through it, and followed by the
// post-call nop.
4555 enc_class Java_To_Runtime(method meth) %{
4556 __ lea(r10, RuntimeAddress((address)$meth$$method));
4557 __ call(r10);
4558 __ post_call_nop();
4559 %}
4560
// Encoding for static (and optimized virtual) Java calls. Three cases:
//  - no _method: plain call to the runtime address $meth;
//  - _method is the _ensureMaterializedForStackWalk intrinsic: the call is
//    elided and replaced by 5 bytes of nop;
//  - otherwise: a relocated call, plus either a shared or a freshly emitted
//    to-interpreter stub.
// Every path that does not bail out ends with the post-call nop.
4561 enc_class Java_Static_Call(method meth)
4562 %{
4563 // JAVA STATIC CALL
4564 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4565 // determine who we intended to call.
4566 if (!_method) {
4567 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4568 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4569 // The NOP here is purely to ensure that eliding a call to
4570 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4571 __ nop(5);
4572 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4573 } else {
4574 int method_index = resolved_method_index(masm);
// Relocation kind depends on whether this is an optimized virtual or a static call.
4575 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4576 : static_call_Relocation::spec(method_index);
// Record the call site both as an address (for the stub) and as an offset
// (for the shared-stub request) before emitting the call.
4577 address mark = __ pc();
4578 int call_offset = __ offset();
4579 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4580 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4581 // Calls of the same statically bound method can share
4582 // a stub to the interpreter.
4583 __ code()->shared_stub_to_interp_for(_method, call_offset);
4584 } else {
4585 // Emit stubs for static call.
4586 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4587 __ clear_inst_mark();
4588 if (stub == nullptr) {
// Stub emission failed: record the failure and stop emitting
// (the post-call nop below is skipped).
4589 ciEnv::current()->record_failure("CodeCache is full");
4590 return;
4591 }
4592 }
4593 }
4594 __ post_call_nop();
4595 %}
4596
// Encoding for dynamically dispatched Java calls: an inline-cache call
// (ic_call) to $meth with the resolved method index, followed by the
// post-call nop.
4597 enc_class Java_Dynamic_Call(method meth) %{
4598 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4599 __ post_call_nop();
4600 %}
4601
// Post-call check, emitted only when VerifyStackAtCalls is set: compares the
// stack word at the computed frame offset with the 0xbadb100d cookie and
// executes int3 if it does not match.
4602 enc_class call_epilog %{
4603 if (VerifyStackAtCalls) {
4604 // Check that stack depth is unchanged: find majik cookie on stack
// The cookie offset is derived from the slot three words below _old_SP.
4605 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4606 Label L;
4607 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4608 __ jccb(Assembler::equal, L);
4609 // Die if stack mismatch
4610 __ int3();
4611 __ bind(L);
4612 }
4613 %}
4614
4615 %}
4616
4617 //----------FRAME--------------------------------------------------------------
4618 // Definition of frame structure and management information.
4619 //
4620 // S T A C K L A Y O U T Allocators stack-slot number
4621 // | (to get allocators register number
4622 // G Owned by | | v add OptoReg::stack0())
4623 // r CALLER | |
4624 // o | +--------+ pad to even-align allocators stack-slot
4625 // w V | pad0 | numbers; owned by CALLER
4626 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4627 // h ^ | in | 5
4628 // | | args | 4 Holes in incoming args owned by SELF
4629 // | | | | 3
4630 // | | +--------+
4631 // V | | old out| Empty on Intel, window on Sparc
4632 // | old |preserve| Must be even aligned.
4633 // | SP-+--------+----> Matcher::_old_SP, even aligned
4634 // | | in | 3 area for Intel ret address
4635 // Owned by |preserve| Empty on Sparc.
4636 // SELF +--------+
4637 // | | pad2 | 2 pad to align old SP
4638 // | +--------+ 1
4639 // | | locks | 0
4640 // | +--------+----> OptoReg::stack0(), even aligned
4641 // | | pad1 | 11 pad to align new SP
4642 // | +--------+
4643 // | | | 10
4644 // | | spills | 9 spills
4645 // V | | 8 (pad0 slot for callee)
4646 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4647 // ^ | out | 7
4648 // | | args | 6 Holes in outgoing args owned by CALLEE
4649 // Owned by +--------+
4650 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4651 // | new |preserve| Must be even-aligned.
4652 // | SP-+--------+----> Matcher::_new_SP, even aligned
4653 // | | |
4654 //
4655 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4656 // known from SELF's arguments and the Java calling convention.
4657 // Region 6-7 is determined per call site.
4658 // Note 2: If the calling convention leaves holes in the incoming argument
4659 // area, those holes are owned by SELF. Holes in the outgoing area
4660 // are owned by the CALLEE. Holes should not be necessary in the
4661 // incoming area, as the Java calling convention is completely under
4662 // the control of the AD file. Doubles can be sorted and packed to
4663 // avoid holes. Holes in the outgoing arguments may be necessary for
4664 // varargs C calling conventions.
4665 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4666 // even aligned with pad0 as needed.
4667 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4668 // region 6-11 is even aligned; it may be padded out more so that
4669 // the region from SP to FP meets the minimum stack alignment.
4670 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4671 // alignment. Region 11, pad1, may be dynamically extended so that
4672 // SP meets the minimum alignment.
4673
4674 frame
4675 %{
4676 // This register defines part of the calling convention
4677 // between compiled code and the interpreter.
4678 inline_cache_reg(RAX); // Inline Cache Register
4679
4680 // Optional: name the operand used by cisc-spilling to access
4681 // [stack_pointer + offset]
4682 cisc_spilling_operand_name(indOffset32);
4683
4684 // Number of stack slots consumed by locking an object
4685 sync_stack_slots(2);
4686
4687 // Compiled code's Frame Pointer
4688 frame_pointer(RSP);
4689
4690 // Stack alignment requirement
4691 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4692
4693 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4694 // for calls to C. Supports the var-args backing area for register parms.
4695 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4696
4697 // The after-PROLOG location of the return address. Location of
4698 // return address specifies a type (REG or STACK) and a number
4699 // representing the register number (i.e. - use a register name) or
4700 // stack slot.
4701 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4702 // Otherwise, it is above the locks and verification slot and alignment word
4703 return_addr(STACK - 2 +
4704 align_up((Compile::current()->in_preserve_stack_slots() +
4705 Compile::current()->fixed_slots()),
4706 stack_alignment_in_slots()));
4707
4708 // Location of compiled Java return values. Same as C for now.
// Maps an ideal register type to the (hi, lo) register pair holding the
// return value, using the two tables below indexed by ideal_reg.
4709 return_value
4710 %{
4711 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4712 "only return normal values");
4713
// Low halves: RAX for integer/pointer kinds, XMM0 for float/double.
// The first two entries are unused placeholders.
4714 static const int lo[Op_RegL + 1] = {
4715 0,
4716 0,
4717 RAX_num, // Op_RegN
4718 RAX_num, // Op_RegI
4719 RAX_num, // Op_RegP
4720 XMM0_num, // Op_RegF
4721 XMM0_num, // Op_RegD
4722 RAX_num // Op_RegL
4723 };
// High halves: OptoReg::Bad for the kinds that occupy a single slot
// (RegN, RegI, RegF); RAX_H / XMM0b for the two-slot kinds.
4724 static const int hi[Op_RegL + 1] = {
4725 0,
4726 0,
4727 OptoReg::Bad, // Op_RegN
4728 OptoReg::Bad, // Op_RegI
4729 RAX_H_num, // Op_RegP
4730 OptoReg::Bad, // Op_RegF
4731 XMM0b_num, // Op_RegD
4732 RAX_H_num // Op_RegL
4733 };
4734 // Excluded flags and vector registers.
4735 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4736 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4737 %}
4738 %}
4739
4740 //----------ATTRIBUTES---------------------------------------------------------
4741 //----------Operand Attributes-------------------------------------------------
// Default operand cost is 0; individual operands override it with op_cost(...).
4742 op_attrib op_cost(0); // Required cost attribute
4743
4744 //----------Instruction Attributes---------------------------------------------
// Defaults applied to every instruction that does not set the attribute itself.
4745 ins_attrib ins_cost(100); // Required cost attribute
4746 ins_attrib ins_size(8); // Required size attribute (in bits)
4747 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4748 // a non-matching short branch variant
4749 // of some long branch?
4750 ins_attrib ins_alignment(1); // Required alignment attribute (must
4751 // be a power of 2) specifies the
4752 // alignment that some part of the
4753 // instruction (not necessarily the
4754 // start) requires. If > 1, a
4755 // compute_padding() function must be
4756 // provided for the instruction
4757
4758 // Whether this node is expanded during code emission into a sequence of
4759 // instructions and the first instruction can perform an implicit null check.
// Defaults to false.
4760 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4761
4762 //----------OPERANDS-----------------------------------------------------------
4763 // Operand definitions must precede instruction definitions for correct parsing
4764 // in the ADLC because operands constitute user defined types which are used in
4765 // instruction definitions.
4766
4767 //----------Simple Operands----------------------------------------------------
4768 // Immediate Operands
4769 // Integer Immediate: matches any ConI node (no predicate).
4770 operand immI()
4771 %{
4772 match(ConI);
4773
4774 op_cost(10);
4775 format %{ %}
4776 interface(CONST_INTER);
4777 %}
4778
4779 // Constant for test vs zero: a ConI whose value is exactly 0.
4780 operand immI_0()
4781 %{
4782 predicate(n->get_int() == 0);
4783 match(ConI);
4784
4785 op_cost(0);
4786 format %{ %}
4787 interface(CONST_INTER);
4788 %}
4789
4790 // Constant for increment: a ConI whose value is exactly 1.
4791 operand immI_1()
4792 %{
4793 predicate(n->get_int() == 1);
4794 match(ConI);
4795
4796 op_cost(0);
4797 format %{ %}
4798 interface(CONST_INTER);
4799 %}
4800
4801 // Constant for decrement: a ConI whose value is exactly -1.
4802 operand immI_M1()
4803 %{
4804 predicate(n->get_int() == -1);
4805 match(ConI);
4806
4807 op_cost(0);
4808 format %{ %}
4809 interface(CONST_INTER);
4810 %}
4811
// Integer constant 2: a ConI whose value is exactly 2.
4812 operand immI_2()
4813 %{
4814 predicate(n->get_int() == 2);
4815 match(ConI);
4816
4817 op_cost(0);
4818 format %{ %}
4819 interface(CONST_INTER);
4820 %}
4821
// Integer constant 4: a ConI whose value is exactly 4.
4822 operand immI_4()
4823 %{
4824 predicate(n->get_int() == 4);
4825 match(ConI);
4826
4827 op_cost(0);
4828 format %{ %}
4829 interface(CONST_INTER);
4830 %}
4831
// Integer constant 8: a ConI whose value is exactly 8.
4832 operand immI_8()
4833 %{
4834 predicate(n->get_int() == 8);
4835 match(ConI);
4836
4837 op_cost(0);
4838 format %{ %}
4839 interface(CONST_INTER);
4840 %}
4841
4842 // Valid scale values for addressing modes: a ConI in the range [0, 3].
4843 operand immI2()
4844 %{
4845 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4846 match(ConI);
4847
4848 format %{ %}
4849 interface(CONST_INTER);
4850 %}
4851
// Unsigned 7-bit integer immediate: a ConI in the range [0, 0x7F].
4852 operand immU7()
4853 %{
4854 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4855 match(ConI);
4856
4857 op_cost(5);
4858 format %{ %}
4859 interface(CONST_INTER);
4860 %}
4861
// Signed 8-bit integer immediate: a ConI in the range [-0x80, 0x7F].
4862 operand immI8()
4863 %{
4864 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4865 match(ConI);
4866
4867 op_cost(5);
4868 format %{ %}
4869 interface(CONST_INTER);
4870 %}
4871
// Unsigned 8-bit integer immediate: a ConI in the range [0, 255].
4872 operand immU8()
4873 %{
4874 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4875 match(ConI);
4876
4877 op_cost(5);
4878 format %{ %}
4879 interface(CONST_INTER);
4880 %}
4881
// Signed 16-bit integer immediate: a ConI in the range [-32768, 32767].
4882 operand immI16()
4883 %{
4884 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4885 match(ConI);
4886
4887 op_cost(10);
4888 format %{ %}
4889 interface(CONST_INTER);
4890 %}
4891
4892 // Int Immediate non-negative: a ConI whose value is >= 0.
4893 operand immU31()
4894 %{
4895 predicate(n->get_int() >= 0);
4896 match(ConI);
4897
4898 op_cost(0);
4899 format %{ %}
4900 interface(CONST_INTER);
4901 %}
4902
4903 // Pointer Immediate: matches any ConP node (no predicate).
4904 operand immP()
4905 %{
4906 match(ConP);
4907
4908 op_cost(10);
4909 format %{ %}
4910 interface(CONST_INTER);
4911 %}
4912
4913 // Null Pointer Immediate: a ConP whose pointer value is 0.
4914 operand immP0()
4915 %{
4916 predicate(n->get_ptr() == 0);
4917 match(ConP);
4918
4919 op_cost(5);
4920 format %{ %}
4921 interface(CONST_INTER);
4922 %}
4923
4924 // Narrow Pointer Immediate: matches any ConN node (no predicate).
4925 operand immN() %{
4926 match(ConN);
4927
4928 op_cost(10);
4929 format %{ %}
4930 interface(CONST_INTER);
4931 %}
4932
// Narrow Klass Immediate: matches any ConNKlass node (no predicate).
4933 operand immNKlass() %{
4934 match(ConNKlass);
4935
4936 op_cost(10);
4937 format %{ %}
4938 interface(CONST_INTER);
4939 %}
4940
4941 // Narrow Null Pointer Immediate: a ConN whose narrow constant is 0.
4942 operand immN0() %{
4943 predicate(n->get_narrowcon() == 0);
4944 match(ConN);
4945
4946 op_cost(5);
4947 format %{ %}
4948 interface(CONST_INTER);
4949 %}
4950
// Pointer immediate that needs no relocation and whose value, shifted right
// by 31, is 0 (i.e. it fits in 31 bits).
4951 operand immP31()
4952 %{
4953 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4954 && (n->get_ptr() >> 31) == 0);
4955 match(ConP);
4956
4957 op_cost(5);
4958 format %{ %}
4959 interface(CONST_INTER);
4960 %}
4961
4962
4963 // Long Immediate: matches any ConL node (no predicate).
4964 operand immL()
4965 %{
4966 match(ConL);
4967
4968 op_cost(20);
4969 format %{ %}
4970 interface(CONST_INTER);
4971 %}
4972
4973 // Long Immediate 8-bit: a ConL in the signed range [-0x80, 0x7F].
4974 operand immL8()
4975 %{
4976 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4977 match(ConL);
4978
4979 op_cost(5);
4980 format %{ %}
4981 interface(CONST_INTER);
4982 %}
4983
4984 // Long Immediate 32-bit unsigned: a ConL that survives a round trip
// through 'unsigned int' unchanged.
4985 operand immUL32()
4986 %{
4987 predicate(n->get_long() == (unsigned int) (n->get_long()));
4988 match(ConL);
4989
4990 op_cost(10);
4991 format %{ %}
4992 interface(CONST_INTER);
4993 %}
4994
4995 // Long Immediate 32-bit signed: a ConL that survives a round trip
// through 'int' unchanged.
4996 operand immL32()
4997 %{
4998 predicate(n->get_long() == (int) (n->get_long()));
4999 match(ConL);
5000
5001 op_cost(15);
5002 format %{ %}
5003 interface(CONST_INTER);
5004 %}
5005
// Long immediate whose value, viewed as unsigned (julong), is a power of 2.
5006 operand immL_Pow2()
5007 %{
5008 predicate(is_power_of_2((julong)n->get_long()));
5009 match(ConL);
5010
5011 op_cost(15);
5012 format %{ %}
5013 interface(CONST_INTER);
5014 %}
5015
// Long immediate whose bitwise complement, viewed as unsigned (julong),
// is a power of 2.
5016 operand immL_NotPow2()
5017 %{
5018 predicate(is_power_of_2((julong)~n->get_long()));
5019 match(ConL);
5020
5021 op_cost(15);
5022 format %{ %}
5023 interface(CONST_INTER);
5024 %}
5025
5026 // Long Immediate zero: a ConL whose value is exactly 0.
5027 operand immL0()
5028 %{
5029 predicate(n->get_long() == 0L);
5030 match(ConL);
5031
5032 op_cost(10);
5033 format %{ %}
5034 interface(CONST_INTER);
5035 %}
5036
5037 // Constant for increment: a ConL whose value is exactly 1.
5038 operand immL1()
5039 %{
5040 predicate(n->get_long() == 1);
5041 match(ConL);
5042
5043 format %{ %}
5044 interface(CONST_INTER);
5045 %}
5046
5047 // Constant for decrement: a ConL whose value is exactly -1.
5048 operand immL_M1()
5049 %{
5050 predicate(n->get_long() == -1);
5051 match(ConL);
5052
5053 format %{ %}
5054 interface(CONST_INTER);
5055 %}
5056
5057 // Long Immediate: low 32-bit mask, i.e. a ConL equal to 0xFFFFFFFF.
5058 operand immL_32bits()
5059 %{
5060 predicate(n->get_long() == 0xFFFFFFFFL);
5061 match(ConL);
5062 op_cost(20);
5063
5064 format %{ %}
5065 interface(CONST_INTER);
5066 %}
5067
5068 // Int Immediate: 2^n-1, positive (value > 0 and value + 1 is a power of 2).
5069 operand immI_Pow2M1()
5070 %{
// The +1 is done on the juint view of the value before the power-of-2 test.
5071 predicate((n->get_int() > 0)
5072 && is_power_of_2((juint)n->get_int() + 1));
5073 match(ConI);
5074
5075 op_cost(20);
5076 format %{ %}
5077 interface(CONST_INTER);
5078 %}
5079
5080 // Float Immediate zero: a ConF for which jint_cast(value) == 0.
// NOTE(review): jint_cast presumably yields the raw bit pattern, in which
// case only +0.0f matches and -0.0f does not; confirm.
5081 operand immF0()
5082 %{
5083 predicate(jint_cast(n->getf()) == 0);
5084 match(ConF);
5085
5086 op_cost(5);
5087 format %{ %}
5088 interface(CONST_INTER);
5089 %}
5090
5091 // Float Immediate: matches any ConF node (no predicate).
5092 operand immF()
5093 %{
5094 match(ConF);
5095
5096 op_cost(15);
5097 format %{ %}
5098 interface(CONST_INTER);
5099 %}
5100
5101 // Half Float Immediate: matches any ConH node (no predicate).
5102 operand immH()
5103 %{
5104 match(ConH);
5105
5106 op_cost(15);
5107 format %{ %}
5108 interface(CONST_INTER);
5109 %}
5110
5111 // Double Immediate zero: a ConD for which jlong_cast(value) == 0.
// NOTE(review): jlong_cast presumably yields the raw bit pattern, in which
// case only +0.0 matches and -0.0 does not; confirm.
5112 operand immD0()
5113 %{
5114 predicate(jlong_cast(n->getd()) == 0);
5115 match(ConD);
5116
5117 op_cost(5);
5118 format %{ %}
5119 interface(CONST_INTER);
5120 %}
5121
5122 // Double Immediate: matches any ConD node (no predicate).
5123 operand immD()
5124 %{
5125 match(ConD);
5126
5127 op_cost(15);
5128 format %{ %}
5129 interface(CONST_INTER);
5130 %}
5131
5132 // Immediates for special shifts (sign extend)
5133
5134 // Shift-count constant 16: a ConI whose value is exactly 16.
5135 operand immI_16()
5136 %{
5137 predicate(n->get_int() == 16);
5138 match(ConI);
5139
5140 format %{ %}
5141 interface(CONST_INTER);
5142 %}
5143
// Shift-count constant 24: a ConI whose value is exactly 24.
5144 operand immI_24()
5145 %{
5146 predicate(n->get_int() == 24);
5147 match(ConI);
5148
5149 format %{ %}
5150 interface(CONST_INTER);
5151 %}
5152
5153 // Constant for byte-wide masking: a ConI whose value is exactly 255.
5154 operand immI_255()
5155 %{
5156 predicate(n->get_int() == 255);
5157 match(ConI);
5158
5159 format %{ %}
5160 interface(CONST_INTER);
5161 %}
5162
5163 // Constant for short-wide masking: a ConI whose value is exactly 65535.
5164 operand immI_65535()
5165 %{
5166 predicate(n->get_int() == 65535);
5167 match(ConI);
5168
5169 format %{ %}
5170 interface(CONST_INTER);
5171 %}
5172
5173 // Constant for byte-wide masking: a ConL whose value is exactly 255.
5174 operand immL_255()
5175 %{
5176 predicate(n->get_long() == 255);
5177 match(ConL);
5178
5179 format %{ %}
5180 interface(CONST_INTER);
5181 %}
5182
5183 // Constant for short-wide masking: a ConL whose value is exactly 65535.
5184 operand immL_65535()
5185 %{
5186 predicate(n->get_long() == 65535);
5187 match(ConL);
5188
5189 format %{ %}
5190 interface(CONST_INTER);
5191 %}
5192
5193 // AOT Runtime Constants Address: a ConP accepted by AOTRuntimeConstants::contains().
5194 operand immAOTRuntimeConstantsAddress()
5195 %{
5196 // Check if the address is in the range of AOT Runtime Constants
5197 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5198 match(ConP);
5199
5200 op_cost(0);
5201 format %{ %}
5202 interface(CONST_INTER);
5203 %}
5204
// Vector mask register operand: matches RegVectMask and is allocated from
// the vectmask_reg register class.
5205 operand kReg()
5206 %{
5207 constraint(ALLOC_IN_RC(vectmask_reg));
5208 match(RegVectMask);
5209 format %{%}
5210 interface(REG_INTER);
5211 %}
5212
5213 // Register Operands
5214 // Integer Register: general int operand allocated from the int_reg class.
5215 operand rRegI()
5216 %{
5217 constraint(ALLOC_IN_RC(int_reg));
5218 match(RegI);
5219
// The single-register int operands are also accepted where an rRegI is required.
5220 match(rax_RegI);
5221 match(rbx_RegI);
5222 match(rcx_RegI);
5223 match(rdx_RegI);
5224 match(rdi_RegI);
5225
5226 format %{ %}
5227 interface(REG_INTER);
5228 %}
5229
// Special Registers
// Int operand restricted to the int_rax_reg class; printed as "RAX".
operand rax_RegI() %{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
5240
// Int operand restricted to the int_rbx_reg class; printed as "RBX".
operand rbx_RegI() %{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}
5251
// Int operand restricted to the int_rcx_reg class; printed as "RCX".
operand rcx_RegI() %{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}
5261
// Int operand restricted to the int_rdx_reg class; printed as "RDX".
operand rdx_RegI() %{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}
5271
// Int operand restricted to the int_rdi_reg class; printed as "RDI".
operand rdi_RegI() %{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}
5281
// Int operand allocated from int_no_rax_rdx_reg. Besides RegI it accepts the
// pinned RBX, RCX and RDI int operands (the RAX and RDX ones are not listed).
operand no_rax_rdx_RegI() %{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5293
// Int operand allocated from int_no_rbp_r13_reg. Accepts RegI, the general
// rRegI operand and each of the pinned int operands listed below.
operand no_rbp_r13_RegI() %{
  constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
  match(RegI);
  match(rRegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5308
// Pointer Register
// Pointer operand allocated from any_reg. Accepts RegP, every pinned pointer
// operand listed below, and the general rRegP operand.
operand any_RegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5325
// General pointer operand allocated from ptr_reg. Accepts RegP and the pinned
// pointer operands listed below.
operand rRegP() %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);  // See Q&A below about
  match(r15_RegP);  // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}
5340
// Narrow (RegN) register operand; it is allocated from the int_reg class.
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
5348
5349 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5350 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5351 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5352 // The output of an instruction is controlled by the allocator, which respects
5353 // register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator as an output.
5356 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5357 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5358 // result, RBP is not included in the output of the instruction either.
5359
// This operand is not allowed to use RBP even if
// RBP is not used to hold the frame pointer.
// Allocated from ptr_reg_no_rbp; accepts RegP and the pinned RBX, RSI and RDI
// pointer operands.
operand no_rbp_RegP() %{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5373
// Special Registers
// Return a pointer value
// Pointer operand restricted to the ptr_rax_reg class.
operand rax_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5385
// Special Registers
// Return a compressed pointer value
// Narrow (RegN) operand restricted to the int_rax_reg class.
operand rax_RegN() %{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}
5397
// Used in AtomicAdd
// Pointer operand restricted to the ptr_rbx_reg class.
operand rbx_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5408
// Pointer operand restricted to the ptr_rsi_reg class.
operand rsi_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5418
// Pointer operand restricted to the ptr_rbp_reg class.
operand rbp_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5428
// Used in rep stosq
// Pointer operand restricted to the ptr_rdi_reg class.
operand rdi_RegP() %{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5439
// Pointer operand restricted to the ptr_r15_reg class.
operand r15_RegP() %{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5449
// General long register operand allocated from long_reg. Accepts RegL and the
// pinned RAX / RDX long operands.
operand rRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5460
// Special Registers
// Long operand allocated from the long_no_rax_rdx_reg class.
operand no_rax_rdx_RegL() %{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5471
// Long operand restricted to the long_rax_reg class; printed as "RAX".
operand rax_RegL() %{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
5481
// Long operand restricted to the long_rcx_reg class.
operand rcx_RegL() %{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5491
// Long operand restricted to the long_rdx_reg class.
operand rdx_RegL() %{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5501
// Long operand restricted to the long_r11_reg class.
operand r11_RegL() %{
  constraint(ALLOC_IN_RC(long_r11_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5511
// Long operand allocated from long_no_rbp_r13_reg. Accepts RegL, the general
// rRegL operand and the pinned RAX / RCX / RDX long operands.
operand no_rbp_r13_RegL() %{
  constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
  match(RegL);
  match(rRegL);
  match(rax_RegL);
  match(rcx_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5524
// Flags register, used as output of compare instructions.
// Allocated from int_flags; printed as "RFLAGS".
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}
5534
// Flags register, used as output of FLOATING POINT compare instructions.
// Same register class as rFlagsReg (int_flags); printed as "RFLAGS_U".
operand rFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}
5544
// Flags operand (int_flags) that is only available when APX is not in use or
// AVX10.2 is not supported; its predicate is the exact complement of the one
// on rFlagsRegUCFE. Printed as "RFLAGS_U_CF".
operand rFlagsRegUCF() %{
  predicate(!UseAPX || !VM_Version::supports_avx10_2());
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}
5553
// Flags operand (int_flags) that is only available when APX is in use and
// AVX10.2 is supported; its predicate is the exact complement of the one on
// rFlagsRegUCF. Printed as "RFLAGS_U_CFE".
operand rFlagsRegUCFE() %{
  predicate(UseAPX && VM_Version::supports_avx10_2());
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U_CFE" %}
  interface(REG_INTER);
%}
5562
// Float register operand allocated from the float_reg class.
operand regF() %{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}
5571
// Float register operand allocated from the float_reg_legacy class.
operand legRegF() %{
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}
5580
// Float register operand allocated from the float_reg_vl class.
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}
5589
// Double register operand allocated from the double_reg class.
operand regD() %{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5598
// Double register operand allocated from the double_reg_legacy class.
operand legRegD() %{
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5607
// Double register operand allocated from the double_reg_vl class.
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5616
5617 //----------Memory Operands----------------------------------------------------
5618 // Direct Memory Operand
5619 // operand direct(immP addr)
5620 // %{
5621 // match(addr);
5622
5623 // format %{ "[$addr]" %}
5624 // interface(MEMORY_INTER) %{
5625 // base(0xFFFFFFFF);
5626 // index(0x4);
5627 // scale(0x0);
5628 // disp($addr);
5629 // %}
5630 // %}
5631
// Indirect Memory Operand
// Address is just the pointer register: [reg].
operand indirect(any_RegP reg) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // No index
    scale(0x0);   // No scale
    disp(0x0);    // No displacement
  %}
%}
5646
// Indirect Memory Plus Short Offset Operand
// Address is [reg + off] where off is an immL8 constant.
operand indOffset8(any_RegP reg, immL8 off) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // No index
    scale(0x0);   // No scale
    disp($off);
  %}
%}
5661
// Indirect Memory Plus Long Offset Operand
// Address is [reg + off] where off is an immL32 constant.
operand indOffset32(any_RegP reg, immL32 off) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // No index
    scale(0x0);   // No scale
    disp($off);
  %}
%}
5676
// Indirect Memory Plus Index Register Plus Offset Operand
// Address is [reg + lreg + off] with an unscaled long index register.
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg lreg) off);

  op_cost(10);
  format %{ "[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);   // No scale
    disp($off);
  %}
%}
5692
// Indirect Memory Plus Index Register Operand
// Address is [reg + lreg]: unscaled long index register, no displacement.
operand indIndex(any_RegP reg, rRegL lreg) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);

  op_cost(10);
  format %{ "[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);   // No scale
    disp(0x0);    // No displacement
  %}
%}
5708
// Indirect Memory Times Scale Plus Index Register
// Address is [reg + (lreg << scale)], scale being an immI2 constant.
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));

  op_cost(10);
  format %{ "[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);    // No displacement
  %}
%}
5724
// Indirect Memory Times Scale Plus Positive Index Register
// Address is [reg + (ConvI2L(idx) << scale)] using the int register directly
// as index. The predicate only admits the pattern when the long type of
// n->in(3)->in(1) (the ConvI2L under the LShiftL) has a lower bound >= 0.
operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale) %{
  predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL (ConvI2L idx) scale));

  op_cost(10);
  format %{ "[$reg + pos $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp(0x0);    // No displacement
  %}
%}
5740
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Address is [reg + (lreg << scale) + off].
operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);

  op_cost(10);
  format %{ "[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}
5756
// Indirect Memory Plus Positive Index Register Plus Offset Operand
// Address is [reg + ConvI2L(idx) + off] using the int register directly as
// index. The predicate only admits the pattern when the long type of
// n->in(2)->in(3) (the ConvI2L under the inner AddP) has a lower bound >= 0.
operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx) %{
  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (ConvI2L idx)) off);

  op_cost(10);
  format %{ "[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);   // No scale
    disp($off);
  %}
%}
5773
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Address is [reg + (ConvI2L(idx) << scale) + off] using the int register
// directly as index. The predicate only admits the pattern when the long type
// of n->in(2)->in(3)->in(1) (the ConvI2L under the LShiftL) has a lower
// bound >= 0.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale) %{
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{ "[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5790
// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
// Only available with UseCompressedOops and a compressed-oop shift equal to
// Address::times_8; the narrow register is used as an index scaled by 8 on
// top of R12.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{ "[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc);    // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
5808
// Indirect Memory Operand (narrow oop base)
// Address is [reg] where reg holds a narrow oop; only available when
// CompressedOops::shift() == 0.
operand indirectNarrow(rRegN reg) %{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // No index
    scale(0x0);   // No scale
    disp(0x0);    // No displacement
  %}
%}
5824
// Indirect Memory Plus Short Offset Operand (narrow oop base)
// Address is [DecodeN(reg) + off] with an immL8 offset; only available when
// CompressedOops::shift() == 0.
operand indOffset8Narrow(rRegN reg, immL8 off) %{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // No index
    scale(0x0);   // No scale
    disp($off);
  %}
%}
5840
// Indirect Memory Plus Long Offset Operand (narrow oop base)
// Address is [DecodeN(reg) + off] with an immL32 offset; only available when
// CompressedOops::shift() == 0.
operand indOffset32Narrow(rRegN reg, immL32 off) %{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // No index
    scale(0x0);   // No scale
    disp($off);
  %}
%}
5856
// Indirect Memory Plus Index Register Plus Offset Operand (narrow oop base)
// Address is [DecodeN(reg) + lreg + off]; only available when
// CompressedOops::shift() == 0.
operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off) %{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) lreg) off);

  op_cost(10);
  format %{ "[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);   // No scale
    disp($off);
  %}
%}
5873
// Indirect Memory Plus Index Register Operand (narrow oop base)
// Address is [DecodeN(reg) + lreg] with no displacement; only available when
// CompressedOops::shift() == 0.
operand indIndexNarrow(rRegN reg, rRegL lreg) %{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);

  op_cost(10);
  format %{ "[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);   // No scale
    disp(0x0);    // No displacement
  %}
%}
5890
// Indirect Memory Times Scale Plus Index Register (narrow oop base)
// Address is [DecodeN(reg) + (lreg << scale)]; only available when
// CompressedOops::shift() == 0.
operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale) %{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));

  op_cost(10);
  format %{ "[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);    // No displacement
  %}
%}
5907
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// (narrow oop base)
// Address is [DecodeN(reg) + (lreg << scale) + off]; only available when
// CompressedOops::shift() == 0.
operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale) %{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);

  op_cost(10);
  format %{ "[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}
5924
// Indirect Memory Plus Positive Index Register Plus Offset Operand
// (narrow oop base)
// Address is [DecodeN(reg) + ConvI2L(idx) + off]. Requires
// CompressedOops::shift() == 0 and that the long type of n->in(2)->in(3)
// (the ConvI2L under the inner AddP) has a lower bound >= 0.
operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx) %{
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);

  op_cost(10);
  format %{ "[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);   // No scale
    disp($off);
  %}
%}
5941
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// (narrow oop base)
// Address is [DecodeN(reg) + (ConvI2L(idx) << scale) + off]. Requires
// CompressedOops::shift() == 0 and that the long type of
// n->in(2)->in(3)->in(1) (the ConvI2L under the LShiftL) has a lower
// bound >= 0.
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale) %{
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{ "[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5958
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.

// Pointer stack slot: RSP-relative address whose displacement is the slot.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);    // RSP
    index(0x4);   // No Index
    scale(0x0);   // No Scale
    disp($reg);   // Stack Offset
  %}
%}
5976
// Int stack slot: RSP-relative address whose displacement is the slot.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);    // RSP
    index(0x4);   // No Index
    scale(0x0);   // No Scale
    disp($reg);   // Stack Offset
  %}
%}
5990
// Float stack slot: RSP-relative address whose displacement is the slot.
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);    // RSP
    index(0x4);   // No Index
    scale(0x0);   // No Scale
    disp($reg);   // Stack Offset
  %}
%}
6004
// Double stack slot: RSP-relative address whose displacement is the slot.
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);    // RSP
    index(0x4);   // No Index
    scale(0x0);   // No Scale
    disp($reg);   // Stack Offset
  %}
%}
// Long stack slot: RSP-relative address whose displacement is the slot.
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);    // RSP
    index(0x4);   // No Index
    scale(0x0);   // No Scale
    disp($reg);   // Stack Offset
  %}
%}
6031
6032 //----------Conditional Branch Operands----------------------------------------
6033 // Comparison Op - This is the operation of the comparison, and is limited to
6034 // the following set of codes:
6035 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6036 //
6037 // Other attributes of the comparison, such as unsignedness, are specified
6038 // by the comparison instruction that sets a condition code flags register.
6039 // That result is represented by a flags operand whose subtype is appropriate
6040 // to the unsignedness (etc.) of the comparison.
6041 //
6042 // Later, the instruction which matches both the Comparison Op (a Bool) and
6043 // the flags (produced by the Cmp) specifies the coding of the comparison op
6044 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6045
// Comparison Code (signed)
// Maps each Bool test to the signed condition code and mnemonic suffix.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xc, "l");
    greater_equal(0xd, "ge");
    less_equal(0xe, "le");
    greater(0xf, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6063
// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
// Ordering tests map to the below/above condition codes (b, ae, be, a).
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6083
6084
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
// Selected only when APX is not in use or AVX10.2 is not supported, and the
// test is lt/ge/le/gt or both compare inputs are the same node. Here eq/ne
// are encoded with the parity conditions (np / p).
operand cmpOpUCF() %{
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt ||
             n->in(1)->in(1) == n->in(1)->in(2)));
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0xb, "np");
    not_equal(0xa, "p");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6108
6109
// Floating comparisons that can be fixed up with extra conditional jumps.
// Selected only when APX is not in use or AVX10.2 is not supported, the test
// is eq or ne, and the two compare inputs are different nodes.
operand cmpOpUCF2() %{
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq) &&
            n->in(1)->in(1) != n->in(1)->in(2));
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6129
6130
// Floating point comparisons that set condition flags to test more directly,
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing into an
// instruction because the preceding comparison might be based on a three way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
// Selected only when APX is in use and AVX10.2 is supported, for the
// ne/eq/lt/ge/le/gt tests.
operand cmpOpUCFE() %{
  predicate((UseAPX && VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq ||
             n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt));
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6160
// Operands for bound floating point register arguments
// VecX operand restricted to the xmm0_reg class.
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
6168
6169 // Vectors
6170
// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection pass.
// Matches every vector ideal type (VecX, VecY, VecZ, VecS, VecD).
operand vec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
6184
// Dummy generic legacy vector class. Should be used for all legacy vector operands.
// Replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
// Matches every vector ideal type (VecX, VecY, VecZ, VecS, VecD).
operand legVec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
6200
// Replaces vec during post-selection cleanup. See above.
// VecS operand allocated from vectors_reg_vlbwdq.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
6209
// Replaces legVec during post-selection cleanup. See above.
// VecS operand allocated from vectors_reg_legacy.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
6218
// Replaces vec during post-selection cleanup. See above.
// VecD operand allocated from vectord_reg_vlbwdq.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
6227
// Replaces legVec during post-selection cleanup. See above.
// VecD operand allocated from vectord_reg_legacy.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
6236
// Replaces vec during post-selection cleanup. See above.
// VecX operand allocated from vectorx_reg_vlbwdq.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
6245
// Replaces legVec during post-selection cleanup. See above.
// VecX operand allocated from vectorx_reg_legacy.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
6254
// Replaces vec during post-selection cleanup. See above.
// VecY operand allocated from vectory_reg_vlbwdq.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
6263
// Replaces legVec during post-selection cleanup. See above.
// VecY operand allocated from vectory_reg_legacy.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
6272
// Replaces vec during post-selection cleanup. See above.
// VecZ operand allocated from vectorz_reg.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6281
// Replaces legVec during post-selection cleanup. See above.
// VecZ operand allocated from vectorz_reg_legacy.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6290
6291 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6293 // instruction definitions by not requiring the AD writer to specify separate
6294 // instructions for every form of operand when the instruction accepts
6295 // multiple operand types with the same basic encoding and format. The classic
6296 // case of this is memory operands.
6297
// "memory" groups every addressing-mode operand so an instruction can accept
// any of them through a single operand class: first the full-pointer forms,
// then the compressed-oop form, then the *Narrow (DecodeN-based) forms.
opclass memory(indirect,
               indOffset8,
               indOffset32,
               indIndexOffset,
               indIndex,
               indIndexScale,
               indPosIndexScale,
               indIndexScaleOffset,
               indPosIndexOffset,
               indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow,
               indOffset8Narrow,
               indOffset32Narrow,
               indIndexOffsetNarrow,
               indIndexNarrow,
               indIndexScaleNarrow,
               indIndexScaleOffsetNarrow,
               indPosIndexOffsetNarrow,
               indPosIndexScaleOffsetNarrow);
6304
6305 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
6307 pipeline %{
6308
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;         // Instructions are variable size
  max_instructions_per_bundle = 3;    // Up to 3 instructions per bundle
  instruction_unit_size = 1;          // Instruction size is counted in 1-byte units
  instruction_fetch_unit_size = 16;   // The processor fetches one line
  instruction_fetch_units = 1;        // of 16 bytes
%}
6317
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU op, only ALU0 handles mul instructions.
// NOTE(review): the text above says 2 load/store ops per cycle, but three
// memory units (MS0, MS1, MS2) are declared below -- confirm which is intended.
resources(D0, D1, D2, DECODE = D0 | D1 | D2,
          MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
          BR, FPU,
          ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6330
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline: six stages, S0 through S5, which the pipe classes
// refer to by name.
pipe_desc(S0, S1, S2, S3, S4, S5);
6336
6337 //----------PIPELINE CLASSES---------------------------------------------------
6338 // Pipeline Classes describe the stages in which input and output are
6339 // referenced by the hardware pipeline.
6340
6341 // Naming convention: ialu or fpu
6342 // Then: _reg
6343 // Then: _reg if there is a 2nd register
6344 // Then: _long if it's a pair of instructions implementing a long
6345 // Then: _fat if it requires the big decoder
6346 // Or: _mem if it requires the big decoder and a memory unit.
6347
// Integer ALU reg operation: dst is both read (S3) and written (S4).
pipe_class ialu_reg(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0;          // any decoder
  ALU    : S3;          // any alu
%}
6357
// Long ALU reg operation, modeled as two instructions.
pipe_class ialu_reg_long(rRegL dst) %{
  instruction_count(2);
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0(2);       // any 2 decoders
  ALU    : S3(2);       // any 2 alus
%}
6367
// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  D0     : S0;          // big decoder only
  ALU    : S3;          // any alu
%}
6377
// Integer ALU reg-reg operation: src read in S3, dst written in S4.
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0;          // any decoder
  ALU    : S3;          // any alu
%}
6387
// Integer ALU reg-reg operation using big decoder.
// Note that the source parameter is declared with the memory operand class.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  D0     : S0;          // big decoder only
  ALU    : S3;          // any alu
%}
6397
// Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written
// in S5.
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
  single_instruction;
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;          // big decoder only
  ALU    : S4;          // any alu
  MEM    : S3;          // any mem
%}
6408
// Integer mem operation (prefetch): uses a memory unit but no ALU.
pipe_class ialu_mem(memory mem) %{
  single_instruction;
  mem    : S3(read);
  D0     : S0;          // big decoder only
  MEM    : S3;          // any mem
%}
6417
// Integer Store to Memory: address read in S3, source register read in S5.
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
  single_instruction;
  mem    : S3(read);
  src    : S5(read);
  D0     : S0;          // big decoder only
  ALU    : S4;          // any alu
  MEM    : S3;          // any mem
%}
6428
6429 // // Long Store to Memory
6430 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6431 // %{
6432 // instruction_count(2);
6433 // mem : S3(read);
6434 // src : S5(read);
6435 // D0 : S0(2); // big decoder only; twice
6436 // ALU : S4(2); // any 2 alus
6437 // MEM : S3(2); // Both mems
6438 // %}
6439
6440 // Integer Store to Memory without a register source (the "_imm" suffix
// suggests the stored value is an immediate): only the memory operand is modelled.
6441 pipe_class ialu_mem_imm(memory mem)
6442 %{
6443 single_instruction;
6444 mem : S3(read);
6445 D0 : S0; // big decoder only
6446 ALU : S4; // any alu
6447 MEM : S3; // any mem
6448 %}
6449
6450 // Integer ALU0 reg-reg operation: like ialu_reg_reg, but restricted to the
// big decoder and to ALU0 specifically.
6451 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6452 %{
6453 single_instruction;
6454 dst : S4(write);
6455 src : S3(read);
6456 D0 : S0; // Big decoder only
6457 ALU0 : S3; // only alu0
6458 %}
6459
6460 // Integer ALU0 reg-mem operation: like ialu_reg_mem, but the ALU stage (S4)
// must be ALU0.
6461 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6462 %{
6463 single_instruction;
6464 dst : S5(write);
6465 mem : S3(read);
6466 D0 : S0; // big decoder only
6467 ALU0 : S4; // ALU0 only
6468 MEM : S3; // any mem
6469 %}
6470
6471 // Integer ALU reg-reg operation whose result is the flags register: both
// sources are read in S3 and cr is written in S4.
6472 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6473 %{
6474 single_instruction;
6475 cr : S4(write);
6476 src1 : S3(read);
6477 src2 : S3(read);
6478 DECODE : S0; // any decoder
6479 ALU : S3; // any alu
6480 %}
6481
6482 // Integer ALU reg-imm operation whose result is the flags register; the
// immediate is not an operand of the pipe class, so only src1 is modelled.
6483 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6484 %{
6485 single_instruction;
6486 cr : S4(write);
6487 src1 : S3(read);
6488 DECODE : S0; // any decoder
6489 ALU : S3; // any alu
6490 %}
6491
6492 // Integer ALU reg-mem operation whose result is the flags register: src2 is
// a memory operand, so the big decoder and a MEM unit are reserved as well.
6493 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6494 %{
6495 single_instruction;
6496 cr : S4(write);
6497 src1 : S3(read);
6498 src2 : S3(read);
6499 D0 : S0; // big decoder only
6500 ALU : S4; // any alu
6501 MEM : S3; // any mem
6502 %}
6503
6504 // Four-instruction idiom over p, q and y (the name suggests a compare-less-than
// sequence). Only operand reads and four decoder slots are modelled; no operand
// write and no ALU usage is declared.
6505 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6506 %{
6507 instruction_count(4);
6508 y : S4(read);
6509 q : S3(read);
6510 p : S3(read);
6511 DECODE : S0(4); // any 4 decoders
6512 %}
6513
6514 // Conditional move reg-reg: reads src and the flags in S3, writes dst in S4.
// Only a decoder is reserved; no ALU usage is declared.
6515 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6516 %{
6517 single_instruction;
6518 dst : S4(write);
6519 src : S3(read);
6520 cr : S3(read);
6521 DECODE : S0; // any decoder
6522 %}
6523
6524 // Conditional move reg-mem: as pipe_cmov_reg, but src is a memory operand and
// a MEM unit is reserved in S3. Note the parameter order starts with cr here.
6525 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6526 %{
6527 single_instruction;
6528 dst : S4(write);
6529 src : S3(read);
6530 cr : S3(read);
6531 DECODE : S0; // any decoder
6532 MEM : S3; // any mem
6533 %}
6534
6535 // Conditional move reg-reg long. Declared as a single instruction, yet two
// decoder slots are reserved.
// NOTE(review): single_instruction together with DECODE : S0(2) looks
// inconsistent with ialu_reg_long (instruction_count(2)) - confirm intent.
6536 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6537 %{
6538 single_instruction;
6539 dst : S4(write);
6540 src : S3(read);
6541 cr : S3(read);
6542 DECODE : S0(2); // any 2 decoders
6543 %}
6544
6545 // Float single-register operation: two instructions; dst is only declared
// as read (S3), no write of dst is modelled.
6546 pipe_class fpu_reg(regD dst)
6547 %{
6548 instruction_count(2);
6549 dst : S3(read);
6550 DECODE : S0(2); // any 2 decoders
6551 FPU : S3; // one FPU slot
6552 %}
6553
6554 // Float reg-reg operation: two instructions, src read in S3, dst written in S4.
6555 pipe_class fpu_reg_reg(regD dst, regD src)
6556 %{
6557 instruction_count(2);
6558 dst : S4(write);
6559 src : S3(read);
6560 DECODE : S0(2); // any 2 decoders
6561 FPU : S3; // one FPU slot
6562 %}
6563
6564 // Float three-register operation (dst, src1, src2): three instructions,
// three decoder slots and two FPU slots.
6565 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6566 %{
6567 instruction_count(3);
6568 dst : S4(write);
6569 src1 : S3(read);
6570 src2 : S3(read);
6571 DECODE : S0(3); // any 3 decoders
6572 FPU : S3(2); // two FPU slots
6573 %}
6574
6575 // Float four-register operation (dst, src1, src2, src3): four instructions,
// four decoder slots and two FPU slots.
6576 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6577 %{
6578 instruction_count(4);
6579 dst : S4(write);
6580 src1 : S3(read);
6581 src2 : S3(read);
6582 src3 : S3(read);
6583 DECODE : S0(4); // any 4 decoders
6584 FPU : S3(2); // two FPU slots
6585 %}
6586
6587 // Float operation with one memory source (src1) and two register sources:
// four instructions, of which one goes through the big decoder in S0 and the
// remaining three through any decoder in S1.
6588 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6589 %{
6590 instruction_count(4);
6591 dst : S4(write);
6592 src1 : S3(read);
6593 src2 : S3(read);
6594 src3 : S3(read);
6595 DECODE : S1(3); // any 3 decoders
6596 D0 : S0; // Big decoder only
6597 FPU : S3(2); // two FPU slots
6598 MEM : S3; // any mem
6599 %}
6600
6601 // Float reg-mem operation: two instructions; the memory operand is read in
// S3, the FPU is used in S4 and dst is written in S5.
6602 pipe_class fpu_reg_mem(regD dst, memory mem)
6603 %{
6604 instruction_count(2);
6605 dst : S5(write);
6606 mem : S3(read);
6607 D0 : S0; // big decoder only
6608 DECODE : S1; // any decoder for FPU POP
6609 FPU : S4; // one FPU slot
6610 MEM : S3; // any mem
6611 %}
6612
6613 // Float reg-reg-mem operation: three instructions; one register source and
// one memory source are read in S3, dst is written in S5.
6614 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6615 %{
6616 instruction_count(3);
6617 dst : S5(write);
6618 src1 : S3(read);
6619 mem : S3(read);
6620 D0 : S0; // big decoder only
6621 DECODE : S1(2); // any 2 decoders for FPU POP
6622 FPU : S4; // one FPU slot
6623 MEM : S3; // any mem
6624 %}
6625
6626 // Float mem-reg operation (store direction): two instructions; the address
// operand is read in S3, the register source is read in S5. No operand write
// is modelled.
6627 pipe_class fpu_mem_reg(memory mem, regD src)
6628 %{
6629 instruction_count(2);
6630 src : S5(read);
6631 mem : S3(read);
6632 DECODE : S0; // any decoder for FPU PUSH
6633 D0 : S1; // big decoder only
6634 FPU : S4; // one FPU slot
6635 MEM : S3; // any mem
6636 %}
6637
// Float mem-reg-reg operation: three instructions with a memory operand and
// two register sources, all read in S3. No operand write is modelled.
6638 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6639 %{
6640 instruction_count(3);
6641 src1 : S3(read);
6642 src2 : S3(read);
6643 mem : S3(read);
6644 DECODE : S0(2); // any 2 decoders for FPU PUSH
6645 D0 : S1; // big decoder only
6646 FPU : S4; // one FPU slot
6647 MEM : S3; // any mem
6648 %}
6649
// Float mem-reg-mem operation: three instructions with two memory operands
// (mem and src2) and one register source; two MEM slots and two big-decoder
// slots are reserved.
6650 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6651 %{
6652 instruction_count(3);
6653 src1 : S3(read);
6654 src2 : S3(read);
6655 mem : S4(read);
6656 DECODE : S0; // any decoder for FPU PUSH
6657 D0 : S0(2); // big decoder only
6658 FPU : S4; // one FPU slot
6659 MEM : S3(2); // any mem
6660 %}
6661
// Float mem-mem operation: two instructions, both using the big decoder and a
// MEM slot; no FPU reservation is declared.
6662 pipe_class fpu_mem_mem(memory dst, memory src1)
6663 %{
6664 instruction_count(2);
6665 src1 : S3(read);
6666 dst : S4(read);
6667 D0 : S0(2); // big decoder only
6668 MEM : S3(2); // any mem
6669 %}
6670
// Float mem-mem-mem operation: three instructions, each using the big decoder
// and a MEM slot, plus one FPU slot in S4.
6671 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6672 %{
6673 instruction_count(3);
6674 src1 : S3(read);
6675 src2 : S3(read);
6676 dst : S4(read);
6677 D0 : S0(3); // big decoder only
6678 FPU : S4; // one FPU slot
6679 MEM : S3(3); // any mem
6680 %}
6681
// Float mem-reg operation that additionally involves a constant (per the
// "_con" suffix; the constant itself is not an operand of the pipe class).
// Two MEM slots are reserved - presumably one for the constant load; confirm.
6682 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6683 %{
6684 instruction_count(3);
6685 src1 : S4(read);
6686 mem : S4(read);
6687 DECODE : S0; // any decoder for FPU PUSH
6688 D0 : S0(2); // big decoder only
6689 FPU : S4; // one FPU slot
6690 MEM : S3(2); // any mem
6691 %}
6692
6693 // Float load constant: two instructions; the load uses the big decoder and a
// MEM slot, dst is written in S5.
6694 pipe_class fpu_reg_con(regD dst)
6695 %{
6696 instruction_count(2);
6697 dst : S5(write);
6698 D0 : S0; // big decoder only for the load
6699 DECODE : S1; // any decoder for FPU POP
6700 FPU : S4; // one FPU slot
6701 MEM : S3; // any mem
6702 %}
6703
6704 // Float load constant combined with a register source: three instructions;
// src is read in S3 and dst is written in S5.
6705 pipe_class fpu_reg_reg_con(regD dst, regD src)
6706 %{
6707 instruction_count(3);
6708 dst : S5(write);
6709 src : S3(read);
6710 D0 : S0; // big decoder only for the load
6711 DECODE : S1(2); // any 2 decoders for FPU POP
6712 FPU : S4; // one FPU slot
6713 MEM : S3; // any mem
6714 %}
6715
6716 // Unconditional branch: a single instruction that only reserves the branch
// unit in S3; the label operand has no pipeline timing.
6717 pipe_class pipe_jmp(label labl)
6718 %{
6719 single_instruction;
6720 BR : S3; // branch unit
6721 %}
6722
6723 // Conditional branch: reads the flags register early (S1) and reserves the
// branch unit in S3.
6724 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6725 %{
6726 single_instruction;
6727 cr : S1(read);
6728 BR : S3; // branch unit
6729 %}
6730
6731 // Allocation idiom (compare-and-exchange on the heap pointer, per the name).
// Modelled as one serializing instruction with a fixed latency of 6 that
// occupies decoders, a MEM slot, two ALU slots and the branch unit.
6732 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6733 %{
6734 instruction_count(1); force_serialization;
6735 fixed_latency(6);
6736 heap_ptr : S3(read);
6737 DECODE : S0(3); // any 3 decoders
6738 D0 : S2; // big decoder only
6739 MEM : S3; // any mem
6740 ALU : S3(2); // two ALU slots
6741 dst : S5(write);
6742 BR : S5; // branch unit
6743 %}
6744
6745 // Generic big/slow expanded idiom: a catch-all class for instructions that are
// not modelled precisely. It is declared as 10 instructions spanning multiple
// bundles, serializing, with a fixed latency of 100.
6746 pipe_class pipe_slow()
6747 %{
6748 instruction_count(10); multiple_bundles; force_serialization;
6749 fixed_latency(100);
6750 D0 : S0(2); // big decoder only
6751 MEM : S3(2); // any mem
6752 %}
6753
6754 // The real do-nothing guy: zero instructions and no resource usage.
6755 pipe_class empty()
6756 %{
6757 instruction_count(0);
6758 %}
6759
6760 // Define the class for the Nop node: MachNop uses the 'empty' pipe class,
// i.e. it consumes no pipeline resources.
6761 define
6762 %{
6763 MachNop = empty;
6764 %}
6765
6766 %}
6767
6768 //----------INSTRUCTIONS-------------------------------------------------------
6769 //
6770 // match -- States which machine-independent subtree may be replaced
6771 // by this instruction.
6772 // ins_cost -- The estimated cost of this instruction is used by instruction
6773 // selection to identify a minimum cost tree of machine
6774 // instructions that matches a tree of machine-independent
6775 // instructions.
6776 // format -- A string providing the disassembly for this instruction.
6777 // The value of an instruction's operand may be inserted
6778 // by referring to it with a '$' prefix.
6779 // opcode -- Three instruction opcodes may be provided. These are referred
6780 // to within an encode class as $primary, $secondary, and $tertiary
6781 // respectively. The primary opcode is commonly used to
6782 // indicate the type of machine instruction, while secondary
6783 // and tertiary are often used for prefix options or addressing
6784 // modes.
6785 // ins_encode -- A list of encode classes with parameters. The encode class
6786 // name must have been defined in an 'enc_class' specification
6787 // in the encode section of the architecture description.
6788
6789 // ============================================================================
6790
// Matches the ideal Halt node. When is_reachable() holds, emits a stop with
// the textual form of _halt_reason; otherwise nothing is emitted.
6791 instruct ShouldNotReachHere() %{
6792 match(Halt);
6793 format %{ "stop\t# ShouldNotReachHere" %}
6794 ins_encode %{
6795 if (is_reachable()) {
6796 const char* str = __ code_string(_halt_reason);
6797 __ stop(str);
6798 }
6799 %}
6800 ins_pipe(pipe_slow);
6801 %}
6802
6803 // ============================================================================
6804
6805 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6806 // Float move from regF to vlRegF. Dummy rule: the encoding is
// ShouldNotReachHere(), so this node must never reach code emission.
6807 instruct MoveF2VL(vlRegF dst, regF src) %{
6808 match(Set dst src);
6809 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6810 ins_encode %{
6811 ShouldNotReachHere();
6812 %}
6813 ins_pipe( fpu_reg_reg );
6814 %}
6815
6816 // Float move from regF to legRegF. Dummy rule: the encoding is
// ShouldNotReachHere(), so this node must never reach code emission.
6817 instruct MoveF2LEG(legRegF dst, regF src) %{
6818 match(Set dst src);
6819 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6820 ins_encode %{
6821 ShouldNotReachHere();
6822 %}
6823 ins_pipe( fpu_reg_reg );
6824 %}
6825
6826 // Float move from vlRegF to regF. Dummy rule: the encoding is
// ShouldNotReachHere(), so this node must never reach code emission.
6827 instruct MoveVL2F(regF dst, vlRegF src) %{
6828 match(Set dst src);
6829 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6830 ins_encode %{
6831 ShouldNotReachHere();
6832 %}
6833 ins_pipe( fpu_reg_reg );
6834 %}
6835
6836 // Float move from legRegF to regF. Dummy rule: the encoding is
// ShouldNotReachHere(), so this node must never reach code emission.
6837 instruct MoveLEG2F(regF dst, legRegF src) %{
6838 match(Set dst src);
6839 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6840 ins_encode %{
6841 ShouldNotReachHere();
6842 %}
6843 ins_pipe( fpu_reg_reg );
6844 %}
6845
6846 // Double move from regD to vlRegD. Dummy rule: the encoding is
// ShouldNotReachHere(), so this node must never reach code emission.
6847 instruct MoveD2VL(vlRegD dst, regD src) %{
6848 match(Set dst src);
6849 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6850 ins_encode %{
6851 ShouldNotReachHere();
6852 %}
6853 ins_pipe( fpu_reg_reg );
6854 %}
6855
6856 // Double move from regD to legRegD. Dummy rule: the encoding is
// ShouldNotReachHere(), so this node must never reach code emission.
6857 instruct MoveD2LEG(legRegD dst, regD src) %{
6858 match(Set dst src);
6859 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6860 ins_encode %{
6861 ShouldNotReachHere();
6862 %}
6863 ins_pipe( fpu_reg_reg );
6864 %}
6865
6866 // Double move from vlRegD to regD. Dummy rule: the encoding is
// ShouldNotReachHere(), so this node must never reach code emission.
6867 instruct MoveVL2D(regD dst, vlRegD src) %{
6868 match(Set dst src);
6869 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6870 ins_encode %{
6871 ShouldNotReachHere();
6872 %}
6873 ins_pipe( fpu_reg_reg );
6874 %}
6875
6876 // Double move from legRegD to regD. Dummy rule: the encoding is
// ShouldNotReachHere(), so this node must never reach code emission.
6877 instruct MoveLEG2D(regD dst, legRegD src) %{
6878 match(Set dst src);
6879 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6880 ins_encode %{
6881 ShouldNotReachHere();
6882 %}
6883 ins_pipe( fpu_reg_reg );
6884 %}
6885
6886 //----------Load/Store/Move Instructions---------------------------------------
6887 //----------Load Instructions--------------------------------------------------
6888
6889 // Load Byte (8 bit signed): matches LoadB and emits a sign-extending
// byte-to-int load (movsbl) from $mem into $dst.
6890 instruct loadB(rRegI dst, memory mem)
6891 %{
6892 match(Set dst (LoadB mem));
6893
6894 ins_cost(125);
6895 format %{ "movsbl $dst, $mem\t# byte" %}
6896
6897 ins_encode %{
6898 __ movsbl($dst$$Register, $mem$$Address);
6899 %}
6900
6901 ins_pipe(ialu_reg_mem);
6902 %}
6903
6904 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into a
// single sign-extending byte-to-quad load (movsbq).
6905 instruct loadB2L(rRegL dst, memory mem)
6906 %{
6907 match(Set dst (ConvI2L (LoadB mem)));
6908
6909 ins_cost(125);
6910 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6911
6912 ins_encode %{
6913 __ movsbq($dst$$Register, $mem$$Address);
6914 %}
6915
6916 ins_pipe(ialu_reg_mem);
6917 %}
6918
6919 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB and emits a
// zero-extending byte-to-int load (movzbl).
6920 instruct loadUB(rRegI dst, memory mem)
6921 %{
6922 match(Set dst (LoadUB mem));
6923
6924 ins_cost(125);
6925 format %{ "movzbl $dst, $mem\t# ubyte" %}
6926
6927 ins_encode %{
6928 __ movzbl($dst$$Register, $mem$$Address);
6929 %}
6930
6931 ins_pipe(ialu_reg_mem);
6932 %}
6933
6934 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds
// ConvI2L(LoadUB) into a single zero-extending byte-to-quad load (movzbq).
6935 instruct loadUB2L(rRegL dst, memory mem)
6936 %{
6937 match(Set dst (ConvI2L (LoadUB mem)));
6938
6939 ins_cost(125);
6940 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6941
6942 ins_encode %{
6943 __ movzbq($dst$$Register, $mem$$Address);
6944 %}
6945
6946 ins_pipe(ialu_reg_mem);
6947 %}
6948
6949 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register.
// Folds ConvI2L(AndI(LoadUB, mask)) into a zero-extending load followed by an
// andl. The flags register is declared killed because of the andl.
6950 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6951 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6952 effect(KILL cr);
6953
6954 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6955 "andl $dst, right_n_bits($mask, 8)" %}
6956 ins_encode %{
6957 Register Rdst = $dst$$Register;
6958 __ movzbq(Rdst, $mem$$Address);
// Only the low 8 bits of the mask can matter after a zero-extended byte load.
6959 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6960 %}
6961 ins_pipe(ialu_reg_mem);
6962 %}
6963
6964 // Load Short (16 bit signed): matches LoadS and emits a sign-extending
// word-to-int load (movswl).
6965 instruct loadS(rRegI dst, memory mem)
6966 %{
6967 match(Set dst (LoadS mem));
6968
6969 ins_cost(125);
6970 format %{ "movswl $dst, $mem\t# short" %}
6971
6972 ins_encode %{
6973 __ movswl($dst$$Register, $mem$$Address);
6974 %}
6975
6976 ins_pipe(ialu_reg_mem);
6977 %}
6978
6979 // Load Short (16 bit signed) to Byte (8 bit signed). The matched
// (x << 24) >> 24 pattern keeps only the sign-extended low byte of the loaded
// value, so it is replaced by one sign-extending byte load from the same address.
6980 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6981 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6982
6983 ins_cost(125);
6984 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6985 ins_encode %{
6986 __ movsbl($dst$$Register, $mem$$Address);
6987 %}
6988 ins_pipe(ialu_reg_mem);
6989 %}
6990
6991 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into a
// single sign-extending word-to-quad load (movswq).
6992 instruct loadS2L(rRegL dst, memory mem)
6993 %{
6994 match(Set dst (ConvI2L (LoadS mem)));
6995
6996 ins_cost(125);
6997 format %{ "movswq $dst, $mem\t# short -> long" %}
6998
6999 ins_encode %{
7000 __ movswq($dst$$Register, $mem$$Address);
7001 %}
7002
7003 ins_pipe(ialu_reg_mem);
7004 %}
7005
7006 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and emits a
// zero-extending word-to-int load (movzwl).
7007 instruct loadUS(rRegI dst, memory mem)
7008 %{
7009 match(Set dst (LoadUS mem));
7010
7011 ins_cost(125);
7012 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7013
7014 ins_encode %{
7015 __ movzwl($dst$$Register, $mem$$Address);
7016 %}
7017
7018 ins_pipe(ialu_reg_mem);
7019 %}
7020
7021 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed). The
// matched (x << 24) >> 24 pattern keeps only the sign-extended low byte, so it
// is replaced by one sign-extending byte load from the same address.
7022 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7023 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7024
7025 ins_cost(125);
7026 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7027 ins_encode %{
7028 __ movsbl($dst$$Register, $mem$$Address);
7029 %}
7030 ins_pipe(ialu_reg_mem);
7031 %}
7032
7033 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds
// ConvI2L(LoadUS) into a single zero-extending word-to-quad load (movzwq).
7034 instruct loadUS2L(rRegL dst, memory mem)
7035 %{
7036 match(Set dst (ConvI2L (LoadUS mem)));
7037
7038 ins_cost(125);
7039 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7040
7041 ins_encode %{
7042 __ movzwq($dst$$Register, $mem$$Address);
7043 %}
7044
7045 ins_pipe(ialu_reg_mem);
7046 %}
7047
7048 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register.
// Masking with 0xFF leaves only the low byte, so the whole tree becomes one
// zero-extending byte load; no separate and instruction (and no flags kill).
7049 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7050 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7051
7052 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7053 ins_encode %{
7054 __ movzbq($dst$$Register, $mem$$Address);
7055 %}
7056 ins_pipe(ialu_reg_mem);
7057 %}
7058
7059 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register.
// Folds ConvI2L(AndI(LoadUS, mask)) into a zero-extending load followed by an
// andl. The flags register is declared killed because of the andl.
7060 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7061 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7062 effect(KILL cr);
7063
7064 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7065 "andl $dst, right_n_bits($mask, 16)" %}
7066 ins_encode %{
7067 Register Rdst = $dst$$Register;
7068 __ movzwq(Rdst, $mem$$Address);
// Only the low 16 bits of the mask can matter after a zero-extended word load.
7069 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7070 %}
7071 ins_pipe(ialu_reg_mem);
7072 %}
7073
7074 // Load Integer: matches LoadI and emits a plain 32-bit load (movl).
7075 instruct loadI(rRegI dst, memory mem)
7076 %{
7077 match(Set dst (LoadI mem));
7078
7079 ins_cost(125);
7080 format %{ "movl $dst, $mem\t# int" %}
7081
7082 ins_encode %{
7083 __ movl($dst$$Register, $mem$$Address);
7084 %}
7085
7086 ins_pipe(ialu_reg_mem);
7087 %}
7088
7089 // Load Integer (32 bit signed) to Byte (8 bit signed). The matched
// (x << 24) >> 24 pattern keeps only the sign-extended low byte, so it is
// replaced by one sign-extending byte load from the same address.
7090 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7091 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7092
7093 ins_cost(125);
7094 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7095 ins_encode %{
7096 __ movsbl($dst$$Register, $mem$$Address);
7097 %}
7098 ins_pipe(ialu_reg_mem);
7099 %}
7100
7101 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): an int load
// masked with 0xFF is replaced by one zero-extending byte load.
7102 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7103 match(Set dst (AndI (LoadI mem) mask));
7104
7105 ins_cost(125);
7106 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7107 ins_encode %{
7108 __ movzbl($dst$$Register, $mem$$Address);
7109 %}
7110 ins_pipe(ialu_reg_mem);
7111 %}
7112
7113 // Load Integer (32 bit signed) to Short (16 bit signed). The matched
// (x << 16) >> 16 pattern keeps only the sign-extended low word, so it is
// replaced by one sign-extending word load from the same address.
7114 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7115 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7116
7117 ins_cost(125);
7118 format %{ "movswl $dst, $mem\t# int -> short" %}
7119 ins_encode %{
7120 __ movswl($dst$$Register, $mem$$Address);
7121 %}
7122 ins_pipe(ialu_reg_mem);
7123 %}
7124
7125 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): an
// int load masked with 0xFFFF is replaced by one zero-extending word load.
7126 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7127 match(Set dst (AndI (LoadI mem) mask));
7128
7129 ins_cost(125);
7130 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7131 ins_encode %{
7132 __ movzwl($dst$$Register, $mem$$Address);
7133 %}
7134 ins_pipe(ialu_reg_mem);
7135 %}
7136
7137 // Load Integer into Long Register: folds ConvI2L(LoadI) into a single
// sign-extending int-to-quad load (movslq).
7138 instruct loadI2L(rRegL dst, memory mem)
7139 %{
7140 match(Set dst (ConvI2L (LoadI mem)));
7141
7142 ins_cost(125);
7143 format %{ "movslq $dst, $mem\t# int -> long" %}
7144
7145 ins_encode %{
7146 __ movslq($dst$$Register, $mem$$Address);
7147 %}
7148
7149 ins_pipe(ialu_reg_mem);
7150 %}
7151
7152 // Load Integer with mask 0xFF into Long Register: the masked, widened int
// load is replaced by one zero-extending byte load (movzbq).
7153 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7154 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7155
7156 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7157 ins_encode %{
7158 __ movzbq($dst$$Register, $mem$$Address);
7159 %}
7160 ins_pipe(ialu_reg_mem);
7161 %}
7162
7163 // Load Integer with mask 0xFFFF into Long Register: the masked, widened int
// load is replaced by one zero-extending word load (movzwq).
7164 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7165 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7166
7167 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7168 ins_encode %{
7169 __ movzwq($dst$$Register, $mem$$Address);
7170 %}
7171 ins_pipe(ialu_reg_mem);
7172 %}
7173
7174 // Load Integer with a 31-bit mask into Long Register. Emits a 32-bit load
// followed by an andl with the mask; no separate int-to-long conversion is
// emitted. The flags register is declared killed because of the andl.
7175 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7176 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7177 effect(KILL cr);
7178
7179 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7180 "andl $dst, $mask" %}
7181 ins_encode %{
7182 Register Rdst = $dst$$Register;
7183 __ movl(Rdst, $mem$$Address);
7184 __ andl(Rdst, $mask$$constant);
7185 %}
7186 ins_pipe(ialu_reg_mem);
7187 %}
7188
7189 // Load Unsigned Integer into Long Register: a widened int load that is then
// masked with the immL_32bits constant is replaced by a single movl; neither
// the conversion nor the and is emitted.
7190 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7191 %{
7192 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7193
7194 ins_cost(125);
7195 format %{ "movl $dst, $mem\t# uint -> long" %}
7196
7197 ins_encode %{
7198 __ movl($dst$$Register, $mem$$Address);
7199 %}
7200
7201 ins_pipe(ialu_reg_mem);
7202 %}
7203
7204 // Load Long: matches LoadL and emits a 64-bit load (movq).
7205 instruct loadL(rRegL dst, memory mem)
7206 %{
7207 match(Set dst (LoadL mem));
7208
7209 ins_cost(125);
7210 format %{ "movq $dst, $mem\t# long" %}
7211
7212 ins_encode %{
7213 __ movq($dst$$Register, $mem$$Address);
7214 %}
7215
7216 ins_pipe(ialu_reg_mem); // XXX
7217 %}
7218
7219 // Load Range: matches LoadRange and emits a 32-bit load (movl) into an int
// register.
7220 instruct loadRange(rRegI dst, memory mem)
7221 %{
7222 match(Set dst (LoadRange mem));
7223
7224 ins_cost(125); // XXX
7225 format %{ "movl $dst, $mem\t# range" %}
7226 ins_encode %{
7227 __ movl($dst$$Register, $mem$$Address);
7228 %}
7229 ins_pipe(ialu_reg_mem);
7230 %}
7231
7232 // Load Pointer: plain 64-bit load (movq). Only selected when the load node
// carries no barrier data (barrier_data() == 0).
7233 instruct loadP(rRegP dst, memory mem)
7234 %{
7235 match(Set dst (LoadP mem));
7236 predicate(n->as_Load()->barrier_data() == 0);
7237
7238 ins_cost(125); // XXX
7239 format %{ "movq $dst, $mem\t# ptr" %}
7240 ins_encode %{
7241 __ movq($dst$$Register, $mem$$Address);
7242 %}
7243 ins_pipe(ialu_reg_mem); // XXX
7244 %}
7245
7246 // Load Compressed Pointer: plain 32-bit load (movl) into a narrow-oop
// register. Only selected when the load node carries no barrier data.
7247 instruct loadN(rRegN dst, memory mem)
7248 %{
7249 predicate(n->as_Load()->barrier_data() == 0);
7250 match(Set dst (LoadN mem));
7251
7252 ins_cost(125); // XXX
7253 format %{ "movl $dst, $mem\t# compressed ptr" %}
7254 ins_encode %{
7255 __ movl($dst$$Register, $mem$$Address);
7256 %}
7257 ins_pipe(ialu_reg_mem); // XXX
7258 %}
7259
7260
7261 // Load Klass Pointer: matches LoadKlass and emits a 64-bit load (movq).
7262 instruct loadKlass(rRegP dst, memory mem)
7263 %{
7264 match(Set dst (LoadKlass mem));
7265
7266 ins_cost(125); // XXX
7267 format %{ "movq $dst, $mem\t# class" %}
7268 ins_encode %{
7269 __ movq($dst$$Register, $mem$$Address);
7270 %}
7271 ins_pipe(ialu_reg_mem); // XXX
7272 %}
7273
7274 // Load narrow Klass Pointer: plain 32-bit load (movl). Used only when
// UseCompactObjectHeaders is off; loadNKlassCompactHeaders covers the other case.
7275 instruct loadNKlass(rRegN dst, memory mem)
7276 %{
7277 predicate(!UseCompactObjectHeaders);
7278 match(Set dst (LoadNKlass mem));
7279
7280 ins_cost(125); // XXX
7281 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7282 ins_encode %{
7283 __ movl($dst$$Register, $mem$$Address);
7284 %}
7285 ins_pipe(ialu_reg_mem); // XXX
7286 %}
7287
// Load narrow Klass Pointer when UseCompactObjectHeaders is on. The value is
// loaded from the address rebased by -Type::klass_offset() and then shifted
// right by markWord::klass_shift. The flags register is declared killed
// because of the shift.
7288 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7289 %{
7290 predicate(UseCompactObjectHeaders);
7291 match(Set dst (LoadNKlass mem));
7292 effect(KILL cr);
7293 ins_cost(125);
7294 format %{
7295 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7296 "shrl $dst, markWord::klass_shift"
7297 %}
7298 ins_encode %{
7299 // The incoming address is pointing into obj-start + Type::klass_offset(). We need to extract
7300 // obj-start, so that we can load from the object's mark-word instead.
7301 Register d = $dst$$Register;
7302 Address s = ($mem$$Address).plus_disp(-Type::klass_offset());
// With APX a single eshrl taking the memory source performs load and shift;
// otherwise fall back to a separate movl + shrl pair.
7303 if (UseAPX) {
7304 __ eshrl(d, s, markWord::klass_shift, false);
7305 } else {
7306 __ movl(d, s);
7307 __ shrl(d, markWord::klass_shift);
7308 }
7309 %}
7310 ins_pipe(ialu_reg_mem);
7311 %}
7312
7313 // Load Float: matches LoadF and loads into an XMM register through the
// movflt macro-assembler helper. Scheduled with the generic pipe_slow class.
7314 instruct loadF(regF dst, memory mem)
7315 %{
7316 match(Set dst (LoadF mem));
7317
7318 ins_cost(145); // XXX
7319 format %{ "movss $dst, $mem\t# float" %}
7320 ins_encode %{
7321 __ movflt($dst$$XMMRegister, $mem$$Address);
7322 %}
7323 ins_pipe(pipe_slow); // XXX
7324 %}
7325
7326 // Load Double, variant selected when UseXmmLoadAndClearUpper is off.
// The encoding is the same movdbl call as in loadD; only predicate and format
// text differ. NOTE(review): presumably movdbl itself picks movlpd vs movsd
// based on the flag - confirm in MacroAssembler.
7327 instruct loadD_partial(regD dst, memory mem)
7328 %{
7329 predicate(!UseXmmLoadAndClearUpper);
7330 match(Set dst (LoadD mem));
7331
7332 ins_cost(145); // XXX
7333 format %{ "movlpd $dst, $mem\t# double" %}
7334 ins_encode %{
7335 __ movdbl($dst$$XMMRegister, $mem$$Address);
7336 %}
7337 ins_pipe(pipe_slow); // XXX
7338 %}
7339
// Load Double, variant selected when UseXmmLoadAndClearUpper is on.
// The encoding is the same movdbl call as in loadD_partial; only predicate and
// format text differ.
7340 instruct loadD(regD dst, memory mem)
7341 %{
7342 predicate(UseXmmLoadAndClearUpper);
7343 match(Set dst (LoadD mem));
7344
7345 ins_cost(145); // XXX
7346 format %{ "movsd $dst, $mem\t# double" %}
7347 ins_encode %{
7348 __ movdbl($dst$$XMMRegister, $mem$$Address);
7349 %}
7350 ins_pipe(pipe_slow); // XXX
7351 %}
7352
// Materialize an AOT Runtime Constants address (an immAOTRuntimeConstantsAddress
// immediate) into a pointer register via the load_aotrc_address helper.
7353 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7354 %{
7355 match(Set dst con);
7356
7357 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7358
7359 ins_encode %{
7360 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7361 %}
7362
7363 ins_pipe(ialu_reg_fat);
7364 %}
7365
7366 // min = java.lang.Math.min(float a, float b)
7367 // max = java.lang.Math.max(float a, float b)
// AVX10.2 variant for non-reduction nodes: one rule matches both MaxF and MinF
// and passes the ideal opcode to sminmax_fp_avx10_2 to select the operation.
// No temporaries are needed.
7368 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
7369 %{
7370 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7371 match(Set dst (MaxF a b));
7372 match(Set dst (MinF a b));
7373
7374 format %{ "minmaxF $dst, $a, $b" %}
7375 ins_encode %{
7376 int opcode = this->ideal_Opcode();
7377 __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7378 %}
7379 ins_pipe( pipe_slow );
7380 %}
7381
// Float min/max for nodes that VLoopReductions::is_reduction() reports as
// reductions, on AVX10.2 hardware. Goes through emit_fp_min_max with one XMM
// temp and one integer temp; the flags register is declared killed.
7382 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
7383 %{
7384 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7385 match(Set dst (MaxF a b));
7386 match(Set dst (MinF a b));
7387 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7388
7389 format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7390 ins_encode %{
7391 int opcode = this->ideal_Opcode();
// The same rule serves both ideal opcodes; 'min' selects the operation.
7392 bool min = (opcode == Op_MinF) ? true : false;
7393 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7394 min, fp_prec_flt /*pt*/);
7395 %}
7396 ins_pipe( pipe_slow );
7397 %}
7398
7399 // min = java.lang.Math.min(float a, float b)
7400 // max = java.lang.Math.max(float a, float b)
// AVX variant (UseAVX > 0, no AVX10.2) for non-reduction nodes. The scalar
// ideal opcode is mapped to Op_MinV/Op_MaxV and handed to vminmax_fp with a
// 128-bit vector length; three XMM temporaries are required.
7401 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
7402 %{
7403 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7404 match(Set dst (MaxF a b));
7405 match(Set dst (MinF a b));
7406 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7407
7408 format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7409 ins_encode %{
7410 int opcode = this->ideal_Opcode();
7411 int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
7412 __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7413 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7414 %}
7415 ins_pipe( pipe_slow );
7416 %}
7417
// Float min/max for reduction nodes on AVX hardware without AVX10.2. Goes
// through emit_fp_min_max with one XMM temp and one integer temp; the flags
// register is declared killed.
7418 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
7419 %{
7420 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7421 match(Set dst (MaxF a b));
7422 match(Set dst (MinF a b));
7423 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7424
7425 format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7426 ins_encode %{
7427 int opcode = this->ideal_Opcode();
// The same rule serves both ideal opcodes; 'min' selects the operation.
7428 bool min = (opcode == Op_MinF) ? true : false;
7429 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7430 min, fp_prec_flt /*pt*/);
7431 %}
7432 ins_pipe( pipe_slow );
7433 %}
7434
7435 // min = java.lang.Math.min(double a, double b)
7436 // max = java.lang.Math.max(double a, double b)
// AVX10.2 variant for non-reduction nodes: one rule matches both MaxD and MinD
// and passes the ideal opcode to sminmax_fp_avx10_2 to select the operation.
// No temporaries are needed.
7437 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
7438 %{
7439 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7440 match(Set dst (MaxD a b));
7441 match(Set dst (MinD a b));
7442
7443 format %{ "minmaxD $dst, $a, $b" %}
7444 ins_encode %{
7445 int opcode = this->ideal_Opcode();
7446 __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7447 %}
7448 ins_pipe( pipe_slow );
7449 %}
7450
// Double min/max for nodes that VLoopReductions::is_reduction() reports as
// reductions, on AVX10.2 hardware. Goes through emit_fp_min_max with one XMM
// temp and one general-purpose temp; the flags register is declared killed.
// The general-purpose temp is declared as a long register (rRegL), matching
// minmaxD_reduction_reg, which makes the same double-precision
// emit_fp_min_max call.
7451 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegL rtmp, rFlagsReg cr)
7452 %{
7453 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7454 match(Set dst (MaxD a b));
7455 match(Set dst (MinD a b));
7456 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7457
7458 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7459 ins_encode %{
7460 int opcode = this->ideal_Opcode();
// The same rule serves both ideal opcodes; 'min' selects the operation.
7461 bool min = (opcode == Op_MinD) ? true : false;
7462 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7463 min, fp_prec_dbl /*pt*/);
7464 %}
7465 ins_pipe( pipe_slow );
7466 %}
7467
7468 // min = java.lang.Math.min(double a, double b)
7469 // max = java.lang.Math.max(double a, double b)
// AVX variant (UseAVX > 0, no AVX10.2) for non-reduction nodes. The scalar
// ideal opcode is mapped to Op_MinV/Op_MaxV and handed to vminmax_fp with a
// 128-bit vector length; three XMM temporaries are required.
7470 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
7471 %{
7472 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7473 match(Set dst (MaxD a b));
7474 match(Set dst (MinD a b));
7475 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7476
7477 format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7478 ins_encode %{
7479 int opcode = this->ideal_Opcode();
7480 int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
7481 __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7482 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7483 %}
7484 ins_pipe( pipe_slow );
7485 %}
7486
// Double min/max for reduction nodes on AVX hardware without AVX10.2. Goes
// through emit_fp_min_max with one XMM temp and one long-register temp; the
// flags register is declared killed.
7487 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
7488 %{
7489 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7490 match(Set dst (MaxD a b));
7491 match(Set dst (MinD a b));
7492 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7493
7494 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7495 ins_encode %{
7496 int opcode = this->ideal_Opcode();
// The same rule serves both ideal opcodes; 'min' selects the operation.
7497 bool min = (opcode == Op_MinD) ? true : false;
7498 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7499 min, fp_prec_dbl /*pt*/);
7500 %}
7501 ins_pipe( pipe_slow );
7502 %}
7503
7504 // Load Effective Address: materializes the address described by an
// indOffset8 operand into a pointer register with leaq.
7505 instruct leaP8(rRegP dst, indOffset8 mem)
7506 %{
7507 match(Set dst mem);
7508
7509 ins_cost(110); // XXX
7510 format %{ "leaq $dst, $mem\t# ptr 8" %}
7511 ins_encode %{
7512 __ leaq($dst$$Register, $mem$$Address);
7513 %}
7514 ins_pipe(ialu_reg_reg_fat);
7515 %}
7516
// Load Effective Address for an indOffset32 operand: materializes the
// operand's address into a pointer register with leaq.
7517 instruct leaP32(rRegP dst, indOffset32 mem)
7518 %{
7519 match(Set dst mem);
7520
7521 ins_cost(110);
7522 format %{ "leaq $dst, $mem\t# ptr 32" %}
7523 ins_encode %{
7524 __ leaq($dst$$Register, $mem$$Address);
7525 %}
7526 ins_pipe(ialu_reg_reg_fat);
7527 %}
7528
// Load Effective Address for an indIndexOffset operand: materializes the
// operand's address into a pointer register with leaq.
7529 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7530 %{
7531 match(Set dst mem);
7532
7533 ins_cost(110);
7534 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7535 ins_encode %{
7536 __ leaq($dst$$Register, $mem$$Address);
7537 %}
7538 ins_pipe(ialu_reg_reg_fat);
7539 %}
7540
// Load Effective Address for an indIndexScale operand: materializes the
// operand's address into a pointer register with leaq.
7541 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7542 %{
7543 match(Set dst mem);
7544
7545 ins_cost(110);
7546 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7547 ins_encode %{
7548 __ leaq($dst$$Register, $mem$$Address);
7549 %}
7550 ins_pipe(ialu_reg_reg_fat);
7551 %}
7552
7553 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7554 %{
7555 match(Set dst mem);
7556
7557 ins_cost(110);
7558 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7559 ins_encode %{
7560 __ leaq($dst$$Register, $mem$$Address);
7561 %}
7562 ins_pipe(ialu_reg_reg_fat);
7563 %}
7564
7565 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7566 %{
7567 match(Set dst mem);
7568
7569 ins_cost(110);
7570 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7571 ins_encode %{
7572 __ leaq($dst$$Register, $mem$$Address);
7573 %}
7574 ins_pipe(ialu_reg_reg_fat);
7575 %}
7576
7577 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7578 %{
7579 match(Set dst mem);
7580
7581 ins_cost(110);
7582 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7583 ins_encode %{
7584 __ leaq($dst$$Register, $mem$$Address);
7585 %}
7586 ins_pipe(ialu_reg_reg_fat);
7587 %}
7588
7589 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7590 %{
7591 match(Set dst mem);
7592
7593 ins_cost(110);
7594 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7595 ins_encode %{
7596 __ leaq($dst$$Register, $mem$$Address);
7597 %}
7598 ins_pipe(ialu_reg_reg_fat);
7599 %}
7600
7601 // Load Effective Address which uses Narrow (32-bits) oop
7602 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7603 %{
7604 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7605 match(Set dst mem);
7606
7607 ins_cost(110);
7608 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7609 ins_encode %{
7610 __ leaq($dst$$Register, $mem$$Address);
7611 %}
7612 ins_pipe(ialu_reg_reg_fat);
7613 %}
7614
7615 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7616 %{
7617 predicate(CompressedOops::shift() == 0);
7618 match(Set dst mem);
7619
7620 ins_cost(110); // XXX
7621 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7622 ins_encode %{
7623 __ leaq($dst$$Register, $mem$$Address);
7624 %}
7625 ins_pipe(ialu_reg_reg_fat);
7626 %}
7627
7628 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7629 %{
7630 predicate(CompressedOops::shift() == 0);
7631 match(Set dst mem);
7632
7633 ins_cost(110);
7634 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7635 ins_encode %{
7636 __ leaq($dst$$Register, $mem$$Address);
7637 %}
7638 ins_pipe(ialu_reg_reg_fat);
7639 %}
7640
7641 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7642 %{
7643 predicate(CompressedOops::shift() == 0);
7644 match(Set dst mem);
7645
7646 ins_cost(110);
7647 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7648 ins_encode %{
7649 __ leaq($dst$$Register, $mem$$Address);
7650 %}
7651 ins_pipe(ialu_reg_reg_fat);
7652 %}
7653
7654 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7655 %{
7656 predicate(CompressedOops::shift() == 0);
7657 match(Set dst mem);
7658
7659 ins_cost(110);
7660 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7661 ins_encode %{
7662 __ leaq($dst$$Register, $mem$$Address);
7663 %}
7664 ins_pipe(ialu_reg_reg_fat);
7665 %}
7666
7667 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7668 %{
7669 predicate(CompressedOops::shift() == 0);
7670 match(Set dst mem);
7671
7672 ins_cost(110);
7673 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7674 ins_encode %{
7675 __ leaq($dst$$Register, $mem$$Address);
7676 %}
7677 ins_pipe(ialu_reg_reg_fat);
7678 %}
7679
7680 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7681 %{
7682 predicate(CompressedOops::shift() == 0);
7683 match(Set dst mem);
7684
7685 ins_cost(110);
7686 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7687 ins_encode %{
7688 __ leaq($dst$$Register, $mem$$Address);
7689 %}
7690 ins_pipe(ialu_reg_reg_fat);
7691 %}
7692
7693 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7694 %{
7695 predicate(CompressedOops::shift() == 0);
7696 match(Set dst mem);
7697
7698 ins_cost(110);
7699 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7700 ins_encode %{
7701 __ leaq($dst$$Register, $mem$$Address);
7702 %}
7703 ins_pipe(ialu_reg_reg_fat);
7704 %}
7705
7706 instruct loadConI(rRegI dst, immI src)
7707 %{
7708 match(Set dst src);
7709
7710 format %{ "movl $dst, $src\t# int" %}
7711 ins_encode %{
7712 __ movl($dst$$Register, $src$$constant);
7713 %}
7714 ins_pipe(ialu_reg_fat); // XXX
7715 %}
7716
7717 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7718 %{
7719 match(Set dst src);
7720 effect(KILL cr);
7721
7722 ins_cost(50);
7723 format %{ "xorl $dst, $dst\t# int" %}
7724 ins_encode %{
7725 __ xorl($dst$$Register, $dst$$Register);
7726 %}
7727 ins_pipe(ialu_reg);
7728 %}
7729
7730 instruct loadConL(rRegL dst, immL src)
7731 %{
7732 match(Set dst src);
7733
7734 ins_cost(150);
7735 format %{ "movq $dst, $src\t# long" %}
7736 ins_encode %{
7737 __ mov64($dst$$Register, $src$$constant);
7738 %}
7739 ins_pipe(ialu_reg);
7740 %}
7741
7742 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7743 %{
7744 match(Set dst src);
7745 effect(KILL cr);
7746
7747 ins_cost(50);
7748 format %{ "xorl $dst, $dst\t# long" %}
7749 ins_encode %{
7750 __ xorl($dst$$Register, $dst$$Register);
7751 %}
7752 ins_pipe(ialu_reg); // XXX
7753 %}
7754
7755 instruct loadConUL32(rRegL dst, immUL32 src)
7756 %{
7757 match(Set dst src);
7758
7759 ins_cost(60);
7760 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7761 ins_encode %{
7762 __ movl($dst$$Register, $src$$constant);
7763 %}
7764 ins_pipe(ialu_reg);
7765 %}
7766
7767 instruct loadConL32(rRegL dst, immL32 src)
7768 %{
7769 match(Set dst src);
7770
7771 ins_cost(70);
7772 format %{ "movq $dst, $src\t# long (32-bit)" %}
7773 ins_encode %{
7774 __ movq($dst$$Register, $src$$constant);
7775 %}
7776 ins_pipe(ialu_reg);
7777 %}
7778
7779 instruct loadConP(rRegP dst, immP con) %{
7780 match(Set dst con);
7781
7782 format %{ "movq $dst, $con\t# ptr" %}
7783 ins_encode %{
7784 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7785 %}
7786 ins_pipe(ialu_reg_fat); // XXX
7787 %}
7788
7789 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7790 %{
7791 match(Set dst src);
7792 effect(KILL cr);
7793
7794 ins_cost(50);
7795 format %{ "xorl $dst, $dst\t# ptr" %}
7796 ins_encode %{
7797 __ xorl($dst$$Register, $dst$$Register);
7798 %}
7799 ins_pipe(ialu_reg);
7800 %}
7801
7802 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7803 %{
7804 match(Set dst src);
7805 effect(KILL cr);
7806
7807 ins_cost(60);
7808 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7809 ins_encode %{
7810 __ movl($dst$$Register, $src$$constant);
7811 %}
7812 ins_pipe(ialu_reg);
7813 %}
7814
7815 instruct loadConF(regF dst, immF con) %{
7816 match(Set dst con);
7817 ins_cost(125);
7818 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7819 ins_encode %{
7820 __ movflt($dst$$XMMRegister, $constantaddress($con));
7821 %}
7822 ins_pipe(pipe_slow);
7823 %}
7824
7825 instruct loadConH(regF dst, immH con) %{
7826 match(Set dst con);
7827 ins_cost(125);
7828 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7829 ins_encode %{
7830 __ movflt($dst$$XMMRegister, $constantaddress($con));
7831 %}
7832 ins_pipe(pipe_slow);
7833 %}
7834
7835 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7836 match(Set dst src);
7837 effect(KILL cr);
7838 format %{ "xorq $dst, $src\t# compressed null pointer" %}
7839 ins_encode %{
7840 __ xorq($dst$$Register, $dst$$Register);
7841 %}
7842 ins_pipe(ialu_reg);
7843 %}
7844
7845 instruct loadConN(rRegN dst, immN src) %{
7846 match(Set dst src);
7847
7848 ins_cost(125);
7849 format %{ "movl $dst, $src\t# compressed ptr" %}
7850 ins_encode %{
7851 address con = (address)$src$$constant;
7852 if (con == nullptr) {
7853 ShouldNotReachHere();
7854 } else {
7855 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7856 }
7857 %}
7858 ins_pipe(ialu_reg_fat); // XXX
7859 %}
7860
7861 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7862 match(Set dst src);
7863
7864 ins_cost(125);
7865 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7866 ins_encode %{
7867 address con = (address)$src$$constant;
7868 if (con == nullptr) {
7869 ShouldNotReachHere();
7870 } else {
7871 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7872 }
7873 %}
7874 ins_pipe(ialu_reg_fat); // XXX
7875 %}
7876
7877 instruct loadConF0(regF dst, immF0 src)
7878 %{
7879 match(Set dst src);
7880 ins_cost(100);
7881
7882 format %{ "xorps $dst, $dst\t# float 0.0" %}
7883 ins_encode %{
7884 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7885 %}
7886 ins_pipe(pipe_slow);
7887 %}
7888
7889 // Use the same format since predicate() can not be used here.
7890 instruct loadConD(regD dst, immD con) %{
7891 match(Set dst con);
7892 ins_cost(125);
7893 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7894 ins_encode %{
7895 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7896 %}
7897 ins_pipe(pipe_slow);
7898 %}
7899
7900 instruct loadConD0(regD dst, immD0 src)
7901 %{
7902 match(Set dst src);
7903 ins_cost(100);
7904
7905 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7906 ins_encode %{
7907 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7908 %}
7909 ins_pipe(pipe_slow);
7910 %}
7911
7912 instruct loadSSI(rRegI dst, stackSlotI src)
7913 %{
7914 match(Set dst src);
7915
7916 ins_cost(125);
7917 format %{ "movl $dst, $src\t# int stk" %}
7918 ins_encode %{
7919 __ movl($dst$$Register, $src$$Address);
7920 %}
7921 ins_pipe(ialu_reg_mem);
7922 %}
7923
7924 instruct loadSSL(rRegL dst, stackSlotL src)
7925 %{
7926 match(Set dst src);
7927
7928 ins_cost(125);
7929 format %{ "movq $dst, $src\t# long stk" %}
7930 ins_encode %{
7931 __ movq($dst$$Register, $src$$Address);
7932 %}
7933 ins_pipe(ialu_reg_mem);
7934 %}
7935
7936 instruct loadSSP(rRegP dst, stackSlotP src)
7937 %{
7938 match(Set dst src);
7939
7940 ins_cost(125);
7941 format %{ "movq $dst, $src\t# ptr stk" %}
7942 ins_encode %{
7943 __ movq($dst$$Register, $src$$Address);
7944 %}
7945 ins_pipe(ialu_reg_mem);
7946 %}
7947
7948 instruct loadSSF(regF dst, stackSlotF src)
7949 %{
7950 match(Set dst src);
7951
7952 ins_cost(125);
7953 format %{ "movss $dst, $src\t# float stk" %}
7954 ins_encode %{
7955 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7956 %}
7957 ins_pipe(pipe_slow); // XXX
7958 %}
7959
7960 // Use the same format since predicate() can not be used here.
7961 instruct loadSSD(regD dst, stackSlotD src)
7962 %{
7963 match(Set dst src);
7964
7965 ins_cost(125);
7966 format %{ "movsd $dst, $src\t# double stk" %}
7967 ins_encode %{
7968 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7969 %}
7970 ins_pipe(pipe_slow); // XXX
7971 %}
7972
7973 // Prefetch instructions for allocation.
7974 // Must be safe to execute with invalid address (cannot fault).
7975
7976 instruct prefetchAlloc( memory mem ) %{
7977 predicate(AllocatePrefetchInstr==3);
7978 match(PrefetchAllocation mem);
7979 ins_cost(125);
7980
7981 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7982 ins_encode %{
7983 __ prefetchw($mem$$Address);
7984 %}
7985 ins_pipe(ialu_mem);
7986 %}
7987
7988 instruct prefetchAllocNTA( memory mem ) %{
7989 predicate(AllocatePrefetchInstr==0);
7990 match(PrefetchAllocation mem);
7991 ins_cost(125);
7992
7993 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7994 ins_encode %{
7995 __ prefetchnta($mem$$Address);
7996 %}
7997 ins_pipe(ialu_mem);
7998 %}
7999
8000 instruct prefetchAllocT0( memory mem ) %{
8001 predicate(AllocatePrefetchInstr==1);
8002 match(PrefetchAllocation mem);
8003 ins_cost(125);
8004
8005 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
8006 ins_encode %{
8007 __ prefetcht0($mem$$Address);
8008 %}
8009 ins_pipe(ialu_mem);
8010 %}
8011
8012 instruct prefetchAllocT2( memory mem ) %{
8013 predicate(AllocatePrefetchInstr==2);
8014 match(PrefetchAllocation mem);
8015 ins_cost(125);
8016
8017 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8018 ins_encode %{
8019 __ prefetcht2($mem$$Address);
8020 %}
8021 ins_pipe(ialu_mem);
8022 %}
8023
8024 //----------Store Instructions-------------------------------------------------
8025
8026 // Store Byte
8027 instruct storeB(memory mem, rRegI src)
8028 %{
8029 match(Set mem (StoreB mem src));
8030
8031 ins_cost(125); // XXX
8032 format %{ "movb $mem, $src\t# byte" %}
8033 ins_encode %{
8034 __ movb($mem$$Address, $src$$Register);
8035 %}
8036 ins_pipe(ialu_mem_reg);
8037 %}
8038
8039 // Store Char/Short
8040 instruct storeC(memory mem, rRegI src)
8041 %{
8042 match(Set mem (StoreC mem src));
8043
8044 ins_cost(125); // XXX
8045 format %{ "movw $mem, $src\t# char/short" %}
8046 ins_encode %{
8047 __ movw($mem$$Address, $src$$Register);
8048 %}
8049 ins_pipe(ialu_mem_reg);
8050 %}
8051
8052 // Store Integer
8053 instruct storeI(memory mem, rRegI src)
8054 %{
8055 match(Set mem (StoreI mem src));
8056
8057 ins_cost(125); // XXX
8058 format %{ "movl $mem, $src\t# int" %}
8059 ins_encode %{
8060 __ movl($mem$$Address, $src$$Register);
8061 %}
8062 ins_pipe(ialu_mem_reg);
8063 %}
8064
8065 // Store Long
8066 instruct storeL(memory mem, rRegL src)
8067 %{
8068 match(Set mem (StoreL mem src));
8069
8070 ins_cost(125); // XXX
8071 format %{ "movq $mem, $src\t# long" %}
8072 ins_encode %{
8073 __ movq($mem$$Address, $src$$Register);
8074 %}
8075 ins_pipe(ialu_mem_reg); // XXX
8076 %}
8077
8078 // Store Pointer
8079 instruct storeP(memory mem, any_RegP src)
8080 %{
8081 predicate(n->as_Store()->barrier_data() == 0);
8082 match(Set mem (StoreP mem src));
8083
8084 ins_cost(125); // XXX
8085 format %{ "movq $mem, $src\t# ptr" %}
8086 ins_encode %{
8087 __ movq($mem$$Address, $src$$Register);
8088 %}
8089 ins_pipe(ialu_mem_reg);
8090 %}
8091
8092 instruct storeImmP0(memory mem, immP0 zero)
8093 %{
8094 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8095 match(Set mem (StoreP mem zero));
8096
8097 ins_cost(125); // XXX
8098 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8099 ins_encode %{
8100 __ movq($mem$$Address, r12);
8101 %}
8102 ins_pipe(ialu_mem_reg);
8103 %}
8104
8105 // Store Null Pointer, mark word, or other simple pointer constant.
8106 instruct storeImmP(memory mem, immP31 src)
8107 %{
8108 predicate(n->as_Store()->barrier_data() == 0);
8109 match(Set mem (StoreP mem src));
8110
8111 ins_cost(150); // XXX
8112 format %{ "movq $mem, $src\t# ptr" %}
8113 ins_encode %{
8114 __ movq($mem$$Address, $src$$constant);
8115 %}
8116 ins_pipe(ialu_mem_imm);
8117 %}
8118
8119 // Store Compressed Pointer
8120 instruct storeN(memory mem, rRegN src)
8121 %{
8122 predicate(n->as_Store()->barrier_data() == 0);
8123 match(Set mem (StoreN mem src));
8124
8125 ins_cost(125); // XXX
8126 format %{ "movl $mem, $src\t# compressed ptr" %}
8127 ins_encode %{
8128 __ movl($mem$$Address, $src$$Register);
8129 %}
8130 ins_pipe(ialu_mem_reg);
8131 %}
8132
8133 instruct storeNKlass(memory mem, rRegN src)
8134 %{
8135 match(Set mem (StoreNKlass mem src));
8136
8137 ins_cost(125); // XXX
8138 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8139 ins_encode %{
8140 __ movl($mem$$Address, $src$$Register);
8141 %}
8142 ins_pipe(ialu_mem_reg);
8143 %}
8144
8145 instruct storeImmN0(memory mem, immN0 zero)
8146 %{
8147 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8148 match(Set mem (StoreN mem zero));
8149
8150 ins_cost(125); // XXX
8151 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8152 ins_encode %{
8153 __ movl($mem$$Address, r12);
8154 %}
8155 ins_pipe(ialu_mem_reg);
8156 %}
8157
8158 instruct storeImmN(memory mem, immN src)
8159 %{
8160 predicate(n->as_Store()->barrier_data() == 0);
8161 match(Set mem (StoreN mem src));
8162
8163 ins_cost(150); // XXX
8164 format %{ "movl $mem, $src\t# compressed ptr" %}
8165 ins_encode %{
8166 address con = (address)$src$$constant;
8167 if (con == nullptr) {
8168 __ movl($mem$$Address, 0);
8169 } else {
8170 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8171 }
8172 %}
8173 ins_pipe(ialu_mem_imm);
8174 %}
8175
8176 instruct storeImmNKlass(memory mem, immNKlass src)
8177 %{
8178 match(Set mem (StoreNKlass mem src));
8179
8180 ins_cost(150); // XXX
8181 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8182 ins_encode %{
8183 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8184 %}
8185 ins_pipe(ialu_mem_imm);
8186 %}
8187
8188 // Store Integer Immediate
8189 instruct storeImmI0(memory mem, immI_0 zero)
8190 %{
8191 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8192 match(Set mem (StoreI mem zero));
8193
8194 ins_cost(125); // XXX
8195 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8196 ins_encode %{
8197 __ movl($mem$$Address, r12);
8198 %}
8199 ins_pipe(ialu_mem_reg);
8200 %}
8201
8202 instruct storeImmI(memory mem, immI src)
8203 %{
8204 match(Set mem (StoreI mem src));
8205
8206 ins_cost(150);
8207 format %{ "movl $mem, $src\t# int" %}
8208 ins_encode %{
8209 __ movl($mem$$Address, $src$$constant);
8210 %}
8211 ins_pipe(ialu_mem_imm);
8212 %}
8213
8214 // Store Long Immediate
8215 instruct storeImmL0(memory mem, immL0 zero)
8216 %{
8217 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8218 match(Set mem (StoreL mem zero));
8219
8220 ins_cost(125); // XXX
8221 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8222 ins_encode %{
8223 __ movq($mem$$Address, r12);
8224 %}
8225 ins_pipe(ialu_mem_reg);
8226 %}
8227
8228 instruct storeImmL(memory mem, immL32 src)
8229 %{
8230 match(Set mem (StoreL mem src));
8231
8232 ins_cost(150);
8233 format %{ "movq $mem, $src\t# long" %}
8234 ins_encode %{
8235 __ movq($mem$$Address, $src$$constant);
8236 %}
8237 ins_pipe(ialu_mem_imm);
8238 %}
8239
8240 // Store Short/Char Immediate
8241 instruct storeImmC0(memory mem, immI_0 zero)
8242 %{
8243 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8244 match(Set mem (StoreC mem zero));
8245
8246 ins_cost(125); // XXX
8247 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8248 ins_encode %{
8249 __ movw($mem$$Address, r12);
8250 %}
8251 ins_pipe(ialu_mem_reg);
8252 %}
8253
8254 instruct storeImmI16(memory mem, immI16 src)
8255 %{
8256 predicate(UseStoreImmI16);
8257 match(Set mem (StoreC mem src));
8258
8259 ins_cost(150);
8260 format %{ "movw $mem, $src\t# short/char" %}
8261 ins_encode %{
8262 __ movw($mem$$Address, $src$$constant);
8263 %}
8264 ins_pipe(ialu_mem_imm);
8265 %}
8266
8267 // Store Byte Immediate
8268 instruct storeImmB0(memory mem, immI_0 zero)
8269 %{
8270 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8271 match(Set mem (StoreB mem zero));
8272
8273 ins_cost(125); // XXX
8274 format %{ "movb $mem, R12\t# short/char (R12_heapbase==0)" %}
8275 ins_encode %{
8276 __ movb($mem$$Address, r12);
8277 %}
8278 ins_pipe(ialu_mem_reg);
8279 %}
8280
8281 instruct storeImmB(memory mem, immI8 src)
8282 %{
8283 match(Set mem (StoreB mem src));
8284
8285 ins_cost(150); // XXX
8286 format %{ "movb $mem, $src\t# byte" %}
8287 ins_encode %{
8288 __ movb($mem$$Address, $src$$constant);
8289 %}
8290 ins_pipe(ialu_mem_imm);
8291 %}
8292
8293 // Store Float
8294 instruct storeF(memory mem, regF src)
8295 %{
8296 match(Set mem (StoreF mem src));
8297
8298 ins_cost(95); // XXX
8299 format %{ "movss $mem, $src\t# float" %}
8300 ins_encode %{
8301 __ movflt($mem$$Address, $src$$XMMRegister);
8302 %}
8303 ins_pipe(pipe_slow); // XXX
8304 %}
8305
8306 // Store immediate Float value (it is faster than store from XMM register)
8307 instruct storeF0(memory mem, immF0 zero)
8308 %{
8309 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8310 match(Set mem (StoreF mem zero));
8311
8312 ins_cost(25); // XXX
8313 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8314 ins_encode %{
8315 __ movl($mem$$Address, r12);
8316 %}
8317 ins_pipe(ialu_mem_reg);
8318 %}
8319
8320 instruct storeF_imm(memory mem, immF src)
8321 %{
8322 match(Set mem (StoreF mem src));
8323
8324 ins_cost(50);
8325 format %{ "movl $mem, $src\t# float" %}
8326 ins_encode %{
8327 __ movl($mem$$Address, jint_cast($src$$constant));
8328 %}
8329 ins_pipe(ialu_mem_imm);
8330 %}
8331
8332 // Store Double
8333 instruct storeD(memory mem, regD src)
8334 %{
8335 match(Set mem (StoreD mem src));
8336
8337 ins_cost(95); // XXX
8338 format %{ "movsd $mem, $src\t# double" %}
8339 ins_encode %{
8340 __ movdbl($mem$$Address, $src$$XMMRegister);
8341 %}
8342 ins_pipe(pipe_slow); // XXX
8343 %}
8344
8345 // Store immediate double 0.0 (it is faster than store from XMM register)
8346 instruct storeD0_imm(memory mem, immD0 src)
8347 %{
8348 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8349 match(Set mem (StoreD mem src));
8350
8351 ins_cost(50);
8352 format %{ "movq $mem, $src\t# double 0." %}
8353 ins_encode %{
8354 __ movq($mem$$Address, $src$$constant);
8355 %}
8356 ins_pipe(ialu_mem_imm);
8357 %}
8358
8359 instruct storeD0(memory mem, immD0 zero)
8360 %{
8361 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8362 match(Set mem (StoreD mem zero));
8363
8364 ins_cost(25); // XXX
8365 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8366 ins_encode %{
8367 __ movq($mem$$Address, r12);
8368 %}
8369 ins_pipe(ialu_mem_reg);
8370 %}
8371
8372 instruct storeSSI(stackSlotI dst, rRegI src)
8373 %{
8374 match(Set dst src);
8375
8376 ins_cost(100);
8377 format %{ "movl $dst, $src\t# int stk" %}
8378 ins_encode %{
8379 __ movl($dst$$Address, $src$$Register);
8380 %}
8381 ins_pipe( ialu_mem_reg );
8382 %}
8383
8384 instruct storeSSL(stackSlotL dst, rRegL src)
8385 %{
8386 match(Set dst src);
8387
8388 ins_cost(100);
8389 format %{ "movq $dst, $src\t# long stk" %}
8390 ins_encode %{
8391 __ movq($dst$$Address, $src$$Register);
8392 %}
8393 ins_pipe(ialu_mem_reg);
8394 %}
8395
8396 instruct storeSSP(stackSlotP dst, rRegP src)
8397 %{
8398 match(Set dst src);
8399
8400 ins_cost(100);
8401 format %{ "movq $dst, $src\t# ptr stk" %}
8402 ins_encode %{
8403 __ movq($dst$$Address, $src$$Register);
8404 %}
8405 ins_pipe(ialu_mem_reg);
8406 %}
8407
8408 instruct storeSSF(stackSlotF dst, regF src)
8409 %{
8410 match(Set dst src);
8411
8412 ins_cost(95); // XXX
8413 format %{ "movss $dst, $src\t# float stk" %}
8414 ins_encode %{
8415 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8416 %}
8417 ins_pipe(pipe_slow); // XXX
8418 %}
8419
8420 instruct storeSSD(stackSlotD dst, regD src)
8421 %{
8422 match(Set dst src);
8423
8424 ins_cost(95); // XXX
8425 format %{ "movsd $dst, $src\t# double stk" %}
8426 ins_encode %{
8427 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8428 %}
8429 ins_pipe(pipe_slow); // XXX
8430 %}
8431
8432 instruct cacheWB(indirect addr)
8433 %{
8434 predicate(VM_Version::supports_data_cache_line_flush());
8435 match(CacheWB addr);
8436
8437 ins_cost(100);
8438 format %{"cache wb $addr" %}
8439 ins_encode %{
8440 assert($addr->index_position() < 0, "should be");
8441 assert($addr$$disp == 0, "should be");
8442 __ cache_wb(Address($addr$$base$$Register, 0));
8443 %}
8444 ins_pipe(pipe_slow); // XXX
8445 %}
8446
8447 instruct cacheWBPreSync()
8448 %{
8449 predicate(VM_Version::supports_data_cache_line_flush());
8450 match(CacheWBPreSync);
8451
8452 ins_cost(100);
8453 format %{"cache wb presync" %}
8454 ins_encode %{
8455 __ cache_wbsync(true);
8456 %}
8457 ins_pipe(pipe_slow); // XXX
8458 %}
8459
8460 instruct cacheWBPostSync()
8461 %{
8462 predicate(VM_Version::supports_data_cache_line_flush());
8463 match(CacheWBPostSync);
8464
8465 ins_cost(100);
8466 format %{"cache wb postsync" %}
8467 ins_encode %{
8468 __ cache_wbsync(false);
8469 %}
8470 ins_pipe(pipe_slow); // XXX
8471 %}
8472
8473 //----------BSWAP Instructions-------------------------------------------------
8474 instruct bytes_reverse_int(rRegI dst) %{
8475 match(Set dst (ReverseBytesI dst));
8476
8477 format %{ "bswapl $dst" %}
8478 ins_encode %{
8479 __ bswapl($dst$$Register);
8480 %}
8481 ins_pipe( ialu_reg );
8482 %}
8483
8484 instruct bytes_reverse_long(rRegL dst) %{
8485 match(Set dst (ReverseBytesL dst));
8486
8487 format %{ "bswapq $dst" %}
8488 ins_encode %{
8489 __ bswapq($dst$$Register);
8490 %}
8491 ins_pipe( ialu_reg);
8492 %}
8493
8494 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8495 match(Set dst (ReverseBytesUS dst));
8496 effect(KILL cr);
8497
8498 format %{ "bswapl $dst\n\t"
8499 "shrl $dst,16\n\t" %}
8500 ins_encode %{
8501 __ bswapl($dst$$Register);
8502 __ shrl($dst$$Register, 16);
8503 %}
8504 ins_pipe( ialu_reg );
8505 %}
8506
8507 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8508 match(Set dst (ReverseBytesS dst));
8509 effect(KILL cr);
8510
8511 format %{ "bswapl $dst\n\t"
8512 "sar $dst,16\n\t" %}
8513 ins_encode %{
8514 __ bswapl($dst$$Register);
8515 __ sarl($dst$$Register, 16);
8516 %}
8517 ins_pipe( ialu_reg );
8518 %}
8519
8520 //---------- Zeros Count Instructions ------------------------------------------
8521
8522 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8523 predicate(UseCountLeadingZerosInstruction);
8524 match(Set dst (CountLeadingZerosI src));
8525 effect(KILL cr);
8526
8527 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8528 ins_encode %{
8529 __ lzcntl($dst$$Register, $src$$Register);
8530 %}
8531 ins_pipe(ialu_reg);
8532 %}
8533
8534 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8535 predicate(UseCountLeadingZerosInstruction);
8536 match(Set dst (CountLeadingZerosI (LoadI src)));
8537 effect(KILL cr);
8538 ins_cost(175);
8539 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8540 ins_encode %{
8541 __ lzcntl($dst$$Register, $src$$Address);
8542 %}
8543 ins_pipe(ialu_reg_mem);
8544 %}
8545
8546 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8547 predicate(!UseCountLeadingZerosInstruction);
8548 match(Set dst (CountLeadingZerosI src));
8549 effect(KILL cr);
8550
8551 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8552 "jnz skip\n\t"
8553 "movl $dst, -1\n"
8554 "skip:\n\t"
8555 "negl $dst\n\t"
8556 "addl $dst, 31" %}
8557 ins_encode %{
8558 Register Rdst = $dst$$Register;
8559 Register Rsrc = $src$$Register;
8560 Label skip;
8561 __ bsrl(Rdst, Rsrc);
8562 __ jccb(Assembler::notZero, skip);
8563 __ movl(Rdst, -1);
8564 __ bind(skip);
8565 __ negl(Rdst);
8566 __ addl(Rdst, BitsPerInt - 1);
8567 %}
8568 ins_pipe(ialu_reg);
8569 %}
8570
8571 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8572 predicate(UseCountLeadingZerosInstruction);
8573 match(Set dst (CountLeadingZerosL src));
8574 effect(KILL cr);
8575
8576 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8577 ins_encode %{
8578 __ lzcntq($dst$$Register, $src$$Register);
8579 %}
8580 ins_pipe(ialu_reg);
8581 %}
8582
8583 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8584 predicate(UseCountLeadingZerosInstruction);
8585 match(Set dst (CountLeadingZerosL (LoadL src)));
8586 effect(KILL cr);
8587 ins_cost(175);
8588 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8589 ins_encode %{
8590 __ lzcntq($dst$$Register, $src$$Address);
8591 %}
8592 ins_pipe(ialu_reg_mem);
8593 %}
8594
8595 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8596 predicate(!UseCountLeadingZerosInstruction);
8597 match(Set dst (CountLeadingZerosL src));
8598 effect(KILL cr);
8599
8600 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8601 "jnz skip\n\t"
8602 "movl $dst, -1\n"
8603 "skip:\n\t"
8604 "negl $dst\n\t"
8605 "addl $dst, 63" %}
8606 ins_encode %{
8607 Register Rdst = $dst$$Register;
8608 Register Rsrc = $src$$Register;
8609 Label skip;
8610 __ bsrq(Rdst, Rsrc);
8611 __ jccb(Assembler::notZero, skip);
8612 __ movl(Rdst, -1);
8613 __ bind(skip);
8614 __ negl(Rdst);
8615 __ addl(Rdst, BitsPerLong - 1);
8616 %}
8617 ins_pipe(ialu_reg);
8618 %}
8619
8620 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8621 predicate(UseCountTrailingZerosInstruction);
8622 match(Set dst (CountTrailingZerosI src));
8623 effect(KILL cr);
8624
8625 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8626 ins_encode %{
8627 __ tzcntl($dst$$Register, $src$$Register);
8628 %}
8629 ins_pipe(ialu_reg);
8630 %}
8631
8632 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8633 predicate(UseCountTrailingZerosInstruction);
8634 match(Set dst (CountTrailingZerosI (LoadI src)));
8635 effect(KILL cr);
8636 ins_cost(175);
8637 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8638 ins_encode %{
8639 __ tzcntl($dst$$Register, $src$$Address);
8640 %}
8641 ins_pipe(ialu_reg_mem);
8642 %}
8643
8644 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8645 predicate(!UseCountTrailingZerosInstruction);
8646 match(Set dst (CountTrailingZerosI src));
8647 effect(KILL cr);
8648
8649 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8650 "jnz done\n\t"
8651 "movl $dst, 32\n"
8652 "done:" %}
8653 ins_encode %{
8654 Register Rdst = $dst$$Register;
8655 Label done;
8656 __ bsfl(Rdst, $src$$Register);
8657 __ jccb(Assembler::notZero, done);
8658 __ movl(Rdst, BitsPerInt);
8659 __ bind(done);
8660 %}
8661 ins_pipe(ialu_reg);
8662 %}
8663
8664 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8665 predicate(UseCountTrailingZerosInstruction);
8666 match(Set dst (CountTrailingZerosL src));
8667 effect(KILL cr);
8668
8669 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8670 ins_encode %{
8671 __ tzcntq($dst$$Register, $src$$Register);
8672 %}
8673 ins_pipe(ialu_reg);
8674 %}
8675
8676 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8677 predicate(UseCountTrailingZerosInstruction);
8678 match(Set dst (CountTrailingZerosL (LoadL src)));
8679 effect(KILL cr);
8680 ins_cost(175);
8681 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8682 ins_encode %{
8683 __ tzcntq($dst$$Register, $src$$Address);
8684 %}
8685 ins_pipe(ialu_reg_mem);
8686 %}
8687
8688 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8689 predicate(!UseCountTrailingZerosInstruction);
8690 match(Set dst (CountTrailingZerosL src));
8691 effect(KILL cr);
8692
8693 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8694 "jnz done\n\t"
8695 "movl $dst, 64\n"
8696 "done:" %}
8697 ins_encode %{
8698 Register Rdst = $dst$$Register;
8699 Label done;
8700 __ bsfq(Rdst, $src$$Register);
8701 __ jccb(Assembler::notZero, done);
8702 __ movl(Rdst, BitsPerLong);
8703 __ bind(done);
8704 %}
8705 ins_pipe(ialu_reg);
8706 %}
8707
8708 //--------------- Reverse Operation Instructions ----------------
8709 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8710 predicate(!VM_Version::supports_gfni());
8711 match(Set dst (ReverseI src));
8712 effect(TEMP dst, TEMP rtmp, KILL cr);
8713 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8714 ins_encode %{
8715 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8716 %}
8717 ins_pipe( ialu_reg );
8718 %}
8719
8720 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8721 predicate(VM_Version::supports_gfni());
8722 match(Set dst (ReverseI src));
8723 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8724 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8725 ins_encode %{
8726 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8727 %}
8728 ins_pipe( ialu_reg );
8729 %}
8730
8731 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8732 predicate(!VM_Version::supports_gfni());
8733 match(Set dst (ReverseL src));
8734 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8735 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8736 ins_encode %{
8737 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8738 %}
8739 ins_pipe( ialu_reg );
8740 %}
8741
8742 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8743 predicate(VM_Version::supports_gfni());
8744 match(Set dst (ReverseL src));
8745 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8746 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8747 ins_encode %{
8748 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8749 %}
8750 ins_pipe( ialu_reg );
8751 %}
8752
8753 //---------- Population Count Instructions -------------------------------------
8754
8755 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8756 predicate(UsePopCountInstruction);
8757 match(Set dst (PopCountI src));
8758 effect(KILL cr);
8759
8760 format %{ "popcnt $dst, $src" %}
8761 ins_encode %{
8762 __ popcntl($dst$$Register, $src$$Register);
8763 %}
8764 ins_pipe(ialu_reg);
8765 %}
8766
8767 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8768 predicate(UsePopCountInstruction);
8769 match(Set dst (PopCountI (LoadI mem)));
8770 effect(KILL cr);
8771
8772 format %{ "popcnt $dst, $mem" %}
8773 ins_encode %{
8774 __ popcntl($dst$$Register, $mem$$Address);
8775 %}
8776 ins_pipe(ialu_reg);
8777 %}
8778
8779 // Note: Long.bitCount(long) returns an int.
8780 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8781 predicate(UsePopCountInstruction);
8782 match(Set dst (PopCountL src));
8783 effect(KILL cr);
8784
8785 format %{ "popcnt $dst, $src" %}
8786 ins_encode %{
8787 __ popcntq($dst$$Register, $src$$Register);
8788 %}
8789 ins_pipe(ialu_reg);
8790 %}
8791
8792 // Note: Long.bitCount(long) returns an int.
8793 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8794 predicate(UsePopCountInstruction);
8795 match(Set dst (PopCountL (LoadL mem)));
8796 effect(KILL cr);
8797
8798 format %{ "popcnt $dst, $mem" %}
8799 ins_encode %{
8800 __ popcntq($dst$$Register, $mem$$Address);
8801 %}
8802 ins_pipe(ialu_reg);
8803 %}
8804
8805
8806 //----------MemBar Instructions-----------------------------------------------
8807 // Memory barrier flavors
8808
8809 instruct membar_acquire()
8810 %{
8811 match(MemBarAcquire);
8812 match(LoadFence);
8813 ins_cost(0);
8814
8815 size(0);
8816 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8817 ins_encode();
8818 ins_pipe(empty);
8819 %}
8820
8821 instruct membar_acquire_lock()
8822 %{
8823 match(MemBarAcquireLock);
8824 ins_cost(0);
8825
8826 size(0);
8827 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8828 ins_encode();
8829 ins_pipe(empty);
8830 %}
8831
8832 instruct membar_release()
8833 %{
8834 match(MemBarRelease);
8835 match(StoreFence);
8836 ins_cost(0);
8837
8838 size(0);
8839 format %{ "MEMBAR-release ! (empty encoding)" %}
8840 ins_encode();
8841 ins_pipe(empty);
8842 %}
8843
8844 instruct membar_release_lock()
8845 %{
8846 match(MemBarReleaseLock);
8847 ins_cost(0);
8848
8849 size(0);
8850 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8851 ins_encode();
8852 ins_pipe(empty);
8853 %}
8854
8855 instruct membar_storeload(rFlagsReg cr) %{
8856 match(MemBarStoreLoad);
8857 effect(KILL cr);
8858 ins_cost(400);
8859
8860 format %{
8861 $$template
8862 $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
8863 %}
8864 ins_encode %{
8865 __ membar(Assembler::StoreLoad);
8866 %}
8867 ins_pipe(pipe_slow);
8868 %}
8869
8870 instruct membar_volatile(rFlagsReg cr) %{
8871 match(MemBarVolatile);
8872 effect(KILL cr);
8873 ins_cost(400);
8874
8875 format %{
8876 $$template
8877 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8878 %}
8879 ins_encode %{
8880 __ membar(Assembler::StoreLoad);
8881 %}
8882 ins_pipe(pipe_slow);
8883 %}
8884
8885 instruct unnecessary_membar_volatile()
8886 %{
8887 match(MemBarVolatile);
8888 predicate(Matcher::post_store_load_barrier(n));
8889 ins_cost(0);
8890
8891 size(0);
8892 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8893 ins_encode();
8894 ins_pipe(empty);
8895 %}
8896
8897 instruct membar_full(rFlagsReg cr) %{
8898 match(MemBarFull);
8899 effect(KILL cr);
8900 ins_cost(400);
8901
8902 format %{
8903 $$template
8904 $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
8905 %}
8906 ins_encode %{
8907 __ membar(Assembler::StoreLoad);
8908 %}
8909 ins_pipe(pipe_slow);
8910 %}
8911
8912 instruct membar_storestore() %{
8913 match(MemBarStoreStore);
8914 match(StoreStoreFence);
8915 ins_cost(0);
8916
8917 size(0);
8918 format %{ "MEMBAR-storestore (empty encoding)" %}
8919 ins_encode( );
8920 ins_pipe(empty);
8921 %}
8922
8923 //----------Move Instructions--------------------------------------------------
8924
8925 instruct castX2P(rRegP dst, rRegL src)
8926 %{
8927 match(Set dst (CastX2P src));
8928
8929 format %{ "movq $dst, $src\t# long->ptr" %}
8930 ins_encode %{
8931 if ($dst$$reg != $src$$reg) {
8932 __ movptr($dst$$Register, $src$$Register);
8933 }
8934 %}
8935 ins_pipe(ialu_reg_reg); // XXX
8936 %}
8937
8938 instruct castP2X(rRegL dst, rRegP src)
8939 %{
8940 match(Set dst (CastP2X src));
8941
8942 format %{ "movq $dst, $src\t# ptr -> long" %}
8943 ins_encode %{
8944 if ($dst$$reg != $src$$reg) {
8945 __ movptr($dst$$Register, $src$$Register);
8946 }
8947 %}
8948 ins_pipe(ialu_reg_reg); // XXX
8949 %}
8950
8951 // Convert oop into int for vectors alignment masking
8952 instruct convP2I(rRegI dst, rRegP src)
8953 %{
8954 match(Set dst (ConvL2I (CastP2X src)));
8955
8956 format %{ "movl $dst, $src\t# ptr -> int" %}
8957 ins_encode %{
8958 __ movl($dst$$Register, $src$$Register);
8959 %}
8960 ins_pipe(ialu_reg_reg); // XXX
8961 %}
8962
8963 // Convert compressed oop into int for vectors alignment masking
8964 // in case of 32bit oops (heap < 4Gb).
8965 instruct convN2I(rRegI dst, rRegN src)
8966 %{
8967 predicate(CompressedOops::shift() == 0);
8968 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8969
8970 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8971 ins_encode %{
8972 __ movl($dst$$Register, $src$$Register);
8973 %}
8974 ins_pipe(ialu_reg_reg); // XXX
8975 %}
8976
8977 // Convert oop pointer into compressed form
8978 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8979 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8980 match(Set dst (EncodeP src));
8981 effect(KILL cr);
8982 format %{ "encode_heap_oop $dst,$src" %}
8983 ins_encode %{
8984 Register s = $src$$Register;
8985 Register d = $dst$$Register;
8986 if (s != d) {
8987 __ movq(d, s);
8988 }
8989 __ encode_heap_oop(d);
8990 %}
8991 ins_pipe(ialu_reg_long);
8992 %}
8993
8994 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8995 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8996 match(Set dst (EncodeP src));
8997 effect(KILL cr);
8998 format %{ "encode_heap_oop_not_null $dst,$src" %}
8999 ins_encode %{
9000 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9001 %}
9002 ins_pipe(ialu_reg_long);
9003 %}
9004
9005 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
9006 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9007 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9008 match(Set dst (DecodeN src));
9009 effect(KILL cr);
9010 format %{ "decode_heap_oop $dst,$src" %}
9011 ins_encode %{
9012 Register s = $src$$Register;
9013 Register d = $dst$$Register;
9014 if (s != d) {
9015 __ movq(d, s);
9016 }
9017 __ decode_heap_oop(d);
9018 %}
9019 ins_pipe(ialu_reg_long);
9020 %}
9021
9022 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9023 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9024 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9025 match(Set dst (DecodeN src));
9026 effect(KILL cr);
9027 format %{ "decode_heap_oop_not_null $dst,$src" %}
9028 ins_encode %{
9029 Register s = $src$$Register;
9030 Register d = $dst$$Register;
9031 if (s != d) {
9032 __ decode_heap_oop_not_null(d, s);
9033 } else {
9034 __ decode_heap_oop_not_null(d);
9035 }
9036 %}
9037 ins_pipe(ialu_reg_long);
9038 %}
9039
9040 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9041 match(Set dst (EncodePKlass src));
9042 effect(TEMP dst, KILL cr);
9043 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9044 ins_encode %{
9045 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9046 %}
9047 ins_pipe(ialu_reg_long);
9048 %}
9049
9050 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9051 match(Set dst (DecodeNKlass src));
9052 effect(TEMP dst, KILL cr);
9053 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9054 ins_encode %{
9055 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9056 %}
9057 ins_pipe(ialu_reg_long);
9058 %}
9059
9060 //----------Conditional Move---------------------------------------------------
9061 // Jump
9062 // dummy instruction for generating temp registers
9063 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9064 match(Jump (LShiftL switch_val shift));
9065 ins_cost(350);
9066 predicate(false);
9067 effect(TEMP dest);
9068
9069 format %{ "leaq $dest, [$constantaddress]\n\t"
9070 "jmp [$dest + $switch_val << $shift]\n\t" %}
9071 ins_encode %{
9072 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9073 // to do that and the compiler is using that register as one it can allocate.
9074 // So we build it all by hand.
9075 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9076 // ArrayAddress dispatch(table, index);
9077 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9078 __ lea($dest$$Register, $constantaddress);
9079 __ jmp(dispatch);
9080 %}
9081 ins_pipe(pipe_jmp);
9082 %}
9083
9084 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9085 match(Jump (AddL (LShiftL switch_val shift) offset));
9086 ins_cost(350);
9087 effect(TEMP dest);
9088
9089 format %{ "leaq $dest, [$constantaddress]\n\t"
9090 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9091 ins_encode %{
9092 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9093 // to do that and the compiler is using that register as one it can allocate.
9094 // So we build it all by hand.
9095 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9096 // ArrayAddress dispatch(table, index);
9097 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9098 __ lea($dest$$Register, $constantaddress);
9099 __ jmp(dispatch);
9100 %}
9101 ins_pipe(pipe_jmp);
9102 %}
9103
9104 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9105 match(Jump switch_val);
9106 ins_cost(350);
9107 effect(TEMP dest);
9108
9109 format %{ "leaq $dest, [$constantaddress]\n\t"
9110 "jmp [$dest + $switch_val]\n\t" %}
9111 ins_encode %{
9112 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9113 // to do that and the compiler is using that register as one it can allocate.
9114 // So we build it all by hand.
9115 // Address index(noreg, switch_reg, Address::times_1);
9116 // ArrayAddress dispatch(table, index);
9117 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9118 __ lea($dest$$Register, $constantaddress);
9119 __ jmp(dispatch);
9120 %}
9121 ins_pipe(pipe_jmp);
9122 %}
9123
9124 // Conditional move
9125 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9126 %{
9127 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9128 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9129
9130 ins_cost(100); // XXX
9131 format %{ "setbn$cop $dst\t# signed, int" %}
9132 ins_encode %{
9133 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9134 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9135 %}
9136 ins_pipe(ialu_reg);
9137 %}
9138
9139 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9140 %{
9141 predicate(!UseAPX);
9142 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9143
9144 ins_cost(200); // XXX
9145 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9146 ins_encode %{
9147 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9148 %}
9149 ins_pipe(pipe_cmov_reg);
9150 %}
9151
9152 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9153 %{
9154 predicate(UseAPX);
9155 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9156
9157 ins_cost(200);
9158 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9159 ins_encode %{
9160 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9161 %}
9162 ins_pipe(pipe_cmov_reg);
9163 %}
9164
9165 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9166 %{
9167 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9168 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9169
9170 ins_cost(100); // XXX
9171 format %{ "setbn$cop $dst\t# unsigned, int" %}
9172 ins_encode %{
9173 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9174 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9175 %}
9176 ins_pipe(ialu_reg);
9177 %}
9178
9179 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9180 predicate(!UseAPX);
9181 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9182
9183 ins_cost(200); // XXX
9184 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9185 ins_encode %{
9186 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9187 %}
9188 ins_pipe(pipe_cmov_reg);
9189 %}
9190
9191 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9192 predicate(UseAPX);
9193 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9194
9195 ins_cost(200);
9196 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9197 ins_encode %{
9198 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9199 %}
9200 ins_pipe(pipe_cmov_reg);
9201 %}
9202
9203 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9204 %{
9205 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9206 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9207
9208 ins_cost(100); // XXX
9209 format %{ "setbn$cop $dst\t# unsigned, int" %}
9210 ins_encode %{
9211 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9212 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9213 %}
9214 ins_pipe(ialu_reg);
9215 %}
9216
9217 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9218 %{
9219 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9220 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9221
9222 ins_cost(100); // XXX
9223 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9224 ins_encode %{
9225 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9226 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9227 %}
9228 ins_pipe(ialu_reg);
9229 %}
9230
9231 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9232 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9233
9234 ins_cost(200);
9235 expand %{
9236 cmovI_regU(cop, cr, dst, src);
9237 %}
9238 %}
9239
9240 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9241 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9242
9243 ins_cost(200);
9244 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9245 ins_encode %{
9246 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9247 %}
9248 ins_pipe(pipe_cmov_reg);
9249 %}
9250
9251 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9252 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9253 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9254
9255 ins_cost(200); // XXX
9256 format %{ "cmovpl $dst, $src\n\t"
9257 "cmovnel $dst, $src" %}
9258 ins_encode %{
9259 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9260 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9261 %}
9262 ins_pipe(pipe_cmov_reg);
9263 %}
9264
9265 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9266 // inputs of the CMove
9267 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9268 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9269 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9270 effect(TEMP dst);
9271
9272 ins_cost(200); // XXX
9273 format %{ "cmovpl $dst, $src\n\t"
9274 "cmovnel $dst, $src" %}
9275 ins_encode %{
9276 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9277 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9278 %}
9279 ins_pipe(pipe_cmov_reg);
9280 %}
9281
9282 // Conditional move
9283 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9284 predicate(!UseAPX);
9285 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9286
9287 ins_cost(250); // XXX
9288 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9289 ins_encode %{
9290 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9291 %}
9292 ins_pipe(pipe_cmov_mem);
9293 %}
9294
9295 // Conditional move
9296 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9297 %{
9298 predicate(UseAPX);
9299 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9300
9301 ins_cost(250);
9302 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9303 ins_encode %{
9304 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9305 %}
9306 ins_pipe(pipe_cmov_mem);
9307 %}
9308
9309 // Conditional move
9310 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9311 %{
9312 predicate(!UseAPX);
9313 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9314
9315 ins_cost(250); // XXX
9316 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9317 ins_encode %{
9318 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9319 %}
9320 ins_pipe(pipe_cmov_mem);
9321 %}
9322
9323 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9324 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9325
9326 ins_cost(250);
9327 expand %{
9328 cmovI_memU(cop, cr, dst, src);
9329 %}
9330 %}
9331
9332 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9333 %{
9334 predicate(UseAPX);
9335 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9336
9337 ins_cost(250);
9338 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9339 ins_encode %{
9340 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9341 %}
9342 ins_pipe(pipe_cmov_mem);
9343 %}
9344
9345 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9346 %{
9347 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9348
9349 ins_cost(250);
9350 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9351 ins_encode %{
9352 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9353 %}
9354 ins_pipe(pipe_cmov_mem);
9355 %}
9356
9357 // Conditional move
9358 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9359 %{
9360 predicate(!UseAPX);
9361 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9362
9363 ins_cost(200); // XXX
9364 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9365 ins_encode %{
9366 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9367 %}
9368 ins_pipe(pipe_cmov_reg);
9369 %}
9370
9371 // Conditional move ndd
9372 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9373 %{
9374 predicate(UseAPX);
9375 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9376
9377 ins_cost(200);
9378 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9379 ins_encode %{
9380 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9381 %}
9382 ins_pipe(pipe_cmov_reg);
9383 %}
9384
9385 // Conditional move
9386 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9387 %{
9388 predicate(!UseAPX);
9389 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9390
9391 ins_cost(200); // XXX
9392 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9393 ins_encode %{
9394 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9395 %}
9396 ins_pipe(pipe_cmov_reg);
9397 %}
9398
9399 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9400 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9401
9402 ins_cost(200);
9403 expand %{
9404 cmovN_regU(cop, cr, dst, src);
9405 %}
9406 %}
9407
9408 // Conditional move ndd
9409 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9410 %{
9411 predicate(UseAPX);
9412 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9413
9414 ins_cost(200);
9415 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9416 ins_encode %{
9417 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9418 %}
9419 ins_pipe(pipe_cmov_reg);
9420 %}
9421
9422 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9423 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9424
9425 ins_cost(200);
9426 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9427 ins_encode %{
9428 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9429 %}
9430 ins_pipe(pipe_cmov_reg);
9431 %}
9432
9433 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9434 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9435 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9436
9437 ins_cost(200); // XXX
9438 format %{ "cmovpl $dst, $src\n\t"
9439 "cmovnel $dst, $src" %}
9440 ins_encode %{
9441 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9442 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9443 %}
9444 ins_pipe(pipe_cmov_reg);
9445 %}
9446
9447 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9448 // inputs of the CMove
9449 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9450 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9451 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9452
9453 ins_cost(200); // XXX
9454 format %{ "cmovpl $dst, $src\n\t"
9455 "cmovnel $dst, $src" %}
9456 ins_encode %{
9457 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9458 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9459 %}
9460 ins_pipe(pipe_cmov_reg);
9461 %}
9462
9463 // Conditional move
9464 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9465 %{
9466 predicate(!UseAPX);
9467 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9468
9469 ins_cost(200); // XXX
9470 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9471 ins_encode %{
9472 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9473 %}
9474 ins_pipe(pipe_cmov_reg); // XXX
9475 %}
9476
9477 // Conditional move ndd
9478 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9479 %{
9480 predicate(UseAPX);
9481 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9482
9483 ins_cost(200);
9484 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9485 ins_encode %{
9486 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9487 %}
9488 ins_pipe(pipe_cmov_reg);
9489 %}
9490
9491 // Conditional move
9492 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9493 %{
9494 predicate(!UseAPX);
9495 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9496
9497 ins_cost(200); // XXX
9498 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9499 ins_encode %{
9500 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9501 %}
9502 ins_pipe(pipe_cmov_reg); // XXX
9503 %}
9504
9505 // Conditional move ndd
9506 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9507 %{
9508 predicate(UseAPX);
9509 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9510
9511 ins_cost(200);
9512 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9513 ins_encode %{
9514 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9515 %}
9516 ins_pipe(pipe_cmov_reg);
9517 %}
9518
9519 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9520 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9521
9522 ins_cost(200);
9523 expand %{
9524 cmovP_regU(cop, cr, dst, src);
9525 %}
9526 %}
9527
9528 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9529 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9530
9531 ins_cost(200);
9532 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9533 ins_encode %{
9534 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9535 %}
9536 ins_pipe(pipe_cmov_reg);
9537 %}
9538
9539 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9540 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9541 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9542
9543 ins_cost(200); // XXX
9544 format %{ "cmovpq $dst, $src\n\t"
9545 "cmovneq $dst, $src" %}
9546 ins_encode %{
9547 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9548 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9549 %}
9550 ins_pipe(pipe_cmov_reg);
9551 %}
9552
9553 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9554 // inputs of the CMove
9555 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9556 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9557 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9558
9559 ins_cost(200); // XXX
9560 format %{ "cmovpq $dst, $src\n\t"
9561 "cmovneq $dst, $src" %}
9562 ins_encode %{
9563 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9564 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9565 %}
9566 ins_pipe(pipe_cmov_reg);
9567 %}
9568
9569 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9570 %{
9571 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9572 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9573
9574 ins_cost(100); // XXX
9575 format %{ "setbn$cop $dst\t# signed, long" %}
9576 ins_encode %{
9577 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9578 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9579 %}
9580 ins_pipe(ialu_reg);
9581 %}
9582
9583 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9584 %{
9585 predicate(!UseAPX);
9586 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9587
9588 ins_cost(200); // XXX
9589 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9590 ins_encode %{
9591 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9592 %}
9593 ins_pipe(pipe_cmov_reg); // XXX
9594 %}
9595
9596 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9597 %{
9598 predicate(UseAPX);
9599 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9600
9601 ins_cost(200);
9602 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9603 ins_encode %{
9604 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9605 %}
9606 ins_pipe(pipe_cmov_reg);
9607 %}
9608
9609 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9610 %{
9611 predicate(!UseAPX);
9612 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9613
9614 ins_cost(200); // XXX
9615 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9616 ins_encode %{
9617 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9618 %}
9619 ins_pipe(pipe_cmov_mem); // XXX
9620 %}
9621
9622 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9623 %{
9624 predicate(UseAPX);
9625 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9626
9627 ins_cost(200);
9628 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9629 ins_encode %{
9630 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9631 %}
9632 ins_pipe(pipe_cmov_mem);
9633 %}
9634
9635 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9636 %{
9637 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9638 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9639
9640 ins_cost(100); // XXX
9641 format %{ "setbn$cop $dst\t# unsigned, long" %}
9642 ins_encode %{
9643 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9644 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9645 %}
9646 ins_pipe(ialu_reg);
9647 %}
9648
9649 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9650 %{
9651 predicate(!UseAPX);
9652 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9653
9654 ins_cost(200); // XXX
9655 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9656 ins_encode %{
9657 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9658 %}
9659 ins_pipe(pipe_cmov_reg); // XXX
9660 %}
9661
9662 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9663 %{
9664 predicate(UseAPX);
9665 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9666
9667 ins_cost(200);
9668 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9669 ins_encode %{
9670 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9671 %}
9672 ins_pipe(pipe_cmov_reg);
9673 %}
9674
9675 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9676 %{
9677 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9678 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9679
9680 ins_cost(100); // XXX
9681 format %{ "setbn$cop $dst\t# unsigned, long" %}
9682 ins_encode %{
9683 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9684 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9685 %}
9686 ins_pipe(ialu_reg);
9687 %}
9688
// Same setb-based CMoveL form, for the rFlagsRegUCFE / cmpOpUCFE operand pair.
// The predicate admits only nodes whose n->in(2)->in(2) input is a long
// constant equal to 0; src is matched as immL1.
instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
  ins_encode %{
    // setb is issued with the opposite sense of the matched condition code.
    const Assembler::Condition negated =
      MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9702
// CMoveL for the rFlagsRegUCF / cmpOpUCF operand pair, register source.
// Has no encoding of its own: it expands to cmovL_regU with the same operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
9711
// Three-operand (ndd) CMoveL for the rFlagsRegUCFE / cmpOpUCFE operand pair,
// register sources. Emits ecmovq with a destination distinct from both inputs.
instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register dst_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    const Register rhs_reg = $src2$$Register;
    __ ecmovq(cc, dst_reg, lhs_reg, rhs_reg);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9723
// CMoveL for cmpOpUCF2 when the Bool test is BoolTest::ne. Two cmovq
// instructions are emitted: one on the parity condition, one on notEqual.
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ cmovq(Assembler::parity, dst_reg, src_reg);
    __ cmovq(Assembler::notEqual, dst_reg, src_reg);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9737
9738 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9739 // inputs of the CMove
// CMoveL for cmpOpUCF2 when the Bool test is BoolTest::eq. The match rule
// lists the value inputs as (Binary src dst), i.e. swapped relative to the
// "ne" rule, and the same parity / notEqual cmovq pair is emitted.
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ cmovq(Assembler::parity, dst_reg, src_reg);
    __ cmovq(Assembler::notEqual, dst_reg, src_reg);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9753
// Two-operand CMoveL on unsigned flags whose second value is a LoadL folded
// into the instruction as a memory operand. Only selected when UseAPX is off.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register dst_reg = $dst$$Register;
    __ cmovq(cc, dst_reg, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9766
// CMoveL for the rFlagsRegUCF / cmpOpUCF operand pair with a LoadL memory
// source. Has no encoding of its own: it expands to cmovL_memU.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
9775
// Three-operand (ndd) CMoveL on unsigned flags: register src1, and a LoadL
// folded in as memory operand src2. Only selected when UseAPX is on.
instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register dst_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ ecmovq(cc, dst_reg, lhs_reg, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9788
// Three-operand (ndd) CMoveL for the rFlagsRegUCFE / cmpOpUCFE operand pair:
// register src1, and a LoadL folded in as memory operand src2.
instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    const Register dst_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ ecmovq(cc, dst_reg, lhs_reg, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9800
// CMoveF on signed flags, implemented as a short conditional branch around a
// movflt rather than with a cmov instruction.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch tests the inverse of the CMOV condition (low bit flipped),
    // so the move is jumped over when the CMOV condition does not hold.
    const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode^1);
    Label done;
    __ jccb(skip_cond, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9818
// CMoveF on unsigned flags, implemented as a short conditional branch around
// a movflt rather than with a cmov instruction.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch tests the inverse of the CMOV condition (low bit flipped),
    // so the move is jumped over when the CMOV condition does not hold.
    const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode^1);
    Label done;
    __ jccb(skip_cond, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9836
// CMoveF for the rFlagsRegUCF / cmpOpUCF operand pair.
// Has no encoding of its own: it expands to cmovF_regU with the same operands.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
9845
// CMoveF for the rFlagsRegUCFE / cmpOpUCFE operand pair, implemented as a
// short conditional branch around a movflt.
instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch tests the inverse of the CMOV condition (low bit flipped),
    // so the move is jumped over when the CMOV condition does not hold.
    const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode^1);
    Label done;
    __ jccb(skip_cond, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9863
// CMoveD on signed flags, implemented as a short conditional branch around a
// movdbl rather than with a cmov instruction.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch tests the inverse of the CMOV condition (low bit flipped),
    // so the move is jumped over when the CMOV condition does not hold.
    const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode^1);
    Label done;
    __ jccb(skip_cond, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9881
// CMoveD on unsigned flags, implemented as a short conditional branch around
// a movdbl rather than with a cmov instruction.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch tests the inverse of the CMOV condition (low bit flipped),
    // so the move is jumped over when the CMOV condition does not hold.
    const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode^1);
    Label done;
    __ jccb(skip_cond, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9899
// CMoveD for the rFlagsRegUCF / cmpOpUCF operand pair.
// Has no encoding of its own: it expands to cmovD_regU with the same operands.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
9908
// CMoveD for the rFlagsRegUCFE / cmpOpUCFE operand pair, implemented as a
// short conditional branch around a movdbl.
instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch tests the inverse of the CMOV condition (low bit flipped),
    // so the move is jumped over when the CMOV condition does not hold.
    const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode^1);
    Label done;
    __ jccb(skip_cond, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9926
9927 //----------Arithmetic Instructions--------------------------------------------
9928 //----------Addition Instructions----------------------------------------------
9929
// Two-operand int add, register + register (dst += src). Selected only when
// UseAPX is off. Kills the flags register; flag() records the overflow, sign,
// zero, carry and parity markers.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ addl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
9942
// Three-operand (ndd) int add, dst = src1 + src2, both sources in registers.
// Selected only when UseAPX is on. Kills the flags register; flag() also marks
// operands 1 and 2 as ndd-demotable.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    const Register rhs_reg = $src2$$Register;
    __ eaddl(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
9956
// Two-operand int add of an immediate to a register (dst += src). Selected
// only when UseAPX is off. Kills the flags register.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ addl(dst_reg, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
9970
// Three-operand (ndd) int add, dst = src1 + immediate. Selected only when
// UseAPX is on. Kills the flags register; flag() marks operand 1 as
// ndd-demotable.
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ eaddl(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
9984
// Three-operand (ndd) int add, dst = [src1] + immediate, with the LoadI
// folded in as a memory operand. Selected only when UseAPX is on. Kills the
// flags register.
instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ eaddl(dst_reg, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
9998
// Two-operand int add of a loaded value to a register (dst += [src]), with
// the LoadI folded in as a memory operand. Selected only when UseAPX is off.
// Kills the flags register.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ addl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10013
// Three-operand (ndd) int add, dst = src1 + [src2], with the LoadI folded in
// as a memory operand. Selected only when UseAPX is on. Kills the flags
// register.
instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ eaddl(dst_reg, lhs_reg, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10028
// Read-modify-write int add on memory: matches a StoreI of (LoadI dst) + src
// back to the same memory operand and emits a single addl with a memory
// destination. Kills the flags register.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register src_reg = $src$$Register;
    __ addl($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
10042
// Read-modify-write int add of an immediate on memory: matches a StoreI of
// (LoadI dst) + src back to the same memory operand and emits a single addl
// with a memory destination. Kills the flags register.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Address target = $dst$$Address;
    __ addl(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10057
// Int increment of a register: AddI of dst with an immI_1 operand, emitted
// via incrementl. Requires UseIncDec and !UseAPX. Kills the flags register.
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ incrementl(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
10070
// ndd int increment, dst = src + 1 (val is an immI_1 operand), emitted via
// eincl. Requires UseAPX and UseIncDec. Kills the flags register; flag()
// marks operand 1 as ndd-demotable.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ eincl(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
10084
// ndd int increment of a loaded value, dst = [src] + 1 (val is an immI_1
// operand), emitted via eincl with a memory source. Requires UseAPX and
// UseIncDec. Kills the flags register.
instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ eincl(dst_reg, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10097
// Read-modify-write int increment on memory: matches a StoreI of
// (LoadI dst) + immI_1 back to the same memory operand, emitted via
// incrementl on the address. Requires UseIncDec. Kills the flags register.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl $dst\t# int" %}
  ins_encode %{
    const Address target = $dst$$Address;
    __ incrementl(target);
  %}
  ins_pipe(ialu_mem_imm);
%}
10111
10112 // XXX why does that use AddI
// Int decrement of a register: matched as AddI of dst with an immI_M1
// operand, emitted via decrementl. Requires UseIncDec and !UseAPX. Kills the
// flags register.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ decrementl(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
10125
// ndd int decrement: matched as AddI of src with an immI_M1 operand, emitted
// via edecl into a separate dst. Requires UseAPX and UseIncDec. Kills the
// flags register; flag() marks operand 1 as ndd-demotable.
instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ edecl(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
10139
// ndd int decrement of a loaded value: matched as AddI of (LoadI src) with an
// immI_M1 operand, emitted via edecl with a memory source. Requires UseAPX
// and UseIncDec. Kills the flags register.
instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ edecl(dst_reg, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10152
10153 // XXX why does that use AddI
// Read-modify-write int decrement on memory: matches a StoreI of
// (LoadI dst) + immI_M1 back to the same memory operand, emitted via
// decrementl on the address. Requires UseIncDec. Kills the flags register.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl $dst\t# int" %}
  ins_encode %{
    const Address target = $dst$$Address;
    __ decrementl(target);
  %}
  ins_pipe(ialu_mem_imm);
%}
10167
// Folds (index << scale) + disp into one leal with no base register.
// Guarded by VM_Version::supports_fast_2op_lea(). No flags effect is declared.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift amount is reused directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address ea(noreg, $index$$Register, factor, $disp$$constant);
    __ leal($dst$$Register, ea);
  %}
  ins_pipe(ialu_reg_reg);
%}
10180
// Folds (base + index) + disp into one leal with scale times_1.
// Guarded by VM_Version::supports_fast_3op_lea(). No flags effect is declared.
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    const Address ea($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
    __ leal($dst$$Register, ea);
  %}
  ins_pipe(ialu_reg_reg);
%}
10192
// Folds base + (index << scale) into one leal with no displacement. The base
// operand is restricted to the no_rbp_r13_RegI class.
// Guarded by VM_Version::supports_fast_2op_lea(). No flags effect is declared.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    // The shift amount is reused directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address ea($base$$Register, $index$$Register, factor);
    __ leal($dst$$Register, ea);
  %}
  ins_pipe(ialu_reg_reg);
%}
10205
// Folds (base + (index << scale)) + disp into one leal.
// Guarded by VM_Version::supports_fast_3op_lea(). No flags effect is declared.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift amount is reused directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address ea($base$$Register, $index$$Register, factor, $disp$$constant);
    __ leal($dst$$Register, ea);
  %}
  ins_pipe(ialu_reg_reg);
%}
10218
// Two-operand long add, register + register (dst += src). Selected only when
// UseAPX is off. Kills the flags register; flag() records the overflow, sign,
// zero, carry and parity markers.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ addq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
10232
// Three-operand (ndd) long add, dst = src1 + src2, both sources in registers.
// Selected only when UseAPX is on. Kills the flags register; flag() also marks
// operands 1 and 2 as ndd-demotable.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    const Register rhs_reg = $src2$$Register;
    __ eaddq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10246
// Two-operand long add of an immL32 immediate to a register (dst += src).
// Selected only when UseAPX is off. Kills the flags register.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ addq(dst_reg, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10260
// Three-operand (ndd) long add, dst = src1 + immL32 immediate. Selected only
// when UseAPX is on. Kills the flags register; flag() marks operand 1 as
// ndd-demotable.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ eaddq(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10274
// Three-operand (ndd) long add, dst = [src1] + immL32 immediate, with the
// LoadL folded in as a memory operand. Selected only when UseAPX is on.
// Kills the flags register.
instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ eaddq(dst_reg, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10288
// Two-operand long add of a loaded value to a register (dst += [src]), with
// the LoadL folded in as a memory operand. Selected only when UseAPX is off.
// Kills the flags register.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ addq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10303
// Three-operand (ndd) long add, dst = src1 + [src2], with the LoadL folded in
// as a memory operand. Selected only when UseAPX is on. Kills the flags
// register.
instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ eaddq(dst_reg, lhs_reg, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10318
// Read-modify-write long add on memory: matches a StoreL of (LoadL dst) + src
// back to the same memory operand and emits a single addq with a memory
// destination. Kills the flags register.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register src_reg = $src$$Register;
    __ addq($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
10332
// Read-modify-write long add of an immL32 immediate on memory: matches a
// StoreL of (LoadL dst) + src back to the same memory operand and emits a
// single addq with a memory destination. Kills the flags register.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Address target = $dst$$Address;
    __ addq(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10346
// Long increment of a register: AddL of dst with an immL1 operand, emitted
// via incrementq. Requires UseIncDec and !UseAPX. Kills the flags register.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ incrementq(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
10359
// ndd long increment, dst = src + 1 (val is an immL1 operand), emitted via
// eincq. Requires UseAPX and UseIncDec. Kills the flags register; flag()
// marks operand 1 as ndd-demotable.
//
// src is a long register operand (rRegL): it is the first input of the AddL
// being matched and is fed to the 64-bit eincq, matching the operand class
// used by the sibling long ndd rules such as decL_rReg_ndd.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10373
// ndd long increment of a loaded value, dst = [src] + 1 (val is an immL1
// operand), emitted via eincq with a memory source. Requires UseAPX and
// UseIncDec. Kills the flags register.
instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ eincq(dst_reg, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10386
// Read-modify-write long increment on memory: matches a StoreL of
// (LoadL dst) + immL1 back to the same memory operand, emitted via
// incrementq on the address. Requires UseIncDec. Kills the flags register.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq $dst\t# long" %}
  ins_encode %{
    const Address target = $dst$$Address;
    __ incrementq(target);
  %}
  ins_pipe(ialu_mem_imm);
%}
10400
10401 // XXX why does that use AddL
// Long decrement of a register: matched as AddL of dst with an immL_M1
// operand, emitted via decrementq. Requires UseIncDec and !UseAPX. Kills the
// flags register.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq $dst\t# long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ decrementq(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
10414
// ndd long decrement: matched as AddL of src with an immL_M1 operand, emitted
// via edecq into a separate dst. Requires UseAPX and UseIncDec. Kills the
// flags register; flag() marks operand 1 as ndd-demotable.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ edecq(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
10428
// ndd long decrement of a loaded value: matched as AddL of (LoadL src) with
// an immL_M1 operand, emitted via edecq with a memory source. Requires UseAPX
// and UseIncDec. Kills the flags register.
instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ edecq(dst_reg, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10441
10442 // XXX why does that use AddL
// Read-modify-write long decrement on memory: matches a StoreL of
// (LoadL dst) + immL_M1 back to the same memory operand, emitted via
// decrementq on the address. Requires UseIncDec. Kills the flags register.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq $dst\t# long" %}
  ins_encode %{
    const Address target = $dst$$Address;
    __ decrementq(target);
  %}
  ins_pipe(ialu_mem_imm);
%}
10456
// Folds (index << scale) + disp into one leaq with no base register.
// Guarded by VM_Version::supports_fast_2op_lea(). No flags effect is declared.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift amount is reused directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address ea(noreg, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, ea);
  %}
  ins_pipe(ialu_reg_reg);
%}
10469
// Folds (base + index) + disp into one leaq with scale times_1.
// Guarded by VM_Version::supports_fast_3op_lea(). No flags effect is declared.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    const Address ea($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
    __ leaq($dst$$Register, ea);
  %}
  ins_pipe(ialu_reg_reg);
%}
10481
// Folds base + (index << scale) into one leaq with no displacement. The base
// operand is restricted to the no_rbp_r13_RegL class.
// Guarded by VM_Version::supports_fast_2op_lea(). No flags effect is declared.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    // The shift amount is reused directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address ea($base$$Register, $index$$Register, factor);
    __ leaq($dst$$Register, ea);
  %}
  ins_pipe(ialu_reg_reg);
%}
10494
// Folds (base + (index << scale)) + disp into one leaq.
// Guarded by VM_Version::supports_fast_3op_lea(). No flags effect is declared.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift amount is reused directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address ea($base$$Register, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, ea);
  %}
  ins_pipe(ialu_reg_reg);
%}
10507
// Pointer add: dst (rRegP) += src (rRegL), emitted as addq. Kills the flags
// register; flag() records the overflow, sign, zero, carry and parity markers.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register ptr_reg = $dst$$Register;
    const Register off_reg = $src$$Register;
    __ addq(ptr_reg, off_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
10520
// Pointer add of an immL32 immediate: dst (rRegP) += src, emitted as addq.
// Kills the flags register.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register ptr_reg = $dst$$Register;
    __ addq(ptr_reg, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10533
10534 // XXX addP mem ops ????
10535
// CheckCastPP on a pointer register. Declared with size(0) and an empty
// encoding, so the rule produces no machine code; dst is both input and
// result.
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10545
// CastPP on a pointer register. Declared with size(0) and an empty encoding,
// so the rule produces no machine code; dst is both input and result.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10555
// Unchecked CastII: used when VerifyConstraintCasts == 0. Declared with
// size(0), an empty encoding and zero cost, so the rule produces no machine
// code.
instruct castII(rRegI dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10567
// Checked CastII: used when VerifyConstraintCasts > 0. Calls
// verify_int_in_range with this node's index, its int bottom type and the
// value register. Kills the flags register.
instruct castII_checked(rRegI dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    const Register value_reg = $dst$$Register;
    __ verify_int_in_range(_idx, bottom_type()->is_int(), value_reg);
  %}
  ins_pipe(pipe_slow);
%}
10580
// Unchecked CastLL: used when VerifyConstraintCasts == 0. Declared with
// size(0), an empty encoding and zero cost, so the rule produces no machine
// code.
instruct castLL(rRegL dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10592
// Checked CastLL for nodes where castLL_is_imm32(n) holds (and
// VerifyConstraintCasts > 0). Calls verify_long_in_range with noreg in the
// temp-register position, so no TEMP operand is declared. Kills the flags
// register.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr);
  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    const Register value_reg = $dst$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), value_reg, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10605
// Checked CastLL for nodes where castLL_is_imm32(n) does not hold (and
// VerifyConstraintCasts > 0). Calls verify_long_in_range and hands it the
// TEMP register tmp. Kills the flags register.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr, TEMP tmp);
  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    const Register value_reg   = $dst$$Register;
    const Register scratch_reg = $tmp$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), value_reg, scratch_reg);
  %}
  ins_pipe(pipe_slow);
%}
10618
// CastFF on a float register. Declared with size(0), an empty encoding and
// zero cost, so the rule produces no machine code.
instruct castFF(regF dst)
%{
  match(Set dst (CastFF dst));

  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10629
// CastHH, matched on a regF operand. Declared with size(0), an empty encoding
// and zero cost, so the rule produces no machine code.
instruct castHH(regF dst)
%{
  match(Set dst (CastHH dst));

  size(0);
  format %{ "# castHH of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10640
// CastDD on a double register. Declared with size(0), an empty encoding and
// zero cost, so the rule produces no machine code.
instruct castDD(regD dst)
%{
  match(Set dst (CastDD dst));

  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10651
10652 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing a boolean-style int result in res.
// Matches both CompareAndSwapP and WeakCompareAndSwapP, but only for nodes
// whose barrier_data() is 0. oldval is pinned to the rax operand class and is
// killed, as is the flags register. Emits lock; cmpxchgq; setcc(equal).
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register new_reg    = $newval$$Register;
    const Register result_reg = $res$$Register;
    __ lock();
    __ cmpxchgq(new_reg, $mem_ptr$$Address);
    // res reflects whether the exchange compared equal.
    __ setcc(Assembler::equal, result_reg);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10673
// Long CAS producing a success flag in $res. Matches both the strong and the
// weak ideal node. Emits lock cmpxchgq followed by setcc(equal); flags and
// $oldval are killed.
instruct compareAndSwapL(rRegI res, memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register result_reg = $res$$Register;
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgq(update_reg, slot_addr);
    __ setcc(Assembler::equal, result_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10693
// Int CAS producing a success flag in $res. Matches both the strong and the
// weak ideal node. Emits lock cmpxchgl followed by setcc(equal); flags and
// $oldval are killed.
instruct compareAndSwapI(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register result_reg = $res$$Register;
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgl(update_reg, slot_addr);
    __ setcc(Assembler::equal, result_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10713
// Byte CAS producing a success flag in $res. Matches both the strong and the
// weak ideal node. Emits lock cmpxchgb followed by setcc(equal); flags and
// $oldval are killed.
instruct compareAndSwapB(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register result_reg = $res$$Register;
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgb(update_reg, slot_addr);
    __ setcc(Assembler::equal, result_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10733
// Short CAS producing a success flag in $res. Matches both the strong and the
// weak ideal node. Emits lock cmpxchgw followed by setcc(equal); flags and
// $oldval are killed.
instruct compareAndSwapS(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register result_reg = $res$$Register;
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgw(update_reg, slot_addr);
    __ setcc(Assembler::equal, result_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10753
// Narrow-oop CAS producing a success flag in $res. Matches both the strong and
// the weak ideal node when the node carries no barrier data. Emits
// lock cmpxchgl followed by setcc(equal); flags and $oldval are killed.
instruct compareAndSwapN(rRegI res, memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register result_reg = $res$$Register;
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgl(update_reg, slot_addr);
    __ setcc(Assembler::equal, result_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10773
// Byte compare-and-exchange: the result is defined in $oldval (a rax-class
// operand). Emits lock cmpxchgb; only the flags register is killed.
instruct compareAndExchangeB(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgb(update_reg, slot_addr);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10790
// Short compare-and-exchange: the result is defined in $oldval (a rax-class
// operand). Emits lock cmpxchgw; only the flags register is killed.
instruct compareAndExchangeS(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgw(update_reg, slot_addr);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10807
// Int compare-and-exchange: the result is defined in $oldval (a rax-class
// operand). Emits lock cmpxchgl; only the flags register is killed.
instruct compareAndExchangeI(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgl(update_reg, slot_addr);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10824
// Long compare-and-exchange: the result is defined in $oldval (a rax-class
// operand). Emits lock cmpxchgq; only the flags register is killed.
instruct compareAndExchangeL(memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgq(update_reg, slot_addr);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10841
// Narrow-oop compare-and-exchange, selected only when the node carries no
// barrier data. The result is defined in $oldval (a rax-class operand).
// Emits lock cmpxchgl; only the flags register is killed.
instruct compareAndExchangeN(memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgl(update_reg, slot_addr);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10858
// Pointer compare-and-exchange, selected only when the node carries no
// barrier data. The result is defined in $oldval (a rax-class operand).
// Emits lock cmpxchgq; only the flags register is killed.
instruct compareAndExchangeP(memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register update_reg = $newval$$Register;
    Address  slot_addr  = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgq(update_reg, slot_addr);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10876
// GetAndAddB whose result is not used, register addend: a locked addb to
// memory is emitted instead of xadd. Kills the flags register.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);

  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    Address  slot_addr  = $mem$$Address;
    Register addend_reg = $add$$Register;
    __ lock();
    __ addb(slot_addr, addend_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10888
// GetAndAddB whose result is not used, immediate addend: a locked addb of the
// constant to memory is emitted instead of xadd. Kills the flags register.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);

  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    Address slot_addr = $mem$$Address;
    __ lock();
    __ addb(slot_addr, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10900
// GetAndAddB whose result is used: locked xaddb, after which the result
// register is passed to narrow_subword_type with T_BYTE. Kills the flags.
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);

  format %{ "xaddb_lock $mem, $newval\t# $newval -> byte" %}
  ins_encode %{
    Address  slot_addr = $mem$$Address;
    Register value_reg = $newval$$Register;
    __ lock();
    __ xaddb(slot_addr, value_reg);
    __ narrow_subword_type(value_reg, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10913
// GetAndAddS whose result is not used, register addend: a locked addw to
// memory is emitted instead of xadd. Kills the flags register.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);

  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    Address  slot_addr  = $mem$$Address;
    Register addend_reg = $add$$Register;
    __ lock();
    __ addw(slot_addr, addend_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10925
// GetAndAddS whose result is not used, immediate addend. Only selected when
// UseStoreImmI16 is set; emits a locked addw of the constant to memory.
// Kills the flags register.
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);

  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    Address slot_addr = $mem$$Address;
    __ lock();
    __ addw(slot_addr, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10937
// GetAndAddS whose result is used: locked xaddw, after which the result
// register is passed to narrow_subword_type with T_SHORT. Kills the flags.
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);

  format %{ "xaddw_lock $mem, $newval\t# $newval -> short" %}
  ins_encode %{
    Address  slot_addr = $mem$$Address;
    Register value_reg = $newval$$Register;
    __ lock();
    __ xaddw(slot_addr, value_reg);
    __ narrow_subword_type(value_reg, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10950
// GetAndAddI whose result is not used, register addend: a locked addl to
// memory is emitted instead of xadd. Kills the flags register.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);

  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    Address  slot_addr  = $mem$$Address;
    Register addend_reg = $add$$Register;
    __ lock();
    __ addl(slot_addr, addend_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10962
// GetAndAddI whose result is not used, immediate addend: a locked addl of the
// constant to memory is emitted instead of xadd. Kills the flags register.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);

  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    Address slot_addr = $mem$$Address;
    __ lock();
    __ addl(slot_addr, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10974
// GetAndAddI whose result is used: locked xaddl with $newval as both the
// addend and the destination of the rule. Kills the flags register.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);

  format %{ "xaddl_lock $mem, $newval" %}
  ins_encode %{
    Address  slot_addr = $mem$$Address;
    Register value_reg = $newval$$Register;
    __ lock();
    __ xaddl(slot_addr, value_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10986
// GetAndAddL whose result is not used, register addend: a locked addq to
// memory is emitted instead of xadd. Kills the flags register.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);

  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    Address  slot_addr  = $mem$$Address;
    Register addend_reg = $add$$Register;
    __ lock();
    __ addq(slot_addr, addend_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10998
// GetAndAddL whose result is not used, immL32 addend: a locked addq of the
// constant to memory is emitted instead of xadd. Kills the flags register.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);

  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    Address slot_addr = $mem$$Address;
    __ lock();
    __ addq(slot_addr, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11010
// GetAndAddL whose result is used: locked xaddq with $newval as both the
// addend and the destination of the rule. Kills the flags register.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);

  format %{ "xaddq_lock $mem, $newval" %}
  ins_encode %{
    Address  slot_addr = $mem$$Address;
    Register value_reg = $newval$$Register;
    __ lock();
    __ xaddq(slot_addr, value_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11022
// GetAndSetB: xchgb between $newval and memory, after which the register is
// passed to narrow_subword_type with T_BYTE. No lock prefix is emitted here
// and no flags effect is declared.
instruct xchgB(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetB mem newval));

  format %{ "XCHGB $newval,[$mem]\t# $newval -> byte" %}
  ins_encode %{
    Register value_reg = $newval$$Register;
    Address  slot_addr = $mem$$Address;
    __ xchgb(value_reg, slot_addr);
    __ narrow_subword_type(value_reg, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11032
// GetAndSetS: xchgw between $newval and memory, after which the register is
// passed to narrow_subword_type with T_SHORT. No lock prefix is emitted here
// and no flags effect is declared.
instruct xchgS(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetS mem newval));

  format %{ "XCHGW $newval,[$mem]\t# $newval -> short" %}
  ins_encode %{
    Register value_reg = $newval$$Register;
    Address  slot_addr = $mem$$Address;
    __ xchgw(value_reg, slot_addr);
    __ narrow_subword_type(value_reg, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11042
// GetAndSetI: a single xchgl between $newval and memory. No lock prefix is
// emitted here and no flags effect is declared.
instruct xchgI(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetI mem newval));

  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    Register value_reg = $newval$$Register;
    Address  slot_addr = $mem$$Address;
    __ xchgl(value_reg, slot_addr);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11051
// GetAndSetL: a single xchgq between $newval and memory. No lock prefix is
// emitted here and no flags effect is declared.
// The format string names the 64-bit mnemonic so the printed disassembly
// agrees with the xchgq that the encoding emits (it previously said XCHGL).
instruct xchgL( memory mem, rRegL newval) %{
  match(Set newval (GetAndSetL mem newval));
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11060
// GetAndSetP, selected only when the node carries no barrier data: a single
// xchgq between $newval and memory. No lock prefix is emitted here and no
// flags effect is declared.
instruct xchgP(memory mem, rRegP newval)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetP mem newval));

  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    Register value_reg = $newval$$Register;
    Address  slot_addr = $mem$$Address;
    __ xchgq(value_reg, slot_addr);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11070
// GetAndSetN, selected only when the node carries no barrier data: a single
// xchgl between $newval and memory. No lock prefix is emitted here and no
// flags effect is declared.
// The format string now has the opening bracket around $mem that was missing,
// matching the other xchg rules.
instruct xchgN( memory mem, rRegN newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetN mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11080
11081 //----------Abs Instructions-------------------------------------------
11082
11083 // Integer Absolute Instructions
// AbsI without a branch: zero $dst, subtract $src from it (dst = 0 - src),
// then cmov(less) $src into $dst. $dst is declared TEMP because it is written
// before $src is last read; the flags register is killed.
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);

  format %{ "xorl $dst, $dst\t# abs int\n\t"
            "subl $dst, $src\n\t"
            "cmovll $dst, $src" %}
  ins_encode %{
    Register result_reg = $dst$$Register;
    Register input_reg  = $src$$Register;
    __ xorl(result_reg, result_reg);
    __ subl(result_reg, input_reg);
    __ cmovl(Assembler::less, result_reg, input_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11099
11100 // Long Absolute Instructions
// AbsL without a branch: zero $dst with xorl, subtract $src from it with subq
// (dst = 0 - src), then cmovq(less) $src into $dst. $dst is declared TEMP
// because it is written before $src is last read; the flags are killed.
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);

  format %{ "xorl $dst, $dst\t# abs long\n\t"
            "subq $dst, $src\n\t"
            "cmovlq $dst, $src" %}
  ins_encode %{
    Register result_reg = $dst$$Register;
    Register input_reg  = $src$$Register;
    __ xorl(result_reg, result_reg);
    __ subq(result_reg, input_reg);
    __ cmovq(Assembler::less, result_reg, input_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11116
11117 //----------Subtraction Instructions-------------------------------------------
11118
11119 // Integer Subtraction Instructions
// Two-operand int subtract used when APX is off: dst = dst - src via subl.
// Kills the flags register and declares the condition flags listed in flag().
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ subl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11133
// Three-operand int subtract used when APX is on: esubl with a separate
// destination, passing false as the last argument. Kills the flags register;
// the flag() list additionally marks operand 1 as NDD-demotable.
instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ esubl(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11147
// APX int subtract of an immediate from a register into a separate
// destination: esubl(dst, src1, constant, false). Kills the flags register;
// the flag() list additionally marks operand 1 as NDD-demotable.
instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ esubl(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11161
// APX int subtract of an immediate from a loaded int into a register:
// esubl(dst, address, constant, false). Kills the flags register.
instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  lhs_addr = $src1$$Address;
    __ esubl(dst_reg, lhs_addr, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11175
// Non-APX int subtract of a loaded int from a register: subl dst, [src].
// Kills the flags register; cost 150.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ subl(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
11190
// APX int subtract of a loaded int from a register into a separate
// destination: esubl(dst, src1, address, false). Kills the flags register;
// the flag() list additionally marks operand 1 as NDD-demotable. Cost 150.
instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  ins_cost(150);
  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Register lhs_reg  = $src1$$Register;
    Address  rhs_addr = $src2$$Address;
    __ esubl(dst_reg, lhs_reg, rhs_addr, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
11205
// APX int subtract of a register from a loaded int into a register:
// esubl(dst, address, src2, false). Kills the flags register. Cost 150.
instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  lhs_addr = $src1$$Address;
    Register rhs_reg  = $src2$$Register;
    __ esubl(dst_reg, lhs_addr, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
11220
// Read-modify-write int subtract on memory: matches a StoreI of
// (LoadI dst) - src back to the same address and emits subl [dst], src.
// Kills the flags register. Cost 150.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    Address  dst_addr = $dst$$Address;
    Register src_reg  = $src$$Register;
    __ subl(dst_addr, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
11234
// Two-operand long subtract used when APX is off: dst = dst - src via subq.
// Kills the flags register and declares the condition flags listed in flag().
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ subq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11248
// Three-operand long subtract used when APX is on: esubq with a separate
// destination, passing false as the last argument. Kills the flags register;
// the flag() list additionally marks operand 1 as NDD-demotable.
instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ esubq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11262
// APX long subtract of an immL32 from a register into a separate
// destination: esubq(dst, src1, constant, false). Kills the flags register;
// the flag() list additionally marks operand 1 as NDD-demotable.
instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ esubq(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11276
// APX long subtract of an immL32 from a loaded long into a register:
// esubq(dst, address, constant, false). Kills the flags register.
instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  lhs_addr = $src1$$Address;
    __ esubq(dst_reg, lhs_addr, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11290
// Non-APX long subtract of a loaded long from a register: subq dst, [src].
// Kills the flags register; cost 150.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ subq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
11305
// APX long subtract of a loaded long from a register into a separate
// destination: esubq(dst, src1, address, false). Kills the flags register;
// the flag() list additionally marks operand 1 as NDD-demotable. Cost 150.
instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  ins_cost(150);
  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Register lhs_reg  = $src1$$Register;
    Address  rhs_addr = $src2$$Address;
    __ esubq(dst_reg, lhs_reg, rhs_addr, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
11320
// APX long subtract of a register from a loaded long into a register:
// esubq(dst, address, src2, false). Kills the flags register. Cost 150.
instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  lhs_addr = $src1$$Address;
    Register rhs_reg  = $src2$$Register;
    __ esubq(dst_reg, lhs_addr, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
11335
// Read-modify-write long subtract on memory: matches a StoreL of
// (LoadL dst) - src back to the same address and emits subq [dst], src.
// Kills the flags register. Cost 150.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    Address  dst_addr = $dst$$Address;
    Register src_reg  = $src$$Register;
    __ subq(dst_addr, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
11349
11350 // Subtract from a pointer
11351 // XXX hmpf???
// Matches AddP of a pointer and (0 - src) with an int src, and emits a single
// subq of $src from $dst. Kills the flags register.
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq $dst, $src\t# ptr - int" %}
  ins_encode %{
    Register ptr_reg    = $dst$$Register;
    Register offset_reg = $src$$Register;
    __ subq(ptr_reg, offset_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11363
// Non-APX int negate expressed as (0 - dst): emits negl on $dst in place.
// Kills the flags register.
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    Register value_reg = $dst$$Register;
    __ negl(value_reg);
  %}
  ins_pipe(ialu_reg);
%}
11377
// APX int negate expressed as (0 - src) with a separate destination:
// enegl(dst, src, false). Kills the flags register; the flag() list marks
// operand 2 as NDD-demotable.
instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enegl(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11391
// Non-APX int negate for the NegI ideal node: emits negl on $dst in place.
// Kills the flags register.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    Register value_reg = $dst$$Register;
    __ negl(value_reg);
  %}
  ins_pipe(ialu_reg);
%}
11405
// APX int negate for the NegI ideal node with a separate destination:
// enegl(dst, src, false). Kills the flags register; the flag() list marks
// operand 1 as NDD-demotable.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enegl(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11419
// Read-modify-write int negate on memory: matches a StoreI of
// (0 - LoadI dst) back to the same address and emits negl [dst].
// Kills the flags register.
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    Address slot_addr = $dst$$Address;
    __ negl(slot_addr);
  %}
  ins_pipe(ialu_reg);
%}
11432
// Non-APX long negate expressed as (0 - dst): emits negq on $dst in place.
// Kills the flags register.
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    Register value_reg = $dst$$Register;
    __ negq(value_reg);
  %}
  ins_pipe(ialu_reg);
%}
11446
// APX long negate expressed as (0 - src) with a separate destination:
// enegq(dst, src, false). Kills the flags register; the flag() list marks
// operand 2 as NDD-demotable.
instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enegq(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11460
// Non-APX long negate for the NegL ideal node: emits negq on $dst in place.
// Kills the flags register.
// The format comment now reads "# long" to match the rRegL operand and the
// negq encoding (it previously said "# int").
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegL dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11474
// APX long negate for the NegL ideal node with a separate destination:
// enegq(dst, src, false). Kills the flags register; the flag() list marks
// operand 1 as NDD-demotable.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enegq(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11488
// Read-modify-write long negate on memory: matches a StoreL of
// (0 - LoadL dst) back to the same address and emits negq [dst].
// Kills the flags register.
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    Address slot_addr = $dst$$Address;
    __ negq(slot_addr);
  %}
  ins_pipe(ialu_reg);
%}
11501
11502 //----------Multiplication/Division Instructions-------------------------------
11503 // Integer Multiplication Instructions
11504 // Multiply Register
11505
// Two-operand int multiply used when APX is off: dst = dst * src via imull.
// Kills the flags register. Cost 300.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ imull(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11519
// Three-operand int multiply used when APX is on: eimull with a separate
// destination, passing false as the last argument. Kills the flags register;
// both source operands are flagged NDD-demotable. Cost 300.
instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ eimull(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11534
// Int multiply of a register by an immediate into a separate destination,
// using the three-operand imull form. No UseAPX predicate is attached.
// Kills the flags register. Cost 300.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ imull(dst_reg, src_reg, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11547
// Non-APX int multiply of a register by a loaded int: imull dst, [src].
// Kills the flags register. Cost 350.
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ imull(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11561
// APX int multiply of a register by a loaded int into a separate destination:
// eimull(dst, src1, address, false). Kills the flags register; the flag()
// list carries both NDD-demotable operand markers. Cost 350.
instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (MulI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(350);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Register lhs_reg  = $src1$$Register;
    Address  rhs_addr = $src2$$Address;
    __ eimull(dst_reg, lhs_reg, rhs_addr, false);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11576
// Int multiply of a loaded int by an immediate into a register, using the
// three-operand imull form with a memory source. No UseAPX predicate is
// attached. Kills the flags register. Cost 300.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ imull(dst_reg, src_addr, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11589
// MulAddS2I, i.e. dst*src1 + src2*src3, implemented purely by expansion into
// two mulI_rReg instructions followed by one addI_rReg. $src2 receives the
// second product and is therefore declared KILL, as are the flags.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr) %{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{
    mulI_rReg(dst, src1, cr);
    mulI_rReg(src2, src3, cr);
    addI_rReg(dst, src2, cr);
  %}
%}
11599
// Two-operand long multiply used when APX is off: dst = dst * src via imulq.
// Kills the flags register. Cost 300.
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ imulq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11613
// Three-operand long multiply used when APX is on: eimulq with a separate
// destination, passing false as the last argument. Kills the flags register;
// both source operands are flagged NDD-demotable. Cost 300.
instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ eimulq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11628
// Long multiply of a register by an immL32 into a separate destination,
// using the three-operand imulq form. No UseAPX predicate is attached.
// Kills the flags register. Cost 300.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr) %{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ imulq(dst_reg, src_reg, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11641
// Long multiply of a register by a loaded long: dst = dst * [src].
// Selected only when UseAPX is off; emits imulq with a memory operand and is
// costed higher (350) than the register-register form.
11642 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11643 %{
11644 predicate(!UseAPX);
11645 match(Set dst (MulL dst (LoadL src)));
11646 effect(KILL cr);
11647
11648 ins_cost(350);
11649 format %{ "imulq $dst, $src\t# long" %}
11650 ins_encode %{
11651 __ imulq($dst$$Register, $src$$Address);
11652 %}
11653 ins_pipe(ialu_reg_mem_alu0);
11654 %}
11655
// Long multiply, three-operand (NDD) form with a memory source:
// dst = src1 * [src2]. Selected only when UseAPX is on; emits eimulq with an
// Address operand and declares the flags register killed.
// The format text carries the same "long ndd" tag as mulL_rReg_ndd so the
// NDD form is recognisable in the printed assembly.
// NOTE(review): src2 is a memory operand, yet Flag_ndd_demotable_opr2 is set
// (the int sibling does the same) - confirm that is intended.
11656 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11657 %{
11658 predicate(UseAPX);
11659 match(Set dst (MulL src1 (LoadL src2)));
11660 effect(KILL cr);
11661 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11662
11663 ins_cost(350);
11664 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11665 ins_encode %{
11666 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11667 %}
11668 ins_pipe(ialu_reg_mem_alu0);
11669 %}
11670
// Long multiply of a loaded long by a 32-bit immediate: dst = [src] * imm.
// Emits the three-operand imulq with the memory operand folded in; the flags
// register is declared killed.
11671 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11672 %{
11673 match(Set dst (MulL (LoadL src) imm));
11674 effect(KILL cr);
11675
11676 ins_cost(300);
11677 format %{ "imulq $dst, $src, $imm\t# long" %}
11678 ins_encode %{
11679 __ imulq($dst$$Register, $src$$Register == noreg ? $src$$Address : $src$$Address, $imm$$constant);
11680 %}
11681 ins_pipe(ialu_reg_mem_alu0);
11682 %}
11683
// Signed high half of a long multiply (MulHiL src rax).
// Operands are pinned: one input in rax (used and then killed), result in
// rdx. Emits the one-operand imulq on src.
11684 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11685 %{
11686 match(Set dst (MulHiL src rax));
11687 effect(USE_KILL rax, KILL cr);
11688
11689 ins_cost(300);
11690 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11691 ins_encode %{
11692 __ imulq($src$$Register);
11693 %}
11694 ins_pipe(ialu_reg_reg_alu0);
11695 %}
11696
// Unsigned high half of a long multiply (UMulHiL src rax).
// Same register pinning as mulHiL_rReg (input in rax, result in rdx) but
// emits the one-operand mulq on src.
11697 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11698 %{
11699 match(Set dst (UMulHiL src rax));
11700 effect(USE_KILL rax, KILL cr);
11701
11702 ins_cost(300);
11703 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11704 ins_encode %{
11705 __ mulq($src$$Register);
11706 %}
11707 ins_pipe(ialu_reg_reg_alu0);
11708 %}
11709
// Signed int division: rax = rax / div, with rdx and the flags killed.
// The divisor is restricted to registers other than rax/rdx. Encoding is
// delegated to cdql_enc; per the format text it guards the
// 0x80000000 / -1 case so that idivl is skipped for it.
11710 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11711 rFlagsReg cr)
11712 %{
11713 match(Set rax (DivI rax div));
11714 effect(KILL rdx, KILL cr);
11715
11716 ins_cost(30*100+10*100); // XXX
11717 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11718 "jne,s normal\n\t"
11719 "xorl rdx, rdx\n\t"
11720 "cmpl $div, -1\n\t"
11721 "je,s done\n"
11722 "normal: cdql\n\t"
11723 "idivl $div\n"
11724 "done:" %}
11725 ins_encode(cdql_enc(div));
11726 ins_pipe(ialu_reg_reg_alu0);
11727 %}
11728
// Signed long division: rax = rax / div, with rdx and the flags killed.
// The divisor is restricted to registers other than rax/rdx. Encoding is
// delegated to cdqq_enc; per the format text it guards the
// 0x8000000000000000 / -1 case so that idivq is skipped for it.
11729 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11730 rFlagsReg cr)
11731 %{
11732 match(Set rax (DivL rax div));
11733 effect(KILL rdx, KILL cr);
11734
11735 ins_cost(30*100+10*100); // XXX
11736 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11737 "cmpq rax, rdx\n\t"
11738 "jne,s normal\n\t"
11739 "xorl rdx, rdx\n\t"
11740 "cmpq $div, -1\n\t"
11741 "je,s done\n"
11742 "normal: cdqq\n\t"
11743 "idivq $div\n"
11744 "done:" %}
11745 ins_encode(cdqq_enc(div));
11746 ins_pipe(ialu_reg_reg_alu0);
11747 %}
11748
// Unsigned int division: rax = rax / div (UDivI), with rdx and the flags
// killed. Code generation is delegated to the udivI macro-assembler helper,
// which receives rax, the divisor and rdx.
11749 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11750 %{
11751 match(Set rax (UDivI rax div));
11752 effect(KILL rdx, KILL cr);
11753
11754 ins_cost(300);
11755 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11756 ins_encode %{
11757 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11758 %}
11759 ins_pipe(ialu_reg_reg_alu0);
11760 %}
11761
// Unsigned long division: rax = rax / div (UDivL), with rdx and the flags
// killed. Code generation is delegated to the udivL macro-assembler helper,
// which receives rax, the divisor and rdx.
11762 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11763 %{
11764 match(Set rax (UDivL rax div));
11765 effect(KILL rdx, KILL cr);
11766
11767 ins_cost(300);
11768 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11769 ins_encode %{
11770 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11771 %}
11772 ins_pipe(ialu_reg_reg_alu0);
11773 %}
11774
11775 // Integer DIVMOD with Register, both quotient and mod results
// Matches the DivModI node directly (no Set). Unlike divI_rReg, rdx is not
// listed as killed; only the flags are. Shares the cdql_enc encoding and its
// guarded idivl sequence, and uses the pipe_slow pipeline class.
11776 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11777 rFlagsReg cr)
11778 %{
11779 match(DivModI rax div);
11780 effect(KILL cr);
11781
11782 ins_cost(30*100+10*100); // XXX
11783 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11784 "jne,s normal\n\t"
11785 "xorl rdx, rdx\n\t"
11786 "cmpl $div, -1\n\t"
11787 "je,s done\n"
11788 "normal: cdql\n\t"
11789 "idivl $div\n"
11790 "done:" %}
11791 ins_encode(cdql_enc(div));
11792 ins_pipe(pipe_slow);
11793 %}
11794
11795 // Long DIVMOD with Register, both quotient and mod results
// Matches the DivModL node directly (no Set). Unlike divL_rReg, rdx is not
// listed as killed; only the flags are. Shares the cdqq_enc encoding and its
// guarded idivq sequence, and uses the pipe_slow pipeline class.
11796 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11797 rFlagsReg cr)
11798 %{
11799 match(DivModL rax div);
11800 effect(KILL cr);
11801
11802 ins_cost(30*100+10*100); // XXX
11803 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11804 "cmpq rax, rdx\n\t"
11805 "jne,s normal\n\t"
11806 "xorl rdx, rdx\n\t"
11807 "cmpq $div, -1\n\t"
11808 "je,s done\n"
11809 "normal: cdqq\n\t"
11810 "idivq $div\n"
11811 "done:" %}
11812 ins_encode(cdqq_enc(div));
11813 ins_pipe(pipe_slow);
11814 %}
11815
11816 // Unsigned integer DIVMOD with Register, both quotient and mod results
// Matches UDivModI directly (no Set). Needs one extra scratch register (TEMP
// tmp, outside rax/rdx) which is handed to the udivmodI macro-assembler
// helper together with rax, the divisor and rdx.
11817 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11818 no_rax_rdx_RegI div, rFlagsReg cr)
11819 %{
11820 match(UDivModI rax div);
11821 effect(TEMP tmp, KILL cr);
11822
11823 ins_cost(300);
11824 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11825 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11826 %}
11827 ins_encode %{
11828 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11829 %}
11830 ins_pipe(pipe_slow);
11831 %}
11832
11833 // Unsigned long DIVMOD with Register, both quotient and mod results
// Matches UDivModL directly (no Set). Needs one extra scratch register (TEMP
// tmp, outside rax/rdx) which is handed to the udivmodL macro-assembler
// helper together with rax, the divisor and rdx.
11834 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11835 no_rax_rdx_RegL div, rFlagsReg cr)
11836 %{
11837 match(UDivModL rax div);
11838 effect(TEMP tmp, KILL cr);
11839
11840 ins_cost(300);
11841 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11842 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11843 %}
11844 ins_encode %{
11845 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11846 %}
11847 ins_pipe(pipe_slow);
11848 %}
11849
// Signed int remainder: rdx = rax % div, with rax and the flags killed.
// Uses the same cdql_enc encoding (and guarded idivl sequence) as divI_rReg;
// only the result register and the kill set differ.
11850 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11851 rFlagsReg cr)
11852 %{
11853 match(Set rdx (ModI rax div));
11854 effect(KILL rax, KILL cr);
11855
11856 ins_cost(300); // XXX
11857 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11858 "jne,s normal\n\t"
11859 "xorl rdx, rdx\n\t"
11860 "cmpl $div, -1\n\t"
11861 "je,s done\n"
11862 "normal: cdql\n\t"
11863 "idivl $div\n"
11864 "done:" %}
11865 ins_encode(cdql_enc(div));
11866 ins_pipe(ialu_reg_reg_alu0);
11867 %}
11868
// Signed long remainder: rdx = rax % div, with rax and the flags killed.
// Uses the same cdqq_enc encoding (and guarded idivq sequence) as divL_rReg;
// only the result register and the kill set differ.
11869 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11870 rFlagsReg cr)
11871 %{
11872 match(Set rdx (ModL rax div));
11873 effect(KILL rax, KILL cr);
11874
11875 ins_cost(300); // XXX
11876 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11877 "cmpq rax, rdx\n\t"
11878 "jne,s normal\n\t"
11879 "xorl rdx, rdx\n\t"
11880 "cmpq $div, -1\n\t"
11881 "je,s done\n"
11882 "normal: cdqq\n\t"
11883 "idivq $div\n"
11884 "done:" %}
11885 ins_encode(cdqq_enc(div));
11886 ins_pipe(ialu_reg_reg_alu0);
11887 %}
11888
// Unsigned int remainder: rdx = rax % div (UModI), with rax and the flags
// killed. Code generation is delegated to the umodI macro-assembler helper,
// which receives rax, the divisor and rdx.
11889 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11890 %{
11891 match(Set rdx (UModI rax div));
11892 effect(KILL rax, KILL cr);
11893
11894 ins_cost(300);
11895 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11896 ins_encode %{
11897 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11898 %}
11899 ins_pipe(ialu_reg_reg_alu0);
11900 %}
11901
// Unsigned long remainder: rdx = rax % div (UModL), with rax and the flags
// killed. Code generation is delegated to the umodL macro-assembler helper,
// which receives rax, the divisor and rdx.
11902 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11903 %{
11904 match(Set rdx (UModL rax div));
11905 effect(KILL rax, KILL cr);
11906
11907 ins_cost(300);
11908 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11909 ins_encode %{
11910 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11911 %}
11912 ins_pipe(ialu_reg_reg_alu0);
11913 %}
11914
11915 // Integer Shift Instructions
11916 // Shift Left by one, two, three
// Int, in-place register form: dst = dst << shift with an immI2 count.
// Selected only when UseAPX is off; emits sall and kills the flags.
11917 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11918 %{
11919 predicate(!UseAPX);
11920 match(Set dst (LShiftI dst shift));
11921 effect(KILL cr);
11922
11923 format %{ "sall $dst, $shift" %}
11924 ins_encode %{
11925 __ sall($dst$$Register, $shift$$constant);
11926 %}
11927 ins_pipe(ialu_reg);
11928 %}
11929
11930 // Shift Left by one, two, three
// Int, three-operand (NDD) form: dst = src << shift with an immI2 count.
// Selected only when UseAPX is on; emits esall, kills the flags, and tags
// operand 1 as NDD-demotable.
11931 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11932 %{
11933 predicate(UseAPX);
11934 match(Set dst (LShiftI src shift));
11935 effect(KILL cr);
11936 flag(PD::Flag_ndd_demotable_opr1);
11937
11938 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11939 ins_encode %{
11940 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11941 %}
11942 ins_pipe(ialu_reg);
11943 %}
11944
11945 // Shift Left by 8-bit immediate
// Int, in-place register form: dst = dst << shift with an immI8 count.
// Selected only when UseAPX is off; emits sall and kills the flags.
11946 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11947 %{
11948 predicate(!UseAPX);
11949 match(Set dst (LShiftI dst shift));
11950 effect(KILL cr);
11951
11952 format %{ "sall $dst, $shift" %}
11953 ins_encode %{
11954 __ sall($dst$$Register, $shift$$constant);
11955 %}
11956 ins_pipe(ialu_reg);
11957 %}
11958
11959 // Shift Left by 8-bit immediate
// Int, three-operand (NDD) form: dst = src << shift with an immI8 count.
// Selected only when UseAPX is on; emits esall, kills the flags, and tags
// operand 1 as NDD-demotable.
11960 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11961 %{
11962 predicate(UseAPX);
11963 match(Set dst (LShiftI src shift));
11964 effect(KILL cr);
11965 flag(PD::Flag_ndd_demotable_opr1);
11966
11967 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11968 ins_encode %{
11969 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11970 %}
11971 ins_pipe(ialu_reg);
11972 %}
11973
// Shift Left of a loaded int by 8-bit immediate, NDD form:
// dst = [src] << shift. Selected only when UseAPX is on; emits esall with an
// Address source and kills the flags. No NDD-demotable flag is set here.
11974 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11975 %{
11976 predicate(UseAPX);
11977 match(Set dst (LShiftI (LoadI src) shift));
11978 effect(KILL cr);
11979
11980 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11981 ins_encode %{
11982 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11983 %}
11984 ins_pipe(ialu_reg);
11985 %}
11986
11987 // Shift Left by 8-bit immediate
// Read-modify-write on memory: matches a StoreI of (LoadI dst) << shift back
// to the same address and emits a single sall on that address. Kills the
// flags; has no predicate.
11988 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11989 %{
11990 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11991 effect(KILL cr);
11992
11993 format %{ "sall $dst, $shift" %}
11994 ins_encode %{
11995 __ sall($dst$$Address, $shift$$constant);
11996 %}
11997 ins_pipe(ialu_mem_imm);
11998 %}
11999
12000 // Shift Left by variable
// Int, in-place register form with the count pinned to rcx (rcx_RegI).
// Selected only when BMI2 is not reported as supported; emits the
// one-operand sall and kills the flags.
12001 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12002 %{
12003 predicate(!VM_Version::supports_bmi2());
12004 match(Set dst (LShiftI dst shift));
12005 effect(KILL cr);
12006
12007 format %{ "sall $dst, $shift" %}
12008 ins_encode %{
12009 __ sall($dst$$Register);
12010 %}
12011 ins_pipe(ialu_reg_reg);
12012 %}
12013
12014 // Shift Left by variable
// Read-modify-write on memory with the count pinned to rcx: matches a StoreI
// of (LoadI dst) << shift back to the same address. Selected only when BMI2
// is not reported as supported; emits sall on the address and kills the flags.
12015 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12016 %{
12017 predicate(!VM_Version::supports_bmi2());
12018 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12019 effect(KILL cr);
12020
12021 format %{ "sall $dst, $shift" %}
12022 ins_encode %{
12023 __ sall($dst$$Address);
12024 %}
12025 ins_pipe(ialu_mem_reg);
12026 %}
12027
// Shift Left by variable, BMI2 form: dst = src << shift with the count in any
// int register. Selected only when BMI2 is reported as supported; emits shlxl.
// No flags operand is declared, so nothing is listed as killed.
12028 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12029 %{
12030 predicate(VM_Version::supports_bmi2());
12031 match(Set dst (LShiftI src shift));
12032
12033 format %{ "shlxl $dst, $src, $shift" %}
12034 ins_encode %{
12035 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12036 %}
12037 ins_pipe(ialu_reg_reg);
12038 %}
12039
// Shift Left of a loaded int by variable, BMI2 form: dst = [src] << shift.
// Selected only when BMI2 is reported as supported; emits shlxl with an
// Address source at an explicit cost of 175. No flags operand is declared.
12040 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12041 %{
12042 predicate(VM_Version::supports_bmi2());
12043 match(Set dst (LShiftI (LoadI src) shift));
12044 ins_cost(175);
12045 format %{ "shlxl $dst, $src, $shift" %}
12046 ins_encode %{
12047 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12048 %}
12049 ins_pipe(ialu_reg_mem);
12050 %}
12051
12052 // Arithmetic Shift Right by 8-bit immediate
// Int, in-place register form: dst = dst >> shift with an immI8 count.
// Selected only when UseAPX is off; emits sarl and kills the flags.
// NOTE(review): pipeline class is ialu_mem_imm although the operand is a
// register (the sal/shr register forms use ialu_reg) - confirm intended.
12053 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12054 %{
12055 predicate(!UseAPX);
12056 match(Set dst (RShiftI dst shift));
12057 effect(KILL cr);
12058
12059 format %{ "sarl $dst, $shift" %}
12060 ins_encode %{
12061 __ sarl($dst$$Register, $shift$$constant);
12062 %}
12063 ins_pipe(ialu_mem_imm);
12064 %}
12065
12066 // Arithmetic Shift Right by 8-bit immediate
// Int, three-operand (NDD) form: dst = src >> shift with an immI8 count.
// Selected only when UseAPX is on; emits esarl, kills the flags, and tags
// operand 1 as NDD-demotable.
// NOTE(review): pipeline class is ialu_mem_imm although the source is a
// register (the sal/shr NDD register forms use ialu_reg) - confirm intended.
12067 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12068 %{
12069 predicate(UseAPX);
12070 match(Set dst (RShiftI src shift));
12071 effect(KILL cr);
12072 flag(PD::Flag_ndd_demotable_opr1);
12073
12074 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12075 ins_encode %{
12076 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12077 %}
12078 ins_pipe(ialu_mem_imm);
12079 %}
12080
// Arithmetic Shift Right of a loaded int by 8-bit immediate, NDD form:
// dst = [src] >> shift. Selected only when UseAPX is on; emits esarl with an
// Address source and kills the flags. No NDD-demotable flag is set here.
12081 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12082 %{
12083 predicate(UseAPX);
12084 match(Set dst (RShiftI (LoadI src) shift));
12085 effect(KILL cr);
12086
12087 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12088 ins_encode %{
12089 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12090 %}
12091 ins_pipe(ialu_mem_imm);
12092 %}
12093
12094 // Arithmetic Shift Right by 8-bit immediate
// Read-modify-write on memory: matches a StoreI of (LoadI dst) >> shift back
// to the same address and emits a single sarl on that address. Kills the
// flags; has no predicate.
12095 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12096 %{
12097 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12098 effect(KILL cr);
12099
12100 format %{ "sarl $dst, $shift" %}
12101 ins_encode %{
12102 __ sarl($dst$$Address, $shift$$constant);
12103 %}
12104 ins_pipe(ialu_mem_imm);
12105 %}
12106
12107 // Arithmetic Shift Right by variable
// Int, in-place register form with the count pinned to rcx (rcx_RegI).
// Selected only when BMI2 is not reported as supported; emits the
// one-operand sarl and kills the flags.
12108 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12109 %{
12110 predicate(!VM_Version::supports_bmi2());
12111 match(Set dst (RShiftI dst shift));
12112 effect(KILL cr);
12113
12114 format %{ "sarl $dst, $shift" %}
12115 ins_encode %{
12116 __ sarl($dst$$Register);
12117 %}
12118 ins_pipe(ialu_reg_reg);
12119 %}
12120
12121 // Arithmetic Shift Right by variable
// Read-modify-write on memory with the count pinned to rcx: matches a StoreI
// of (LoadI dst) >> shift back to the same address. Selected only when BMI2
// is not reported as supported; emits sarl on the address and kills the flags.
12122 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12123 %{
12124 predicate(!VM_Version::supports_bmi2());
12125 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12126 effect(KILL cr);
12127
12128 format %{ "sarl $dst, $shift" %}
12129 ins_encode %{
12130 __ sarl($dst$$Address);
12131 %}
12132 ins_pipe(ialu_mem_reg);
12133 %}
12134
// Arithmetic Shift Right by variable, BMI2 form: dst = src >> shift with the
// count in any int register. Selected only when BMI2 is reported as
// supported; emits sarxl. No flags operand is declared.
12135 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12136 %{
12137 predicate(VM_Version::supports_bmi2());
12138 match(Set dst (RShiftI src shift));
12139
12140 format %{ "sarxl $dst, $src, $shift" %}
12141 ins_encode %{
12142 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12143 %}
12144 ins_pipe(ialu_reg_reg);
12145 %}
12146
// Arithmetic Shift Right of a loaded int by variable, BMI2 form:
// dst = [src] >> shift. Selected only when BMI2 is reported as supported;
// emits sarxl with an Address source at an explicit cost of 175.
12147 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12148 %{
12149 predicate(VM_Version::supports_bmi2());
12150 match(Set dst (RShiftI (LoadI src) shift));
12151 ins_cost(175);
12152 format %{ "sarxl $dst, $src, $shift" %}
12153 ins_encode %{
12154 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12155 %}
12156 ins_pipe(ialu_reg_mem);
12157 %}
12158
12159 // Logical Shift Right by 8-bit immediate
// Int, in-place register form: dst = dst >>> shift with an immI8 count.
// Selected only when UseAPX is off; emits shrl and kills the flags.
12160 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12161 %{
12162 predicate(!UseAPX);
12163 match(Set dst (URShiftI dst shift));
12164 effect(KILL cr);
12165
12166 format %{ "shrl $dst, $shift" %}
12167 ins_encode %{
12168 __ shrl($dst$$Register, $shift$$constant);
12169 %}
12170 ins_pipe(ialu_reg);
12171 %}
12172
12173 // Logical Shift Right by 8-bit immediate
// Int, three-operand (NDD) form: dst = src >>> shift with an immI8 count.
// Selected only when UseAPX is on; emits eshrl, kills the flags, and tags
// operand 1 as NDD-demotable.
12174 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12175 %{
12176 predicate(UseAPX);
12177 match(Set dst (URShiftI src shift));
12178 effect(KILL cr);
12179 flag(PD::Flag_ndd_demotable_opr1);
12180
12181 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12182 ins_encode %{
12183 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12184 %}
12185 ins_pipe(ialu_reg);
12186 %}
12187
// Logical Shift Right of a loaded int by 8-bit immediate, NDD form:
// dst = [src] >>> shift. Selected only when UseAPX is on; emits eshrl with an
// Address source and kills the flags. No NDD-demotable flag is set here.
12188 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12189 %{
12190 predicate(UseAPX);
12191 match(Set dst (URShiftI (LoadI src) shift));
12192 effect(KILL cr);
12193
12194 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12195 ins_encode %{
12196 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12197 %}
12198 ins_pipe(ialu_reg);
12199 %}
12200
12201 // Logical Shift Right by 8-bit immediate
// Read-modify-write on memory: matches a StoreI of (LoadI dst) >>> shift back
// to the same address and emits a single shrl on that address. Kills the
// flags; has no predicate.
12202 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12203 %{
12204 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12205 effect(KILL cr);
12206
12207 format %{ "shrl $dst, $shift" %}
12208 ins_encode %{
12209 __ shrl($dst$$Address, $shift$$constant);
12210 %}
12211 ins_pipe(ialu_mem_imm);
12212 %}
12213
12214 // Logical Shift Right by variable
// Int, in-place register form with the count pinned to rcx (rcx_RegI).
// Selected only when BMI2 is not reported as supported; emits the
// one-operand shrl and kills the flags.
12215 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12216 %{
12217 predicate(!VM_Version::supports_bmi2());
12218 match(Set dst (URShiftI dst shift));
12219 effect(KILL cr);
12220
12221 format %{ "shrl $dst, $shift" %}
12222 ins_encode %{
12223 __ shrl($dst$$Register);
12224 %}
12225 ins_pipe(ialu_reg_reg);
12226 %}
12227
12228 // Logical Shift Right by variable
// Read-modify-write on memory with the count pinned to rcx: matches a StoreI
// of (LoadI dst) >>> shift back to the same address. Selected only when BMI2
// is not reported as supported; emits shrl on the address and kills the flags.
12229 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12230 %{
12231 predicate(!VM_Version::supports_bmi2());
12232 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12233 effect(KILL cr);
12234
12235 format %{ "shrl $dst, $shift" %}
12236 ins_encode %{
12237 __ shrl($dst$$Address);
12238 %}
12239 ins_pipe(ialu_mem_reg);
12240 %}
12241
// Logical Shift Right by variable, BMI2 form: dst = src >>> shift with the
// count in any int register. Selected only when BMI2 is reported as
// supported; emits shrxl. No flags operand is declared.
12242 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12243 %{
12244 predicate(VM_Version::supports_bmi2());
12245 match(Set dst (URShiftI src shift));
12246
12247 format %{ "shrxl $dst, $src, $shift" %}
12248 ins_encode %{
12249 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12250 %}
12251 ins_pipe(ialu_reg_reg);
12252 %}
12253
// Logical Shift Right of a loaded int by variable, BMI2 form:
// dst = [src] >>> shift. Selected only when BMI2 is reported as supported;
// emits shrxl with an Address source at an explicit cost of 175.
12254 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12255 %{
12256 predicate(VM_Version::supports_bmi2());
12257 match(Set dst (URShiftI (LoadI src) shift));
12258 ins_cost(175);
12259 format %{ "shrxl $dst, $src, $shift" %}
12260 ins_encode %{
12261 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12262 %}
12263 ins_pipe(ialu_reg_mem);
12264 %}
12265
12266 // Long Shift Instructions
12267 // Shift Left by one, two, three
// Long, in-place register form: dst = dst << shift with an immI2 count.
// Selected only when UseAPX is off; emits salq and kills the flags.
12268 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12269 %{
12270 predicate(!UseAPX);
12271 match(Set dst (LShiftL dst shift));
12272 effect(KILL cr);
12273
12274 format %{ "salq $dst, $shift" %}
12275 ins_encode %{
12276 __ salq($dst$$Register, $shift$$constant);
12277 %}
12278 ins_pipe(ialu_reg);
12279 %}
12280
12281 // Shift Left by one, two, three
// Long, three-operand (NDD) form: dst = src << shift with an immI2 count.
// Selected only when UseAPX is on; emits esalq, kills the flags, and tags
// operand 1 as NDD-demotable.
12282 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12283 %{
12284 predicate(UseAPX);
12285 match(Set dst (LShiftL src shift));
12286 effect(KILL cr);
12287 flag(PD::Flag_ndd_demotable_opr1);
12288
12289 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12290 ins_encode %{
12291 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12292 %}
12293 ins_pipe(ialu_reg);
12294 %}
12295
12296 // Shift Left by 8-bit immediate
// Long, in-place register form: dst = dst << shift with an immI8 count.
// Selected only when UseAPX is off; emits salq and kills the flags.
12297 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12298 %{
12299 predicate(!UseAPX);
12300 match(Set dst (LShiftL dst shift));
12301 effect(KILL cr);
12302
12303 format %{ "salq $dst, $shift" %}
12304 ins_encode %{
12305 __ salq($dst$$Register, $shift$$constant);
12306 %}
12307 ins_pipe(ialu_reg);
12308 %}
12309
12310 // Shift Left by 8-bit immediate
// Long, three-operand (NDD) form: dst = src << shift with an immI8 count.
// Selected only when UseAPX is on; emits esalq, kills the flags, and tags
// operand 1 as NDD-demotable.
12311 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12312 %{
12313 predicate(UseAPX);
12314 match(Set dst (LShiftL src shift));
12315 effect(KILL cr);
12316 flag(PD::Flag_ndd_demotable_opr1);
12317
12318 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12319 ins_encode %{
12320 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12321 %}
12322 ins_pipe(ialu_reg);
12323 %}
12324
// Shift Left of a loaded long by 8-bit immediate, NDD form:
// dst = [src] << shift. Selected only when UseAPX is on; emits esalq with an
// Address source and kills the flags. No NDD-demotable flag is set here.
12325 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12326 %{
12327 predicate(UseAPX);
12328 match(Set dst (LShiftL (LoadL src) shift));
12329 effect(KILL cr);
12330
12331 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12332 ins_encode %{
12333 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12334 %}
12335 ins_pipe(ialu_reg);
12336 %}
12337
12338 // Shift Left by 8-bit immediate
// Read-modify-write on memory: matches a StoreL of (LoadL dst) << shift back
// to the same address and emits a single salq on that address. Kills the
// flags; has no predicate.
12339 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12340 %{
12341 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12342 effect(KILL cr);
12343
12344 format %{ "salq $dst, $shift" %}
12345 ins_encode %{
12346 __ salq($dst$$Address, $shift$$constant);
12347 %}
12348 ins_pipe(ialu_mem_imm);
12349 %}
12350
12351 // Shift Left by variable
// Long, in-place register form with the (int) count pinned to rcx.
// Selected only when BMI2 is not reported as supported; emits the
// one-operand salq and kills the flags.
12352 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12353 %{
12354 predicate(!VM_Version::supports_bmi2());
12355 match(Set dst (LShiftL dst shift));
12356 effect(KILL cr);
12357
12358 format %{ "salq $dst, $shift" %}
12359 ins_encode %{
12360 __ salq($dst$$Register);
12361 %}
12362 ins_pipe(ialu_reg_reg);
12363 %}
12364
12365 // Shift Left by variable
// Read-modify-write on memory with the count pinned to rcx: matches a StoreL
// of (LoadL dst) << shift back to the same address. Selected only when BMI2
// is not reported as supported; emits salq on the address and kills the flags.
12366 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12367 %{
12368 predicate(!VM_Version::supports_bmi2());
12369 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12370 effect(KILL cr);
12371
12372 format %{ "salq $dst, $shift" %}
12373 ins_encode %{
12374 __ salq($dst$$Address);
12375 %}
12376 ins_pipe(ialu_mem_reg);
12377 %}
12378
// Shift Left by variable, BMI2 form: dst = src << shift for longs, with the
// count in any int register. Selected only when BMI2 is reported as
// supported; emits shlxq. No flags operand is declared.
12379 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12380 %{
12381 predicate(VM_Version::supports_bmi2());
12382 match(Set dst (LShiftL src shift));
12383
12384 format %{ "shlxq $dst, $src, $shift" %}
12385 ins_encode %{
12386 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12387 %}
12388 ins_pipe(ialu_reg_reg);
12389 %}
12390
// Shift Left of a loaded long by variable, BMI2 form: dst = [src] << shift.
// Selected only when BMI2 is reported as supported; emits shlxq with an
// Address source at an explicit cost of 175. No flags operand is declared.
12391 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12392 %{
12393 predicate(VM_Version::supports_bmi2());
12394 match(Set dst (LShiftL (LoadL src) shift));
12395 ins_cost(175);
12396 format %{ "shlxq $dst, $src, $shift" %}
12397 ins_encode %{
12398 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12399 %}
12400 ins_pipe(ialu_reg_mem);
12401 %}
12402
12403 // Arithmetic Shift Right by immediate
// Long, in-place register form: dst = dst >> shift. The count operand is a
// full immI (not immI8); the encoder masks it with 0x3F and narrows it to
// unsigned char before emitting sarq. Selected only when UseAPX is off.
// NOTE(review): pipeline class is ialu_mem_imm although the operand is a
// register (the sal/shr register forms use ialu_reg) - confirm intended.
12404 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12405 %{
12406 predicate(!UseAPX);
12407 match(Set dst (RShiftL dst shift));
12408 effect(KILL cr);
12409
12410 format %{ "sarq $dst, $shift" %}
12411 ins_encode %{
12412 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12413 %}
12414 ins_pipe(ialu_mem_imm);
12415 %}
12416
12417 // Arithmetic Shift Right by immediate
// Long, three-operand (NDD) form: dst = src >> shift. The count operand is a
// full immI (not immI8); the encoder masks it with 0x3F before emitting
// esarq. Selected only when UseAPX is on; operand 1 is tagged NDD-demotable.
// NOTE(review): pipeline class is ialu_mem_imm although the source is a
// register (the sal/shr NDD register forms use ialu_reg) - confirm intended.
12418 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12419 %{
12420 predicate(UseAPX);
12421 match(Set dst (RShiftL src shift));
12422 effect(KILL cr);
12423 flag(PD::Flag_ndd_demotable_opr1);
12424
12425 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12426 ins_encode %{
12427 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12428 %}
12429 ins_pipe(ialu_mem_imm);
12430 %}
12431
// Arithmetic Shift Right of a loaded long by immediate, NDD form:
// dst = [src] >> shift. The immI count is masked with 0x3F before esarq is
// emitted with an Address source. Selected only when UseAPX is on; kills the
// flags. No NDD-demotable flag is set here.
12432 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12433 %{
12434 predicate(UseAPX);
12435 match(Set dst (RShiftL (LoadL src) shift));
12436 effect(KILL cr);
12437
12438 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12439 ins_encode %{
12440 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12441 %}
12442 ins_pipe(ialu_mem_imm);
12443 %}
12444
12445 // Arithmetic Shift Right by immediate
// Read-modify-write on memory: matches a StoreL of (LoadL dst) >> shift back
// to the same address and emits a single sarq on that address. The immI
// count is masked with 0x3F first. Kills the flags; has no predicate.
12446 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12447 %{
12448 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12449 effect(KILL cr);
12450
12451 format %{ "sarq $dst, $shift" %}
12452 ins_encode %{
12453 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12454 %}
12455 ins_pipe(ialu_mem_imm);
12456 %}
12457
12458 // Arithmetic Shift Right by variable
// Long, in-place register form with the (int) count pinned to rcx.
// Selected only when BMI2 is not reported as supported; emits the
// one-operand sarq and kills the flags.
12459 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12460 %{
12461 predicate(!VM_Version::supports_bmi2());
12462 match(Set dst (RShiftL dst shift));
12463 effect(KILL cr);
12464
12465 format %{ "sarq $dst, $shift" %}
12466 ins_encode %{
12467 __ sarq($dst$$Register);
12468 %}
12469 ins_pipe(ialu_reg_reg);
12470 %}
12471
12472 // Arithmetic Shift Right by variable
// Read-modify-write on memory with the count pinned to rcx: matches a StoreL
// of (LoadL dst) >> shift back to the same address. Selected only when BMI2
// is not reported as supported; emits sarq on the address and kills the flags.
12473 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12474 %{
12475 predicate(!VM_Version::supports_bmi2());
12476 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12477 effect(KILL cr);
12478
12479 format %{ "sarq $dst, $shift" %}
12480 ins_encode %{
12481 __ sarq($dst$$Address);
12482 %}
12483 ins_pipe(ialu_mem_reg);
12484 %}
12485
// Arithmetic Shift Right by variable, BMI2 form: dst = src >> shift for
// longs, with the count in any int register. Selected only when BMI2 is
// reported as supported; emits sarxq. No flags operand is declared.
12486 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12487 %{
12488 predicate(VM_Version::supports_bmi2());
12489 match(Set dst (RShiftL src shift));
12490
12491 format %{ "sarxq $dst, $src, $shift" %}
12492 ins_encode %{
12493 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12494 %}
12495 ins_pipe(ialu_reg_reg);
12496 %}
12497
// Arithmetic Shift Right of a loaded long by variable, BMI2 form:
// dst = [src] >> shift. Selected only when BMI2 is reported as supported;
// emits sarxq with an Address source at an explicit cost of 175.
12498 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12499 %{
12500 predicate(VM_Version::supports_bmi2());
12501 match(Set dst (RShiftL (LoadL src) shift));
12502 ins_cost(175);
12503 format %{ "sarxq $dst, $src, $shift" %}
12504 ins_encode %{
12505 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12506 %}
12507 ins_pipe(ialu_reg_mem);
12508 %}
12509
12510 // Logical Shift Right by 8-bit immediate
// Long, in-place register form: dst = dst >>> shift with an immI8 count.
// Selected only when UseAPX is off; emits shrq and kills the flags.
12511 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12512 %{
12513 predicate(!UseAPX);
12514 match(Set dst (URShiftL dst shift));
12515 effect(KILL cr);
12516
12517 format %{ "shrq $dst, $shift" %}
12518 ins_encode %{
12519 __ shrq($dst$$Register, $shift$$constant);
12520 %}
12521 ins_pipe(ialu_reg);
12522 %}
12523
12524 // Logical Shift Right by 8-bit immediate
// Long, three-operand (NDD) form: dst = src >>> shift with an immI8 count.
// Selected only when UseAPX is on; emits eshrq, kills the flags, and tags
// operand 1 as NDD-demotable.
12525 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12526 %{
12527 predicate(UseAPX);
12528 match(Set dst (URShiftL src shift));
12529 effect(KILL cr);
12530 flag(PD::Flag_ndd_demotable_opr1);
12531
12532 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12533 ins_encode %{
12534 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12535 %}
12536 ins_pipe(ialu_reg);
12537 %}
12538
// Logical Shift Right of a loaded long by 8-bit immediate, NDD form:
// dst = [src] >>> shift. Selected only when UseAPX is on; emits eshrq with an
// Address source and kills the flags. No NDD-demotable flag is set here.
12539 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12540 %{
12541 predicate(UseAPX);
12542 match(Set dst (URShiftL (LoadL src) shift));
12543 effect(KILL cr);
12544
12545 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12546 ins_encode %{
12547 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12548 %}
12549 ins_pipe(ialu_reg);
12550 %}
12551
12552 // Logical Shift Right by 8-bit immediate
// Read-modify-write on memory: matches a StoreL of (LoadL dst) >>> shift back
// to the same address and emits a single shrq on that address. Kills the
// flags; has no predicate.
12553 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12554 %{
12555 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12556 effect(KILL cr);
12557
12558 format %{ "shrq $dst, $shift" %}
12559 ins_encode %{
12560 __ shrq($dst$$Address, $shift$$constant);
12561 %}
12562 ins_pipe(ialu_mem_imm);
12563 %}
12564
12565 // Logical Shift Right by variable
// Long, in-place register form with the (int) count pinned to rcx.
// Selected only when BMI2 is not reported as supported; emits the
// one-operand shrq and kills the flags.
12566 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12567 %{
12568 predicate(!VM_Version::supports_bmi2());
12569 match(Set dst (URShiftL dst shift));
12570 effect(KILL cr);
12571
12572 format %{ "shrq $dst, $shift" %}
12573 ins_encode %{
12574 __ shrq($dst$$Register);
12575 %}
12576 ins_pipe(ialu_reg_reg);
12577 %}
12578
12579 // Logical Shift Right by variable
// Read-modify-write on memory with the count pinned to rcx: matches a StoreL
// of (LoadL dst) >>> shift back to the same address. Selected only when BMI2
// is not reported as supported; emits shrq on the address and kills the flags.
12580 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12581 %{
12582 predicate(!VM_Version::supports_bmi2());
12583 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12584 effect(KILL cr);
12585
12586 format %{ "shrq $dst, $shift" %}
12587 ins_encode %{
12588 __ shrq($dst$$Address);
12589 %}
12590 ins_pipe(ialu_mem_reg);
12591 %}
12592
// Logical Shift Right by variable, BMI2 form: dst = src >>> shift for longs,
// with the count in any int register. Selected only when BMI2 is reported as
// supported; emits shrxq. No flags operand is declared.
12593 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12594 %{
12595 predicate(VM_Version::supports_bmi2());
12596 match(Set dst (URShiftL src shift));
12597
12598 format %{ "shrxq $dst, $src, $shift" %}
12599 ins_encode %{
12600 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12601 %}
12602 ins_pipe(ialu_reg_reg);
12603 %}
12604
// Logical Shift Right of a loaded long by variable, BMI2 form:
// dst = [src] >>> shift. Selected only when BMI2 is reported as supported;
// emits shrxq with an Address source at an explicit cost of 175.
12605 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12606 %{
12607 predicate(VM_Version::supports_bmi2());
12608 match(Set dst (URShiftL (LoadL src) shift));
12609 ins_cost(175);
12610 format %{ "shrxq $dst, $src, $shift" %}
12611 ins_encode %{
12612 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12613 %}
12614 ins_pipe(ialu_reg_mem);
12615 %}
12616
12617 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12618 // This idiom is used by the compiler for the i2b bytecode.
// The matched pair (RShiftI (LShiftI src 24) 24) is replaced by a single
// movsbl from src into dst. No flags operand is declared.
12619 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12620 %{
12621 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12622
12623 format %{ "movsbl $dst, $src\t# i2b" %}
12624 ins_encode %{
12625 __ movsbl($dst$$Register, $src$$Register);
12626 %}
12627 ins_pipe(ialu_reg_reg);
12628 %}
12629
12630 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12631 // This idiom is used by the compiler for the i2s bytecode.
// The matched pair (RShiftI (LShiftI src 16) 16) is replaced by a single
// movswl from src into dst. No flags operand is declared.
12632 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12633 %{
12634 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12635
12636 format %{ "movswl $dst, $src\t# i2s" %}
12637 ins_encode %{
12638 __ movswl($dst$$Register, $src$$Register);
12639 %}
12640 ins_pipe(ialu_reg_reg);
12641 %}
12642
12643 // ROL/ROR instructions
12644
12645 // Rotate left by constant.
// Int, in-place form: dst = rotl(dst, shift) with an immI8 count. Selected
// only when BMI2 is not reported as supported and the node's type is T_INT;
// emits roll and kills the flags.
12646 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12647 %{
12648 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12649 match(Set dst (RotateLeft dst shift));
12650 effect(KILL cr);
12651 format %{ "roll $dst, $shift" %}
12652 ins_encode %{
12653 __ roll($dst$$Register, $shift$$constant);
12654 %}
12655 ins_pipe(ialu_reg);
12656 %}
12657
// Rotate left by constant, BMI2 form: dst = rotl(src, shift) for ints.
// Selected when UseAPX is off, BMI2 is reported as supported and the node's
// type is T_INT. The left rotate is emitted as rorxl (a right rotate) by
// 32 - (shift & 31). No flags operand is declared.
12658 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12659 %{
12660 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12661 match(Set dst (RotateLeft src shift));
12662 format %{ "rolxl $dst, $src, $shift" %}
12663 ins_encode %{
12664 int shift = 32 - ($shift$$constant & 31);
12665 __ rorxl($dst$$Register, $src$$Register, shift);
12666 %}
12667 ins_pipe(ialu_reg_reg);
12668 %}
12669
// Rotate left of a loaded int by constant, BMI2 form: dst = rotl([src], shift).
// Selected when BMI2 is reported as supported and the node's type is T_INT
// (no UseAPX term in the predicate). The left rotate is emitted as rorxl with
// an Address source, by 32 - (shift & 31), at an explicit cost of 175.
12670 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12671 %{
12672 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12673 match(Set dst (RotateLeft (LoadI src) shift));
12674 ins_cost(175);
12675 format %{ "rolxl $dst, $src, $shift" %}
12676 ins_encode %{
12677 int shift = 32 - ($shift$$constant & 31);
12678 __ rorxl($dst$$Register, $src$$Address, shift);
12679 %}
12680 ins_pipe(ialu_reg_mem);
12681 %}
12682
12683 // Rotate Left by variable
// Int, in-place form with the count pinned to rcx: dst = rotl(dst, shift).
// Selected when UseAPX is off and the node's type is T_INT; emits the
// one-operand roll and kills the flags.
12684 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12685 %{
12686 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12687 match(Set dst (RotateLeft dst shift));
12688 effect(KILL cr);
12689 format %{ "roll $dst, $shift" %}
12690 ins_encode %{
12691 __ roll($dst$$Register);
12692 %}
12693 ins_pipe(ialu_reg_reg);
12694 %}
12695
12696 // Rotate Left by variable
// Int, three-operand (NDD) form with the count pinned to rcx:
// dst = rotl(src, shift). Selected when UseAPX is on and the node's type is
// T_INT; emits eroll, kills the flags, and tags operand 1 as NDD-demotable.
12697 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12698 %{
12699 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12700 match(Set dst (RotateLeft src shift));
12701 effect(KILL cr);
12702 flag(PD::Flag_ndd_demotable_opr1);
12703
12704 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12705 ins_encode %{
12706 __ eroll($dst$$Register, $src$$Register, false);
12707 %}
12708 ins_pipe(ialu_reg_reg);
12709 %}
12710
// Rotate Right by constant, int, in place; selected only when BMI2 is not
// available. The flags register is declared killed.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12723
// Rotate Right by constant (BMI2, non-APX), int, separate destination.
// Emits rorxl with the immediate count; no flags operand.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  match(Set dst (RotateRight src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12735
// Rotate Right by constant (BMI2), int, source loaded from memory.
// Emits rorxl directly on the memory operand.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  match(Set dst (RotateRight (LoadI src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  ins_cost(175);

  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12747
// Rotate Right by variable (non-APX), int, in place.
// The count is an rcx_RegI operand; the flags register is declared killed.
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12760
// Rotate Right by variable using the APX NDD form, int, separate destination.
// The count is an rcx_RegI operand; the rule is flagged NDD-demotable on
// operand 1 and declares the flags register killed.
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    __ erorl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12775
// Rotate Left by constant, long, in place; selected only when BMI2 is not
// available. The flags register is declared killed.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RotateLeft dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12788
// Rotate Left by constant (BMI2, non-APX), long, separate destination.
// Emitted as rorxq with a count of 64 - (shift & 63); no flags operand.
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  match(Set dst (RotateLeft src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    // Convert the left-rotate amount into the equivalent right-rotate count.
    int ror_count = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12800
// Rotate Left by constant (BMI2), long, source loaded from memory.
// Emitted as rorxq on the memory operand with a count of 64 - (shift & 63).
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  match(Set dst (RotateLeft (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  ins_cost(175);

  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    // Convert the left-rotate amount into the equivalent right-rotate count.
    int ror_count = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12813
// Rotate Left by variable (non-APX), long, in place.
// The count is an rcx_RegI operand; the flags register is declared killed.
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateLeft dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12827
// Rotate Left by variable using the APX NDD form, long, separate destination.
// The count is an rcx_RegI operand; the rule is flagged NDD-demotable on
// operand 1 and declares the flags register killed.
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateLeft src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    __ erolq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12842
// Rotate Right by constant, long, in place; selected only when BMI2 is not
// available. The flags register is declared killed.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12855
// Rotate Right by constant (BMI2), long, separate destination.
// Emits rorxq with the immediate count; no flags operand.
// NOTE(review): unlike rorI_immI8, rolI_immI8 and rolL_immI8, this predicate
// has no !UseAPX term -- confirm whether that difference is intended.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  match(Set dst (RotateRight src shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12867
// Rotate Right by constant (BMI2), long, source loaded from memory.
// Emits rorxq directly on the memory operand.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  match(Set dst (RotateRight (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  ins_cost(175);

  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12879
// Rotate Right by variable (non-APX), long, in place.
// The count is an rcx_RegI operand; the flags register is declared killed.
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12892
// Rotate Right by variable using the APX NDD form, long, separate destination.
// The count is an rcx_RegI operand; the rule is flagged NDD-demotable on
// operand 1 and declares the flags register killed.
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    __ erorq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12907
12908 //----------------------------- CompressBits/ExpandBits ------------------------
12909
// CompressBits on long values with the mask in a register; emits pextq.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask)
%{
  match(Set dst (CompressBits src mask));
  predicate(n->bottom_type()->isa_long());

  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe(pipe_slow);
%}
12919
// ExpandBits on long values with the mask in a register; emits pdepq.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask)
%{
  match(Set dst (ExpandBits src mask));
  predicate(n->bottom_type()->isa_long());

  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe(pipe_slow);
%}
12929
// CompressBits on long values with the mask loaded from memory; emits pextq
// with a memory mask operand.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask)
%{
  match(Set dst (CompressBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());

  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe(pipe_slow);
%}
12939
// ExpandBits on long values with the mask loaded from memory; emits pdepq
// with a memory mask operand.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask)
%{
  match(Set dst (ExpandBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());

  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe(pipe_slow);
%}
12949
12950
12951 // Logical Instructions
12952
12953 // Integer Logical Instructions
12954
12955 // And Instructions
// And Register with Register (non-APX): two-operand int AND, dst is also the
// first source. Declares the flags register killed and records the flag
// effects listed in flag().
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12970
// And Register with Register using New Data Destination (NDD): three-operand
// int AND selected when UseAPX is set. Flagged NDD-demotable on operands 1
// and 2; the flags register is declared killed.
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12986
// And Register with Immediate 255: the mask is an immI_255 operand, so the
// AND is emitted as a zero-extending byte move (movzbl). No flags operand.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzbl $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12998
// And Register with Immediate 255 and promote to long: matches the ConvI2L
// of the masked int and emits a single movzbl into the long destination.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13010
// And Register with Immediate 65535: the mask is an immI_65535 operand, so
// the AND is emitted as a zero-extending word move (movzwl). No flags operand.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13022
// And Register with Immediate 65535 and promote to long: matches the ConvI2L
// of the masked int and emits a single movzwl into the long destination.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13034
// Can skip int2long conversions after AND with small bitmask (BMI2 only).
// The mask is an immI_Pow2M1 operand: its bit count, exact_log2(mask + 1), is
// loaded into the TEMP register and bzhiq then produces the long result.
// The flags register is declared killed.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI src mask)));
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);

  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
  ins_encode %{
    // tmp := number of low bits to keep.
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13049
// And Register with Immediate (non-APX): two-operand int AND of dst with an
// immI constant. The flags register is declared killed.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13064
// And Register with Immediate using NDD (UseAPX): dst = src1 & constant.
// Flagged NDD-demotable on operand 1; the flags register is declared killed.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13078
// And loaded Memory value with Immediate using NDD (UseAPX): the int at src1
// is ANDed with a constant and the result goes to register dst. No
// demotable flag is set; the flags register is declared killed.
instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (AndI (LoadI src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13092
// And Register with Memory (non-APX): dst &= int loaded from src.
// The flags register is declared killed.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13108
// And Register with Memory using NDD (UseAPX): dst = src1 & int loaded from
// src2. Flagged NDD-demotable on operands 1 and 2; the flags register is
// declared killed.
instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13123
// And Memory with Register (byte): read-modify-write of the byte at dst,
// matching StoreB(dst, AndI(LoadB(dst), src)). The flags register is
// declared killed.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andb $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13138
// And Memory with Register (int): read-modify-write of the int at dst,
// matching StoreI(dst, AndI(LoadI(dst), src)). The flags register is
// declared killed.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13152
// And Memory with Immediate (int): read-modify-write of the int at dst with
// an immI constant. The flags register is declared killed.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13167
13168 // BMI1 instructions
// ANDN with a memory second source: matches AndI(XorI(src1, minus_1),
// LoadI(src2)) and emits andnl. Requires BMI1 and AVX per the predicate; the
// flags register is declared killed.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13183
// ANDN with register sources: matches AndI(XorI(src1, minus_1), src2) and
// emits andnl. Requires BMI1 and AVX per the predicate; the flags register
// is declared killed.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13197
// BLSI with a register source: matches AndI(SubI(imm_zero, src), src) and
// emits blsil. Requires BMI1 and AVX per the predicate; the flags register
// is declared killed.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr)
%{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13211
// BLSI with a memory source: matches AndI(SubI(imm_zero, LoadI(src)),
// LoadI(src)) and emits blsil on the memory operand. Requires BMI1 and AVX
// per the predicate; the flags register is declared killed.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr)
%{
  match(Set dst (AndI (SubI imm_zero (LoadI src)) (LoadI src)));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13226
// BLSMSK with a memory source: matches XorI(AddI(LoadI(src), minus_1),
// LoadI(src)) and emits blsmskl on the memory operand. Requires BMI1 and AVX
// per the predicate; the flags register is declared killed.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src)));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13242
// BLSMSK with a register source: matches XorI(AddI(src, minus_1), src) and
// emits blsmskl. Requires BMI1 and AVX per the predicate; the flags register
// is declared killed.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13258
// BLSR with a register source: matches AndI(AddI(src, minus_1), src) and
// emits blsrl. Requires BMI1 and AVX per the predicate; the flags register
// is declared killed.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}
  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}
  // Register-only form: use the register pipe class, as the other
  // register-source BMI1 rules (blsiI_rReg_rReg, blsmskI_rReg_rReg) do.
  // This rule previously named ialu_reg_mem although it has no memory operand.
  ins_pipe(ialu_reg);
%}
13274
// BLSR with a memory source: matches AndI(AddI(LoadI(src), minus_1),
// LoadI(src)) and emits blsrl on the memory operand. Requires BMI1 and AVX
// per the predicate; the flags register is declared killed.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src)));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}
  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}
  // Memory-source form: use the register/memory pipe class, as the other
  // memory-source BMI1 rules (blsiI_rReg_mem, blsmskI_rReg_mem) do.
  // This rule previously named ialu_reg although it reads memory.
  ins_pipe(ialu_reg_mem);
%}
13291
13292 // Or Instructions
// Or Register with Register (non-APX): two-operand int OR, dst is also the
// first source. The flags register is declared killed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13307
// Or Register with Register using New Data Destination (NDD): three-operand
// int OR selected when UseAPX is set. Flagged NDD-demotable on operands 1
// and 2; the flags register is declared killed.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13322
// Or Register with Immediate (non-APX): two-operand int OR of dst with an
// immI constant. The flags register is declared killed.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13337
// Or Register with Immediate using NDD (UseAPX): dst = src1 | constant.
// Flagged NDD-demotable on operand 1; the flags register is declared killed.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13351
// Or Immediate with Register using NDD (UseAPX): same operation as
// orI_rReg_rReg_imm_ndd but with the constant as the first ideal input, so
// the encoding passes $src2 (the register) before $src1 (the constant).
// The flags register is declared killed.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  // Operand 1 is the immediate; the only register source is operand 2, so
  // the demotable flag must name operand 2 (it previously named operand 1).
  // NOTE(review): presumably the flag lets the NDD form be demoted when dst
  // shares a register with the flagged operand -- confirm against its consumer.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13365
// Or loaded Memory value with Immediate using NDD (UseAPX): the int at src1
// is ORed with a constant and the result goes to register dst. No demotable
// flag is set; the flags register is declared killed.
instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (OrI (LoadI src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13379
// Or Register with Memory (non-APX): dst |= int loaded from src.
// The flags register is declared killed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13395
// Or Register with Memory using NDD (UseAPX): dst = src1 | int loaded from
// src2. Flagged NDD-demotable on operands 1 and 2; the flags register is
// declared killed.
instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13410
// Or Memory with Register (byte): read-modify-write of the byte at dst,
// matching StoreB(dst, OrI(LoadB(dst), src)). The flags register is
// declared killed.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orb $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13425
// Or Memory with Register (int): read-modify-write of the int at dst,
// matching StoreI(dst, OrI(LoadI(dst), src)). The flags register is
// declared killed.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13439
// Or Memory with Immediate (int): read-modify-write of the int at dst with
// an immI constant. The flags register is declared killed.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13454
13455 // Xor Instructions
// Xor Register with Register (non-APX): two-operand int XOR, dst is also the
// first source. The flags register is declared killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13470
// Xor Register with Register using New Data Destination (NDD): three-operand
// int XOR selected when UseAPX is set. Flagged NDD-demotable on operands 1
// and 2; the flags register is declared killed.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13485
// Xor Register with Immediate -1 (non-APX): the constant is an immI_M1
// operand, so the XOR is emitted as an in-place notl. No flags operand.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13498
// Xor Register with Immediate -1 using NDD (UseAPX): emitted as enotl with a
// separate destination. Flagged NDD-demotable on operand 1; no flags operand.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl $dst, $src" %}
  ins_encode %{
    __ enotl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13511
// Xor Register with Immediate (non-APX): two-operand int XOR of dst with an
// immI constant other than -1. The flags register is declared killed.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  // The constant -1 is excluded here so that choosing xorI_rReg_im1 for it
  // does not depend on instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13527
// Xor Register with Immediate using NDD (UseAPX): dst = src1 ^ constant for
// constants other than -1. Flagged NDD-demotable on operand 1; the flags
// register is declared killed.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  // The constant -1 is excluded here so that choosing xorI_rReg_im1_ndd for
  // it does not depend on instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13542
// Xor loaded Memory value with Immediate using NDD (UseAPX): the int at src1
// is XORed with a constant and the result goes to register dst (memory is
// only read). No demotable flag is set; the flags register is declared killed.
instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (XorI (LoadI src1) src2));
  predicate(UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13558
// Xor Register with Memory (non-APX): dst ^= int loaded from src.
// The flags register is declared killed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13574
// Xor Register with Memory using NDD (UseAPX): dst = src1 ^ int loaded from
// src2. Flagged NDD-demotable on operands 1 and 2; the flags register is
// declared killed.
instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13589
// Xor Memory with Register (byte): read-modify-write of the byte at dst,
// matching StoreB(dst, XorI(LoadB(dst), src)). The flags register is
// declared killed.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorb $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13604
// Xor Memory with Register, int-sized read-modify-write:
// the LoadI / XorI / StoreI on the same address collapse into one xorl.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13618
// Xor Memory with Immediate, int-sized read-modify-write:
// [dst] = [dst] ^ src, where src is an int constant.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13633
13634
13635 // Long Logical Instructions
13636
13637 // And Instructions
// And Register with Register (two-operand form, used when APX is disabled):
// dst = dst & src. The flags register is declared killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13652
// And Register with Register using New Data Destination (NDD):
// dst = src1 & src2 with a destination distinct from both sources.
// Only selected when UseAPX is set; both sources are tagged as demotable.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13668
// And Register with Immediate 255, implemented as a zero-extending byte move.
// No flags operand is declared for this rule.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // The 32-bit movzbl already clears the upper 32 bits, so no REX.W is needed.
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13681
// And Register with Immediate 65535, implemented as a zero-extending word move.
// No flags operand is declared for this rule.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // The 32-bit movzwl already clears the upper 32 bits, so no REX.W is needed.
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13694
// And Register with Immediate (two-operand form, used when APX is disabled):
// dst = dst & src, where src is an immL32 constant.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13709
// And Register with Immediate into a separate destination (APX form):
// dst = src1 & src2, where src2 is an immL32 constant.
// Only the register source (operand 1) is tagged as demotable.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13723
// And Memory with Immediate into a register (APX form):
// dst = [src1] & src2, folding the LoadL into the instruction.
// NOTE(review): the sibling register/memory forms use ialu_reg_mem and an
// explicit ins_cost; this rule uses ialu_reg and the default cost - confirm
// that is intended.
instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13737
// And Register with Memory (two-operand form, used when APX is disabled).
// The LoadL is folded into the instruction: dst = dst & [src].
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13753
// And Register with Memory into a separate destination (APX form):
// dst = src1 & [src2]. Both source operands are tagged as demotable.
instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13768
// And Memory with Register, long-sized read-modify-write:
// the LoadL / AndL / StoreL on the same address collapse into one andq.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13783
// And Memory with Immediate, long-sized read-modify-write:
// [dst] = [dst] & src, where src is an immL32 constant.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13798
// Clear a single bit in memory: [dst] = [dst] & con, where not(con) is a
// power of two. Emitted as a bit-test-and-reset on the bit index log2(~con).
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  // Only take constants that really need 64 bits (not(con) is a power of 2
  // with bit index above 30); AND/OR already handle 8/32-bit values well.
  // n->in(3)->in(2) is read here as the constant input of the AndL.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
13815
13816 // BMI1 instructions
// And-not with a memory operand: dst = ~src1 & [src2].
// The complement is recognised as (XorL src1 -1).
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnq $dst, $src1, $src2" %}
  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13831
// And-not, register-only form: dst = ~src1 & src2.
// The complement is recognised as (XorL src1 -1).
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq $dst, $src1, $src2" %}
  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // No memory operand is involved, so use the register/register pipeline
  // class rather than the register/memory one.
  ins_pipe(ialu_reg_reg);
%}
13845
// Isolate lowest set bit, register form: dst = (0 - src) & src.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr)
%{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13859
// Isolate lowest set bit, memory form: dst = (0 - [src]) & [src].
// Both LoadL occurrences in the pattern refer to the same memory operand.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr)
%{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13874
// Mask up to lowest set bit, memory form: dst = ([src] + -1) ^ [src].
// Annotated as clearing (not setting) the zero flag.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13890
// Mask up to lowest set bit, register form: dst = (src + -1) ^ src.
// Annotated as clearing (not setting) the zero flag.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13906
// Reset lowest set bit, register form: dst = (src + -1) & src.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq $dst, $src" %}
  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13922
// Reset lowest set bit, memory form: dst = ([src] + -1) & [src].
// Both LoadL occurrences in the pattern refer to the same memory operand.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq $dst, $src" %}
  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}
  // The source is a memory operand, so use the register/memory pipeline
  // class like the other BMI1 memory forms (blsiq, blsmskq).
  ins_pipe(ialu_reg_mem);
%}
13939
13940 // Or Instructions
// Or Register with Register (two-operand form, used when APX is disabled):
// dst = dst | src. The flags register is declared killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13955
// Or Register with Register using New Data Destination (NDD):
// dst = src1 | src2 with a destination distinct from both sources.
// Only selected when UseAPX is set; both sources are tagged as demotable.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13971
// Or a long with a pointer reinterpreted as a long: dst = dst | (CastP2X src).
// The pointer operand is any_RegP so that R15 (the TLS register) can be
// matched directly, without spilling. There is no UseAPX predicate on this rule.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13984
// Or a value with a pointer reinterpreted as a long, into a separate
// destination (APX form): dst = src1 | (CastP2X src2).
// NOTE(review): src1 is declared any_RegP although it is the first input of an
// OrL; rRegL looks like the intended operand class - confirm before changing.
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr)
%{
  // eorq is the APX three-operand encoding, so this rule must be restricted
  // to UseAPX like every other *_ndd rule.
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13996
// Or Register with Immediate (two-operand form, used when APX is disabled):
// dst = dst | src, where src is an immL32 constant.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14011
// Or Register with Immediate into a separate destination (APX form):
// dst = src1 | src2, where src2 is an immL32 constant.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14025
// Or Immediate with Register into a separate destination (APX form).
// Same operation as the register/immediate rule but for the commuted input
// order: the constant is src1 and the register is src2, so the encoding
// passes them to eorq in swapped order.
// NOTE(review): the demotable tag names opr1 although the register source is
// the second declared input - confirm this is the intended operand index.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14039
// Or Memory with Immediate into a register (APX form):
// dst = [src1] | src2, folding the LoadL into the instruction.
// NOTE(review): the sibling register/memory forms use ialu_reg_mem and an
// explicit ins_cost; this rule uses ialu_reg and the default cost - confirm
// that is intended.
instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14054
// Or Register with Memory (two-operand form, used when APX is disabled).
// The LoadL is folded into the instruction: dst = dst | [src].
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14070
// Or Register with Memory into a separate destination (APX form):
// dst = src1 | [src2]. Both source operands are tagged as demotable.
instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14085
// Or Memory with Register, long-sized read-modify-write:
// the LoadL / OrL / StoreL on the same address collapse into one orq.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14100
// Or Memory with Immediate, long-sized read-modify-write:
// [dst] = [dst] | src, where src is an immL32 constant.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14115
// Set a single bit in memory: [dst] = [dst] | con, where con is a power of
// two. Emitted as a bit-test-and-set on the bit index log2(con).
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  // Only take constants that really need 64 bits (power of 2 with bit index
  // above 31); AND/OR already handle 8/32-bit values well.
  // n->in(3)->in(2) is read here as the constant input of the OrL.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq $dst, log2($con)\t# long" %}
  ins_encode %{
    __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
14132
14133 // Xor Instructions
// Xor Register with Register (two-operand form, used when APX is disabled):
// dst = dst ^ src. The flags register is declared killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14148
// Xor Register with Register using New Data Destination (NDD):
// dst = src1 ^ src2 with a destination distinct from both sources.
// Only selected when UseAPX is set; both sources are tagged as demotable.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
14163
// Xor Register with Immediate -1, i.e. bitwise complement: dst = ~dst.
// Emitted as notq; no flags operand is declared for this rule.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  predicate(!UseAPX);
  match(Set dst (XorL dst imm));

  format %{ "notq $dst" %}
  ins_encode %{
    __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
14176
// Xor Register with Immediate -1 into a separate destination (APX form):
// dst = ~src. Emitted as enotq; no flags operand is declared for this rule.
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorL src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq $dst, $src" %}
  ins_encode %{
    __ enotq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
14189
// Xor Register with Immediate (two-operand form, used when APX is disabled):
// dst = dst ^ src, where src is an immL32 constant other than -1.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  // Explicitly reject the constant -1 so that xorL_rReg_im1 is chosen for it
  // regardless of instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14205
// Xor Register with Immediate into a separate destination (APX form):
// dst = src1 ^ src2, where src2 is an immL32 constant other than -1.
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  // Explicitly reject the constant -1 so that xorL_rReg_im1_ndd is chosen for
  // it regardless of instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14220
// Xor Memory with Immediate into a register (APX form):
// dst = [src1] ^ src2, folding the LoadL into the instruction.
instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14236
// Xor Register with Memory (two-operand form, used when APX is disabled).
// The LoadL is folded into the instruction: dst = dst ^ [src].
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14252
// Xor Register with Memory into a separate destination (APX form):
// dst = src1 ^ [src2]. Both source operands are tagged as demotable.
instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14267
// Xor Memory with Register, long-sized read-modify-write:
// the LoadL / XorL / StoreL on the same address collapse into one xorq.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14282
// Xor Memory with Immediate, long-sized read-modify-write:
// [dst] = [dst] ^ src, where src is an immL32 constant.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14297
// Materialize the signed "less than" mask: dst = (p < q) ? -1 : 0.
// Compares p with q, sets dst to 0/1 on the "less" condition, then negates it
// so that 1 becomes all ones. The flags register is declared killed.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Each emitted instruction goes on its own line of the disassembly listing;
  // the setcc line needs its own "\n\t" so that negl is not glued onto it.
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14314
// Less-than mask against the constant zero, in place: dst = CmpLTMask(dst, 0).
// Implemented as an arithmetic right shift of dst by 31.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}
14327
// Conditional add: p = (CmpLTMask(p, q) & y) + (p - q), computed with a short
// branch instead of materializing the mask.
/* Better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);

  ins_cost(300);
  format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
            "jge done\n\t"
            "addl $p,$y\n"
            "done: " %}
  ins_encode %{
    Label skip_add;
    __ subl($p$$Register, $q$$Register);
    // Branch on the flags left by the subtraction: y is only added when the
    // original p was less than q.
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl($p$$Register, $y$$Register);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
14350
// Conditional clear: y = CmpLTMask(p, q) & y, i.e. y is kept when p < q and
// zeroed otherwise, computed with a short branch instead of a mask.
/* Better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);
  format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jlt done\n\t"
            "xorl $y, $y\n"
            "done: " %}
  ins_encode %{
    Label keep_y;
    __ cmpl($p$$Register, $q$$Register);
    // p < q: the mask is all ones, so y stays as it is.
    __ jccb(Assembler::less, keep_y);
    __ xorl($y$$Register, $y$$Register);
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
14375
14376
14377 //---------- FP Instructions------------------------------------------------
14378
// Float compare into an rFlagsRegU flags operand, register/register form.
// After ucomiss, emit_cmpfp_fixup() appends the flag fix-up sequence that the
// format string spells out (taken when the compare saw a NaN).
// Really expensive, avoid
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14397
// Float compare into an rFlagsRegUCF flags operand, register/register form.
// A bare ucomiss with no flag fix-up afterwards.
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14408
// Float compare into an rFlagsRegUCFE flags operand, register/register form.
// Uses evucomxss instead of ucomiss.
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14419
// Float compare into an rFlagsRegUCF flags operand, register/memory form.
// The LoadF of the second input is folded into ucomiss.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14430
// Float compare into an rFlagsRegUCFE flags operand, register/memory form.
// The LoadF of the second input is folded into evucomxss.
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    __ evucomxss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14441
// Float compare into an rFlagsRegUCF flags operand, register/constant form.
// The float constant is read from the constant table via $constantaddress.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14452
// Float compare into an rFlagsRegUCFE flags operand, register/constant form.
// The float constant is read from the constant table via $constantaddress.
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ evucomxss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14463
// Double compare into an rFlagsRegU flags operand, register/register form.
// After ucomisd, emit_cmpfp_fixup() appends the flag fix-up sequence that the
// format string spells out (taken when the compare saw a NaN).
// Really expensive, avoid
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14482
// Double compare into an rFlagsRegUCF flags operand, register/register form.
// A bare ucomisd with no flag fix-up afterwards.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The listing text names only the emitted instruction and its operands,
  // as in the float and memory variants of this rule.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14493
// Double compare into an rFlagsRegUCFE flags operand, register/register form.
// Uses evucomxsd instead of ucomisd.
instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The listing text names only the emitted instruction and its operands,
  // as in the float and memory variants of this rule.
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14504
// Double compare into an rFlagsRegUCF flags operand, register/memory form.
// The LoadD of the second input is folded into ucomisd.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14515
// Double compare into an rFlagsRegUCFE flags operand, register/memory form.
// The LoadD of the second input is folded into evucomxsd.
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    __ evucomxsd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14526
// Double compare into an rFlagsRegUCF flags operand, register/constant form.
// The double constant is read from the constant table via $constantaddress.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14536
// Double compare (CmpD) of a register against an immD constant, rFlagsRegUCFE class.
// Uses evucomxsd with the constant-table address as second operand.
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister lhs = $src$$XMMRegister;
    __ evucomxsd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14547
// Three-way float compare (CmpF3, register/register): $dst receives -1, 0 or 1.
// ucomiss sets the flags; emit_cmpfp3() turns them into the integer result
// (sequence sketched in the format text). Flags are killed.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14568
// Three-way float compare (CmpF3) of a register against a folded LoadF:
// $dst receives -1, 0 or 1. Flags are killed.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14589
// Three-way float compare (CmpF3) of a register against an immF constant taken
// from the constant table: $dst receives -1, 0 or 1. Flags are killed.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14609
// Three-way double compare (CmpD3, register/register): $dst receives -1, 0 or 1.
// ucomisd sets the flags; emit_cmpfp3() materializes the result. Flags are killed.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14630
// Three-way double compare (CmpD3) of a register against a folded LoadD:
// $dst receives -1, 0 or 1. Flags are killed.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14651
// Three-way double compare (CmpD3) of a register against an immD constant taken
// from the constant table: $dst receives -1, 0 or 1. Flags are killed.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14671
14672 //----------Arithmetic Conversion Instructions---------------------------------
14673
// float -> double (ConvF2D), register to register, via cvtss2sd.
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    const XMMRegister wide   = $dst$$XMMRegister;
    const XMMRegister narrow = $src$$XMMRegister;
    __ cvtss2sd(wide, narrow);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14684
// float -> double (ConvF2D) with the LoadF folded into cvtss2sd.
// Only selected when UseAVX == 0.
instruct convF2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    const XMMRegister wide = $dst$$XMMRegister;
    __ cvtss2sd(wide, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14696
// double -> float (ConvD2F), register to register, via cvtsd2ss.
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    const XMMRegister narrow = $dst$$XMMRegister;
    const XMMRegister wide   = $src$$XMMRegister;
    __ cvtsd2ss(narrow, wide);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14707
// double -> float (ConvD2F) with the LoadD folded into cvtsd2ss.
// Only selected when UseAVX == 0.
instruct convD2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    const XMMRegister narrow = $dst$$XMMRegister;
    __ cvtsd2ss(narrow, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14719
14720 // XXX do mem variants
// float -> int (ConvF2I) for CPUs without AVX10.2.
// Delegates to the convertF2I() macro-assembler helper; flags are killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    const Register    int_dst = $dst$$Register;
    const XMMRegister fp_src  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, int_dst, fp_src);
  %}
  ins_pipe(pipe_slow);
%}
14732
// float -> int (ConvF2I) when AVX10.2 is available: single evcvttss2sisl,
// no flags effect declared.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    const Register    int_dst = $dst$$Register;
    const XMMRegister fp_src  = $src$$XMMRegister;
    __ evcvttss2sisl(int_dst, fp_src);
  %}
  ins_pipe(pipe_slow);
%}
14743
// float -> int (ConvF2I) with the LoadF folded in, AVX10.2 only.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    const Register int_dst = $dst$$Register;
    __ evcvttss2sisl(int_dst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14754
// float -> long (ConvF2L) for CPUs without AVX10.2.
// Delegates to convertF2I() with T_LONG as the target type; flags are killed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    const Register    long_dst = $dst$$Register;
    const XMMRegister fp_src   = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, long_dst, fp_src);
  %}
  ins_pipe(pipe_slow);
%}
14766
// float -> long (ConvF2L) when AVX10.2 is available: single evcvttss2sisq.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    const Register    long_dst = $dst$$Register;
    const XMMRegister fp_src   = $src$$XMMRegister;
    __ evcvttss2sisq(long_dst, fp_src);
  %}
  ins_pipe(pipe_slow);
%}
14777
// float -> long (ConvF2L) with the LoadF folded in, AVX10.2 only.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    const Register long_dst = $dst$$Register;
    __ evcvttss2sisq(long_dst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14788
// double -> int (ConvD2I) for CPUs without AVX10.2.
// Delegates to convertF2I() with T_DOUBLE as the source type; flags are killed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    const Register    int_dst = $dst$$Register;
    const XMMRegister fp_src  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, int_dst, fp_src);
  %}
  ins_pipe(pipe_slow);
%}
14800
// double -> int (ConvD2I) when AVX10.2 is available: single evcvttsd2sisl.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    const Register    int_dst = $dst$$Register;
    const XMMRegister fp_src  = $src$$XMMRegister;
    __ evcvttsd2sisl(int_dst, fp_src);
  %}
  ins_pipe(pipe_slow);
%}
14811
// double -> int (ConvD2I) with the LoadD folded in, AVX10.2 only.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    const Register int_dst = $dst$$Register;
    __ evcvttsd2sisl(int_dst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14822
// double -> long (ConvD2L) for CPUs without AVX10.2.
// Delegates to convertF2I() with T_LONG/T_DOUBLE; flags are killed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    const Register    long_dst = $dst$$Register;
    const XMMRegister fp_src   = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, long_dst, fp_src);
  %}
  ins_pipe(pipe_slow);
%}
14834
// double -> long (ConvD2L) when AVX10.2 is available: single evcvttsd2sisq.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    const Register    long_dst = $dst$$Register;
    const XMMRegister fp_src   = $src$$XMMRegister;
    __ evcvttsd2sisq(long_dst, fp_src);
  %}
  ins_pipe(pipe_slow);
%}
14845
// double -> long (ConvD2L) with the LoadD folded in, AVX10.2 only.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    const Register long_dst = $dst$$Register;
    __ evcvttsd2sisq(long_dst, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14856
// RoundD: double in an XMM register to a long result.
// Delegates to the round_double() helper; needs one general temp plus rcx,
// and the destination itself is marked TEMP. Flags are killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    const Register    result  = $dst$$Register;
    const XMMRegister value   = $src$$XMMRegister;
    const Register    scratch = $rtmp$$Register;
    __ round_double(result, value, scratch, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14867
// RoundF: float in an XMM register to an int result.
// Delegates to the round_float() helper; needs one general temp plus rcx,
// and the destination itself is marked TEMP. Flags are killed.
// The format text lists the temporaries, matching round_double_reg.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src \t! using $rtmp and $rcx as TEMP" %}
  ins_encode %{
    __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14878
// int -> float (ConvI2F) from a general register, used when UseXmmI2F is off.
instruct convI2F_reg_reg(vlRegF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // Zero the destination first on AVX builds
      // (presumably to break a false dependency on its old contents -- TODO confirm).
      __ pxor(fp_dst, fp_dst);
    }
    __ cvtsi2ssl(fp_dst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14893
// int -> float (ConvI2F) with the LoadI folded into cvtsi2ssl.
// Only selected when UseAVX == 0.
instruct convI2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    __ cvtsi2ssl(fp_dst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14905
// int -> double (ConvI2D) from a general register, used when UseXmmI2D is off.
instruct convI2D_reg_reg(vlRegD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // Zero the destination first on AVX builds
      // (presumably to break a false dependency on its old contents -- TODO confirm).
      __ pxor(fp_dst, fp_dst);
    }
    __ cvtsi2sdl(fp_dst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14920
// int -> double (ConvI2D) with the LoadI folded into cvtsi2sdl.
// Only selected when UseAVX == 0.
instruct convI2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    __ cvtsi2sdl(fp_dst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14932
// int -> float (ConvI2F) variant selected by UseXmmI2F: the integer is first
// moved into the XMM destination, then converted in place with cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    __ movdl(fp_dst, $src$$Register);
    __ cvtdq2ps(fp_dst, fp_dst);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14946
// int -> double (ConvI2D) variant selected by UseXmmI2D: the integer is first
// moved into the XMM destination, then converted in place with cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    __ movdl(fp_dst, $src$$Register);
    __ cvtdq2pd(fp_dst, fp_dst);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14960
// long -> float (ConvL2F) from a general register via cvtsi2ssq.
instruct convL2F_reg_reg(vlRegF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // Zero the destination first on AVX builds
      // (presumably to break a false dependency on its old contents -- TODO confirm).
      __ pxor(fp_dst, fp_dst);
    }
    __ cvtsi2ssq(fp_dst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14974
// long -> float (ConvL2F) with the LoadL folded into cvtsi2ssq.
// Only selected when UseAVX == 0.
instruct convL2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    __ cvtsi2ssq(fp_dst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14986
// long -> double (ConvL2D) from a general register via cvtsi2sdq.
instruct convL2D_reg_reg(vlRegD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // Zero the destination first on AVX builds
      // (presumably to break a false dependency on its old contents -- TODO confirm).
      __ pxor(fp_dst, fp_dst);
    }
    __ cvtsi2sdq(fp_dst, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15000
// long -> double (ConvL2D) with the LoadL folded into cvtsi2sdq.
// Only selected when UseAVX == 0.
instruct convL2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    const XMMRegister fp_dst = $dst$$XMMRegister;
    __ cvtsi2sdq(fp_dst, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15012
// int -> long (ConvI2L), register to register, via movslq.
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq $dst, $src\t# i2l" %}
  ins_encode %{
    const Register wide   = $dst$$Register;
    const Register narrow = $src$$Register;
    __ movslq(wide, narrow);
  %}
  ins_pipe(ialu_reg_reg);
%}
15024
// Zero-extending int -> long: matches (ConvI2L src) masked with an
// immL_32bits constant. A movl is emitted only when source and destination
// were allocated to different registers.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    const bool same_register = ($dst$$reg == $src$$reg);
    if (!same_register) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
15038
// Zero-extending int -> long with the LoadI folded in: matches
// (ConvI2L (LoadI src)) masked with an immL_32bits constant, emitted as a movl load.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    const Register wide = $dst$$Register;
    __ movl(wide, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
15050
// Long AND with an immL_32bits mask, implemented as a 32-bit register move.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl $dst, $src\t# zero-extend long" %}
  ins_encode %{
    const Register to   = $dst$$Register;
    const Register from = $src$$Register;
    __ movl(to, from);
  %}
  ins_pipe(ialu_reg_reg);
%}
15061
// long -> int (ConvL2I), register to register, via a 32-bit movl.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl $dst, $src\t# l2i" %}
  ins_encode %{
    const Register narrow = $dst$$Register;
    const Register wide   = $src$$Register;
    __ movl(narrow, wide);
  %}
  ins_pipe(ialu_reg_reg);
%}
15072
15073
// MoveF2I: reinterpret a float stack slot as an int by loading it
// (rsp-relative) into a general register.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15085
// MoveI2F: reinterpret an int stack slot as a float by loading it
// (rsp-relative) into an XMM register with movflt().
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15097
// MoveD2L: reinterpret a double stack slot as a long by loading it
// (rsp-relative) into a general register.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15109
// MoveL2D from a long stack slot into an XMM register, variant chosen when
// UseXmmLoadAndClearUpper is off (format shows movlpd). Encoded through movdbl().
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15122
// MoveL2D from a long stack slot into an XMM register, variant chosen when
// UseXmmLoadAndClearUpper is on (format shows movsd). Encoded through movdbl().
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15135
15136
// MoveF2I: store a float XMM register into an int stack slot (rsp-relative).
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15148
// MoveI2F: store an int general register into a float stack slot (rsp-relative).
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
15160
// MoveD2L: store a double XMM register into a long stack slot (rsp-relative).
// The format tag names this instruction (it previously read MoveL2D_reg_stack,
// which mislabelled the rule in debug output).
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15172
// MoveL2D: store a long general register into a double stack slot (rsp-relative).
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15184
// MoveF2I: copy the bits of a float XMM register into a general register (movdl).
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveF2I" %}
  ins_encode %{
    const Register    gpr = $dst$$Register;
    const XMMRegister xmm = $src$$XMMRegister;
    __ movdl(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
15195
// MoveD2L: copy the bits of a double XMM register into a general register (movdq).
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveD2L" %}
  ins_encode %{
    const Register    gpr = $dst$$Register;
    const XMMRegister xmm = $src$$XMMRegister;
    __ movdq(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
15206
// MoveI2F: copy the bits of an int general register into an XMM register (movdl).
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveI2F" %}
  ins_encode %{
    const XMMRegister xmm = $dst$$XMMRegister;
    const Register    gpr = $src$$Register;
    __ movdl(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
15217
// MoveL2D: copy the bits of a long general register into an XMM register (movdq).
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    const XMMRegister xmm = $dst$$XMMRegister;
    const Register    gpr = $src$$Register;
    __ movdq(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
15228
// Fast clearing of an array.
// Small non-constant length ClearArray for non-AVX512 targets (UseAVX <= 2).
// Count is pinned to rcx and base to rdi; both are consumed. rax and the flags
// are killed and one XMM temp is reserved. Code generation is delegated to
// clear_mem(); the format template only sketches the expected sequence.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // Fifth argument is false here and true in the *_large variants;
    // no mask register is supplied (knoreg).
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15289
// Small non-constant length ClearArray for AVX512 targets (UseAVX > 2).
// Same register pinning as rep_stos, plus a mask-register temp that is handed
// to clear_mem(). The format template only sketches the expected sequence.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    // Fifth argument is false here and true in the *_large variants.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, false, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15350
// Large non-constant length ClearArray for non-AVX512 targets (UseAVX <= 2).
// Selected when the ClearArray node reports is_large(). Count is pinned to rcx
// and base to rdi; both are consumed, rax and the flags are killed.
// Code generation is delegated to clear_mem() with its fifth argument set to true.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr) %{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // No mask register is supplied on this non-EVEX path (knoreg).
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15401
// Large non-constant length ClearArray for AVX512 targets (UseAVX > 2).
// Selected when the ClearArray node reports is_large(). Same pinning as
// rep_stos_large, plus a mask-register temp that is handed to clear_mem().
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15452
// Small constant length ClearArray for AVX512 targets.
// Requires a non-large ClearArray, MaxVectorSize >= 32 and AVX512VL. The count
// is an immediate, so the constant-count overload of clear_mem() is used with
// general, XMM and mask temporaries.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    gpr_tmp  = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    __ clear_mem(base_reg, $cnt$$constant, gpr_tmp, xmm_tmp, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15466
// StrComp intrinsic, LL encoding, for CPUs without AVX512VL+BW.
// Operands are pinned by their register classes (rdi/rcx/rsi/rdx in, rax out);
// all four inputs are consumed and the flags are killed.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register s1  = $str1$$Register;
    const Register s2  = $str2$$Register;
    const Register n1  = $cnt1$$Register;
    const Register n2  = $cnt2$$Register;
    const Register res = $result$$Register;
    // No mask register on this path (knoreg).
    __ string_compare(s1, s2, n1, n2, res,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15482
// StrComp intrinsic, LL encoding, for CPUs with AVX512VL+BW.
// Same register pinning as string_compareL, plus a mask-register temp that is
// passed to string_compare().
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register s1  = $str1$$Register;
    const Register s2  = $str2$$Register;
    const Register n1  = $cnt1$$Register;
    const Register n2  = $cnt2$$Register;
    const Register res = $result$$Register;
    __ string_compare(s1, s2, n1, n2, res,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15498
// StrComp intrinsic, UU encoding, for CPUs without AVX512VL+BW.
// Operands are pinned by their register classes (rdi/rcx/rsi/rdx in, rax out);
// all four inputs are consumed and the flags are killed.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register s1  = $str1$$Register;
    const Register s2  = $str2$$Register;
    const Register n1  = $cnt1$$Register;
    const Register n2  = $cnt2$$Register;
    const Register res = $result$$Register;
    // No mask register on this path (knoreg).
    __ string_compare(s1, s2, n1, n2, res,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15514
// StrComp intrinsic, UU encoding, for CPUs with AVX512VL+BW.
// Same register pinning as string_compareU, plus a mask-register temp that is
// passed to string_compare().
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register s1  = $str1$$Register;
    const Register s2  = $str2$$Register;
    const Register n1  = $cnt1$$Register;
    const Register n2  = $cnt2$$Register;
    const Register res = $result$$Register;
    __ string_compare(s1, s2, n1, n2, res,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15530
// StrComp intrinsic, LU encoding, for CPUs without AVX512VL+BW.
// Operands are pinned by their register classes (rdi/rcx/rsi/rdx in, rax out);
// all four inputs are consumed and the flags are killed.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register s1  = $str1$$Register;
    const Register s2  = $str2$$Register;
    const Register n1  = $cnt1$$Register;
    const Register n2  = $cnt2$$Register;
    const Register res = $result$$Register;
    // No mask register on this path (knoreg).
    __ string_compare(s1, s2, n1, n2, res,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15546
// StrComp for the LU encoding when AVX-512 VL+BW is available; identical to
// the non-EVEX rule except that an opmask temp (ktmp) is reserved and passed.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register  lhs_base = $str1$$Register;
    Register  rhs_base = $str2$$Register;
    Register  lhs_len  = $cnt1$$Register;
    Register  rhs_len  = $cnt2$$Register;
    KRegister mask_tmp = $ktmp$$KRegister;
    __ string_compare(lhs_base, rhs_base, lhs_len, rhs_len,
                      $result$$Register, $tmp1$$XMMRegister,
                      StrIntrinsicNode::LU, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15562
// StrComp for the UL encoding on CPUs where AVX-512 VL+BW is not available.
// Note the register pinning is mirrored relative to the other variants
// (str1/cnt1 in RSI/RDX, str2/cnt2 in RDI/RCX) and the stub is called with
// the second string first.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Arguments are deliberately passed in swapped order: str2/cnt2 first.
    Register first_base  = $str2$$Register;
    Register second_base = $str1$$Register;
    Register first_len   = $cnt2$$Register;
    Register second_len  = $cnt1$$Register;
    __ string_compare(first_base, second_base, first_len, second_len,
                      $result$$Register, $tmp1$$XMMRegister,
                      StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15578
// StrComp for the UL encoding when AVX-512 VL+BW is available.
// As in the non-EVEX UL rule, str1/cnt1 are pinned to RSI/RDX, str2/cnt2 to
// RDI/RCX, and the stub receives the second string first; an opmask temp
// (ktmp) is additionally reserved and passed.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Arguments are deliberately passed in swapped order: str2/cnt2 first.
    Register  first_base  = $str2$$Register;
    Register  second_base = $str1$$Register;
    Register  first_len   = $cnt2$$Register;
    Register  second_len  = $cnt1$$Register;
    KRegister mask_tmp    = $ktmp$$KRegister;
    __ string_compare(first_base, second_base, first_len, second_len,
                      $result$$Register, $tmp1$$XMMRegister,
                      StrIntrinsicNode::UL, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15594
// StrIndexOf (LL encoding) where the substring length is a compile-time
// constant. Requires UseSSE42Intrinsics. cnt2 (RAX) and tmp (RCX) are not
// inputs here; they are scratch registers killed by the stub.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int needle_len = (int)$int_cnt2$$constant;
    Register haystack     = $str1$$Register;
    Register needle       = $str2$$Register;
    Register haystack_len = $cnt1$$Register;
    Register scratch_cnt  = $cnt2$$Register;
    if (needle_len < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(haystack, needle, haystack_len, scratch_cnt,
                        needle_len, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Constant substrings of 16 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(haystack, needle, haystack_len, scratch_cnt,
                          needle_len, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15623
// StrIndexOf (UU encoding) where the substring length is a compile-time
// constant. Requires UseSSE42Intrinsics. cnt2 (RAX) and tmp (RCX) are not
// inputs here; they are scratch registers killed by the stub.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int needle_len = (int)$int_cnt2$$constant;
    Register haystack     = $str1$$Register;
    Register needle       = $str2$$Register;
    Register haystack_len = $cnt1$$Register;
    Register scratch_cnt  = $cnt2$$Register;
    if (needle_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(haystack, needle, haystack_len, scratch_cnt,
                        needle_len, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(haystack, needle, haystack_len, scratch_cnt,
                          needle_len, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
15652
// StrIndexOf (UL encoding) where the substring length is a compile-time
// constant. Requires UseSSE42Intrinsics. cnt2 (RAX) and tmp (RCX) are not
// inputs here; they are scratch registers killed by the stub.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int needle_len = (int)$int_cnt2$$constant;
    Register haystack     = $str1$$Register;
    Register needle       = $str2$$Register;
    Register haystack_len = $cnt1$$Register;
    Register scratch_cnt  = $cnt2$$Register;
    if (needle_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(haystack, needle, haystack_len, scratch_cnt,
                        needle_len, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(haystack, needle, haystack_len, scratch_cnt,
                          needle_len, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15681
// StrIndexOf (LL encoding) with a substring length only known at run time
// (cnt2 in RAX). Requires UseSSE42Intrinsics. The stub is told the constant
// length is unknown by passing -1.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    Register haystack     = $str1$$Register;
    Register needle       = $str2$$Register;
    Register haystack_len = $cnt1$$Register;
    Register needle_len   = $cnt2$$Register;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
15698
// StrIndexOf (UU encoding) with a substring length only known at run time
// (cnt2 in RAX). Requires UseSSE42Intrinsics. The stub is told the constant
// length is unknown by passing -1.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    Register haystack     = $str1$$Register;
    Register needle       = $str2$$Register;
    Register haystack_len = $cnt1$$Register;
    Register needle_len   = $cnt2$$Register;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
15715
// StrIndexOf (UL encoding) with a substring length only known at run time
// (cnt2 in RAX). Requires UseSSE42Intrinsics. The stub is told the constant
// length is unknown by passing -1.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    Register haystack     = $str1$$Register;
    Register needle       = $str2$$Register;
    Register haystack_len = $cnt1$$Register;
    Register needle_len   = $cnt2$$Register;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
15732
// StrIndexOfChar for the U encoding (requires UseSSE42Intrinsics).
// str1 (RDI), cnt1 (RDX) and ch (RAX) are consumed and clobbered; the index
// is produced in RBX. Three vector temps and RCX are reserved as scratch.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    Register text     = $str1$$Register;
    Register text_len = $cnt1$$Register;
    Register wanted   = $ch$$Register;
    Register index    = $result$$Register;
    __ string_indexof_char(text, text_len, wanted, index,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
                           $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15746
// StrIndexOfChar for the L encoding (requires UseSSE42Intrinsics).
// Same register contract as the U-encoding rule, but dispatches to the
// stringL_indexof_char stub.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    Register text     = $str1$$Register;
    Register text_len = $cnt1$$Register;
    Register wanted   = $ch$$Register;
    Register index    = $result$$Register;
    __ stringL_indexof_char(text, text_len, wanted, index,
                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
                            $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15760
// Fast string equality (StrEquals) for CPUs without AVX-512 VL+BW.
// Both strings (RDI, RSI) and the count (RCX) are consumed and clobbered,
// RBX is killed as scratch, and the result is produced in RAX.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    Register lhs     = $str1$$Register;
    Register rhs     = $str2$$Register;
    Register length  = $cnt$$Register;
    Register scratch = $tmp3$$Register;
    // First argument false: the length comes from cnt rather than from
    // array headers. No opmask temp on this path, hence knoreg.
    __ arrays_equals(false, lhs, rhs, length, $result$$Register, scratch,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15777
// StrEquals when AVX-512 VL+BW is available: same contract as the non-EVEX
// rule, with an additional opmask temp (ktmp) passed to arrays_equals.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    Register  lhs      = $str1$$Register;
    Register  rhs      = $str2$$Register;
    Register  length   = $cnt$$Register;
    Register  scratch  = $tmp3$$Register;
    KRegister mask_tmp = $ktmp$$KRegister;
    __ arrays_equals(false, lhs, rhs, length, $result$$Register, scratch,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15793
// Fast array equality (AryEq) for the LL encoding on CPUs without
// AVX-512 VL+BW. Both array pointers (RDI, RSI) are consumed and clobbered;
// RCX and RBX are killed as scratch; the result is produced in RAX.
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register lhs       = $ary1$$Register;
    Register rhs       = $ary2$$Register;
    Register scratch_a = $tmp3$$Register;
    Register scratch_b = $tmp4$$Register;
    // There is no length operand; tmp3 is passed in the limit position.
    __ arrays_equals(true, lhs, rhs, scratch_a, $result$$Register, scratch_b,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15810
// AryEq for the LL encoding when AVX-512 VL+BW is available: same contract
// as array_equalsB with an additional opmask temp (ktmp).
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register  lhs       = $ary1$$Register;
    Register  rhs       = $ary2$$Register;
    Register  scratch_a = $tmp3$$Register;
    Register  scratch_b = $tmp4$$Register;
    KRegister mask_tmp  = $ktmp$$KRegister;
    __ arrays_equals(true, lhs, rhs, scratch_a, $result$$Register, scratch_b,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15826
// AryEq for the UU encoding on CPUs without AVX-512 VL+BW. Identical in shape
// to array_equalsB, except the "char" flag passed to arrays_equals is true.
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register lhs       = $ary1$$Register;
    Register rhs       = $ary2$$Register;
    Register scratch_a = $tmp3$$Register;
    Register scratch_b = $tmp4$$Register;
    __ arrays_equals(true, lhs, rhs, scratch_a, $result$$Register, scratch_b,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15842
// AryEq for the UU encoding when AVX-512 VL+BW is available: same contract
// as array_equalsC with an additional opmask temp (ktmp).
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register  lhs       = $ary1$$Register;
    Register  rhs       = $ary2$$Register;
    Register  scratch_a = $tmp3$$Register;
    Register  scratch_b = $tmp4$$Register;
    KRegister mask_tmp  = $ktmp$$KRegister;
    __ arrays_equals(true, lhs, rhs, scratch_a, $result$$Register, scratch_b,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15858
// VectorizedHashCode, enabled when UseAVX >= 2.
// result (RBX) is both an input of the match rule and the destination.
// ary1 (RDI) and cnt1 (RDX) are consumed and clobbered. Thirteen vector temps
// and three general-purpose temps are reserved; basic_type is an immediate
// that is forwarded to the stub as a BasicType.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
                         legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
                         legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr) %{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
  ins_encode %{
    Register array_base = $ary1$$Register;
    Register elem_count = $cnt1$$Register;
    Register hash       = $result$$Register;
    const BasicType elem_type = (BasicType)$basic_type$$constant;
    __ arrays_hashcode(array_base, elem_count, hash,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
                       $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
                       $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister, elem_type);
  %}
  ins_pipe( pipe_slow );
%}
15884
// CountPositives fallback, used when AVX-512 VL+BW or BMI2 is missing.
//   ary1   (RSI) - array pointer, consumed and clobbered
//   len    (RCX) - length, consumed and clobbered
//   result (RAX) - destination of the count
//   tmp1/tmp2    - vector temps; tmp3 (RBX) is killed as scratch
// No opmask registers are used on this path, so knoreg is passed twice.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15900
// CountPositives when both AVX-512 VL+BW and BMI2 are available.
//   ary1   (RSI) - array pointer, consumed and clobbered
//   len    (RCX) - length, consumed and clobbered
//   result (RAX) - destination of the count
//   tmp1/tmp2    - vector temps; tmp3 (RBX) is killed as scratch
//   ktmp1/ktmp2  - opmask temps handed to the stub
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15916
// Fast char[] to byte[] compression (StrCompressedCopy), used when
// AVX-512 VL+BW or BMI2 is missing. src (RSI), dst (RDI) and len (RDX) are
// consumed and clobbered; RCX is killed as scratch; result goes to RAX.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    Register from    = $src$$Register;
    Register to      = $dst$$Register;
    Register count   = $len$$Register;
    Register scratch = $tmp5$$Register;
    // No opmask temps on this path, so knoreg is passed twice.
    __ char_array_compress(from, to, count,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, scratch, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15934
// StrCompressedCopy when both AVX-512 VL+BW and BMI2 are available: same
// register contract as string_compress plus two opmask temps.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    Register  from    = $src$$Register;
    Register  to      = $dst$$Register;
    Register  count   = $len$$Register;
    Register  scratch = $tmp5$$Register;
    KRegister mask_a  = $ktmp1$$KRegister;
    KRegister mask_b  = $ktmp2$$KRegister;
    __ char_array_compress(from, to, count,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, scratch, $result$$Register,
                           mask_a, mask_b);
  %}
  ins_pipe( pipe_slow );
%}
// Fast byte[] to char[] inflation (StrInflatedCopy), used when
// AVX-512 VL+BW or BMI2 is missing. The rule sets a dummy Universe operand.
// src (RSI), dst (RDI) and len (RDX) are consumed and clobbered.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    Register from  = $src$$Register;
    Register to    = $dst$$Register;
    Register count = $len$$Register;
    // No opmask temp on this path, hence knoreg.
    __ byte_array_inflate(from, to, count,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15965
// StrInflatedCopy when both AVX-512 VL+BW and BMI2 are available: same
// contract as string_inflate plus one opmask temp (ktmp).
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    Register  from     = $src$$Register;
    Register  to       = $dst$$Register;
    Register  count    = $len$$Register;
    KRegister mask_tmp = $ktmp$$KRegister;
    __ byte_array_inflate(from, to, count,
                          $tmp1$$XMMRegister, $tmp2$$Register, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15979
// Encode char[] to byte[] in ISO_8859_1: selected when the EncodeISOArray
// node is not the ASCII flavour. src (RSI), dst (RDI) and len (RDX) are
// consumed and clobbered; RCX is killed; result goes to RAX.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    Register from    = $src$$Register;
    Register to      = $dst$$Register;
    Register count   = $len$$Register;
    Register scratch = $tmp5$$Register;
    // Trailing flag is false for the non-ASCII (ISO) flavour.
    __ encode_iso_array(from, to, count,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, scratch, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}
15996
// Encode char[] to byte[] in ASCII: selected when the EncodeISOArray node
// reports is_ascii(). Shares the encode_iso_array stub, with the trailing
// flag set to true.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    Register from    = $src$$Register;
    Register to      = $dst$$Register;
    Register count   = $len$$Register;
    Register scratch = $tmp5$$Register;
    __ encode_iso_array(from, to, count,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, scratch, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
16013
16014 //----------Overflow Math Instructions-----------------------------------------
16015
// OverflowAddI, register + register. Performs the add in place on op1
// (pinned to RAX and clobbered); only the resulting flags are the output.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register accum  = $op1$$Register;
    Register addend = $op2$$Register;
    __ addl(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16028
// OverflowAddI, register + immediate. Performs the add in place on op1
// (pinned to RAX and clobbered); only the resulting flags are the output.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register accum = $op1$$Register;
    __ addl(accum, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16041
// OverflowAddL, register + register. Performs the 64-bit add in place on op1
// (pinned to RAX and clobbered); only the resulting flags are the output.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register accum  = $op1$$Register;
    Register addend = $op2$$Register;
    __ addq(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16053
// OverflowAddL, register + 32-bit immediate. Performs the add in place on
// op1 (pinned to RAX and clobbered); only the flags are the output.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register accum = $op1$$Register;
    __ addq(accum, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16065
// OverflowSubI, register - register. Implemented with a compare, so neither
// operand is modified; only the flags are produced.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register minuend    = $op1$$Register;
    Register subtrahend = $op2$$Register;
    __ cmpl(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16076
// OverflowSubI, register - immediate. Implemented with a compare, so the
// register operand is not modified; only the flags are produced.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register minuend = $op1$$Register;
    __ cmpl(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16087
// OverflowSubL, register - register. Implemented with a 64-bit compare, so
// neither operand is modified; only the flags are produced.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register minuend    = $op1$$Register;
    Register subtrahend = $op2$$Register;
    __ cmpq(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16098
// OverflowSubL, register - 32-bit immediate. Implemented with a 64-bit
// compare, so the register operand is not modified.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register minuend = $op1$$Register;
    __ cmpq(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16109
// OverflowSubI with a zero left operand, i.e. negation. op2 (pinned to RAX)
// is negated in place and therefore clobbered; only the flags are the output.
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2) %{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    Register value = $op2$$Register;
    __ negl(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
16121
// OverflowSubL with a zero left operand, i.e. 64-bit negation. op2 (pinned
// to RAX) is negated in place and therefore clobbered.
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) %{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    Register value = $op2$$Register;
    __ negq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
16133
// OverflowMulI, register * register. The multiply is done in place on op1
// (pinned to RAX and clobbered); only the flags are the output.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register product = $op1$$Register;
    Register factor  = $op2$$Register;
    __ imull(product, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16145
// OverflowMulI, register * immediate. Uses the three-operand imull form
// writing into a temp, so op1 is only read, not clobbered.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register product = $tmp$$Register;
    Register factor  = $op1$$Register;
    __ imull(product, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16157
// OverflowMulL, register * register. The 64-bit multiply is done in place on
// op1 (pinned to RAX and clobbered); only the flags are the output.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register product = $op1$$Register;
    Register factor  = $op2$$Register;
    __ imulq(product, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16169
// OverflowMulL, register * 32-bit immediate. Uses the three-operand imulq
// form writing into a temp, so op1 is only read, not clobbered.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register product = $tmp$$Register;
    Register factor  = $op1$$Register;
    __ imulq(product, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16181
16182
16183 //----------Control Flow Instructions------------------------------------------
16184 // Signed compare Instructions
16185
// XXX more variants!!
// CmpI, register vs. register: emits cmpl and defines the flags register.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16198
// CmpI, register vs. immediate: emits cmpl with the constant operand.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16209
// CmpI, register vs. a loaded int: the LoadI is folded into the memory
// operand of cmpl.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16221
// CmpI of a register against the constant zero: emitted as testl of the
// register with itself instead of a compare with an immediate.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16232
// CmpI of (src AND constant) against zero: the AndI is folded into a single
// testl with an immediate mask.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16243
// CmpI of (src1 AND src2) against zero: the AndI is folded into a single
// register/register testl.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero) %{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    __ testl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16254
// CmpI of (src AND loaded int) against zero: both the AndI and the LoadI are
// folded into a single testl with a memory operand.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16265
// Unsigned compare instructions. These emit the same machine instructions as
// the signed forms; the difference is that they define an rFlagsRegU rather
// than an rFlagsReg.
//
// CmpU, register vs. register.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16278
// CmpU, register vs. immediate; defines the unsigned flags register.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16289
// Unsigned int compare of a register with an int loaded from memory:
// matches (CmpU op1 (LoadI op2)) and emits "cmpl op1, [op2]".
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16301
// Unsigned compare of an int register with constant zero:
// matches (CmpU src zero) and emits "testl src, src" into rFlagsRegU.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16312
// Pointer compare of two pointer registers: matches (CmpP op1 op2) and
// emits a 64-bit "cmpq op1, op2"; the result is unsigned flags (rFlagsRegU).
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16323
// Pointer compare of a register with a pointer loaded from memory:
// matches (CmpP op1 (LoadP op2)) and emits "cmpq op1, [op2]".
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  // Only fold the load when it carries no barrier data.
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16336
// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
//
// Same match rule and encoding as compP_rReg_mem, but additionally requires
// the type reached via n->in(2)->in(2) to need no relocation
// (relocInfo::none), and declares no explicit ins_cost.
// NOTE(review): n->in(2)->in(2) is presumably the load's address input --
// confirm.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16353
// Null check of a pointer register: matches (CmpP src zero) and emits
// "testq src, src".
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16366
// Null check of a pointer loaded from memory: matches
// (CmpP (LoadP op) zero) and emits a testq of the memory operand against a
// mask immediate. Selected when compressed oops are off or the compressed
// oops base is non-null (otherwise testP_mem_reg0 applies), and only when
// the load carries no barrier data.
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    // NOTE(review): the format string shows a 64-bit all-ones mask while the
    // encoder passes 0xFFFFFFFF; presumably the 32-bit immediate is
    // sign-extended by testq -- confirm against Assembler::testq.
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16382
// Null check of a pointer loaded from memory when compressed oops are in use
// with a null base: matches (CmpP (LoadP mem) zero) and emits
// "cmpq r12, [mem]". Per the format string this relies on R12 (heap base)
// being zero in that configuration. The load must carry no barrier data.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16395
// Compressed-pointer compare of two narrow-oop registers: matches
// (CmpN op1 op2) and emits a 32-bit "cmpl op1, op2" into rFlagsRegU.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
16404
// Compressed-pointer compare of a register with a narrow oop loaded from
// memory: matches (CmpN src (LoadN mem)) and emits "cmpl src, [mem]".
// Only applies when the load carries no barrier data.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16416
// Compressed-pointer compare of a register with a narrow-oop constant:
// matches (CmpN op1 op2) with an immN operand and delegates the encoding to
// cmp_narrow_oop, passing the constant as a jobject.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16426
// Compressed-pointer compare of a narrow-oop constant with a narrow oop
// loaded from memory: matches (CmpN src (LoadN mem)) and encodes via
// cmp_narrow_oop on the memory operand. Note the emitted operand order is
// (mem, src), as shown in the format string. Only applies when the load
// carries no barrier data.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16438
// Compressed-klass compare of a register with a narrow-klass constant:
// matches (CmpN op1 op2) with an immNKlass operand and encodes via
// cmp_narrow_klass, passing the constant as a Klass*.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16448
// Compressed-klass compare of a narrow-klass constant with a narrow klass
// loaded from memory: matches (CmpN src (LoadNKlass mem)) and encodes via
// cmp_narrow_klass on the memory operand. Disabled when
// UseCompactObjectHeaders is set.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16460
// Null check of a compressed-pointer register: matches (CmpN src zero) and
// emits a 32-bit "testl src, src".
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16468
// Null check of a compressed pointer loaded from memory: matches
// (CmpN (LoadN mem) zero). Selected when the compressed-oops base is
// non-null (otherwise testN_mem_reg0 applies) and the load carries no
// barrier data.
//
// The flags are produced by "testl [mem], 0xffffffff", matching the format
// string and the pointer-sized sibling testP_mem: ZF is set exactly when the
// loaded 32-bit value is zero.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    // Must be a TEST, not a CMP: comparing against 0xFFFFFFFF would report
    // "equal" for the value -1 instead of for a null narrow oop.
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16482
// Null check of a compressed pointer loaded from memory when the
// compressed-oops base is null: matches (CmpN (LoadN mem) zero) and emits
// "cmpl r12, [mem]". Per the format string this relies on R12 (heap base)
// being zero in that configuration. The load must carry no barrier data.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16495
16496 // Yanked all unsigned pointer compare operations.
16497 // Pointer compares are done with CmpP which is already unsigned.
16498
// Signed long compare of two long registers: matches (CmpL op1 op2) and
// emits "cmpq op1, op2".
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16509
// Signed long compare of a register with a long constant of operand class
// immL32: matches (CmpL op1 op2) and emits "cmpq op1, imm".
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16520
// Signed long compare of a register with a long loaded from memory:
// matches (CmpL op1 (LoadL op2)) and emits "cmpq op1, [op2]".
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16531
// Compare a long register with constant zero: matches (CmpL src zero) and
// emits "testq src, src".
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16542
// Folds (CmpL (AndL src con) zero), with con of operand class immL32, into
// a single "testq src, con". Only the flags register cr is defined.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{
    __ testq($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16553
// Folds (CmpL (AndL src1 src2) zero) into "testq src1, src2"; both inputs
// are long registers and only the flags register cr is defined.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
%{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{
    __ testq($src1$$Register, $src2$$Register);
  %}
  // NOTE(review): register/register form but scheduled with the reg_imm pipe
  // class -- confirm this is intentional.
  ins_pipe(ialu_cr_reg_imm);
%}
16564
// Folds (CmpL (AndL src (LoadL mem)) zero) into "testq src, [mem]", so the
// long load is performed by the test instruction's memory operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16575
// Variant of testL_reg_mem whose register input is a pointer converted with
// CastP2X: matches (CmpL (AndL (CastP2X src) (LoadL mem)) zero) and emits
// "testq src, [mem]" directly on the pointer register.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16586
// Manifest a CmpU result in an integer register. Very painful.
// This is the test to avoid.
//
// Matches (CmpU3 src1 src2) on int registers. dst is preloaded with -1 and
// kept if src1 is below src2 (unsigned); otherwise setcc(notZero)
// materializes the "not equal" condition in dst. The flags register is
// clobbered (KILL flags).
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    // Loaded between the compare and the branch; relies on movl leaving the
    // condition flags untouched.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16610
// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
//
// Matches (CmpL3 src1 src2) on long registers. dst is preloaded with -1 and
// kept if src1 is less than src2 (signed); otherwise setcc(notZero)
// materializes the "not equal" condition in dst. The flags register is
// clobbered (KILL flags).
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // Loaded between the compare and the branch; relies on movl leaving the
    // condition flags untouched.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16634
// Manifest a CmpUL result in an integer register. Very painful.
// This is the test to avoid.
//
// Matches (CmpUL3 src1 src2) on long registers. dst is preloaded with -1 and
// kept if src1 is below src2 (unsigned); otherwise setcc(notZero)
// materializes the "not equal" condition in dst. The flags register is
// clobbered (KILL flags).
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // Loaded between the compare and the branch; relies on movl leaving the
    // condition flags untouched.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16658
// Unsigned long compare Instructions; really, same as signed long except they
// produce an rFlagsRegU instead of rFlagsReg.
//
// compUL_rReg: matches (CmpUL op1 op2) on two long registers and emits
// "cmpq op1, op2"; the result is typed as unsigned flags (rFlagsRegU).
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16671
// Unsigned long compare of a register with a constant of operand class
// immL32: matches (CmpUL op1 op2) and emits "cmpq op1, imm" into rFlagsRegU.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16682
// Unsigned long compare of a register with a long loaded from memory:
// matches (CmpUL op1 (LoadL op2)) and emits "cmpq op1, [op2]".
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16693
// Unsigned compare of a long register with constant zero:
// matches (CmpUL src zero) and emits "testq src, src" into rFlagsRegU.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16704
// Compare a signed byte in memory with an 8-bit immediate: matches
// (CmpI (LoadB mem) imm) and emits "cmpb [mem], imm", folding the byte load
// into the compare.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16714
// Bit test of an unsigned byte in memory against an immediate of operand
// class immU7: matches (CmpI (AndI (LoadUB mem) imm) zero) and emits
// "testb [mem], imm".
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16724
// Bit test of a signed byte in memory against an immediate of operand class
// immI8: matches (CmpI (AndI (LoadB mem) imm) zero) and emits
// "testb [mem], imm".
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16734
16735 //----------Max and Min--------------------------------------------------------
16736 // Min Instructions
16737
// Helper for minI_rReg (non-APX): conditional move "if greater, dst = src".
// Has no match rule of its own; it is instantiated from the expand block of
// minI_rReg and consumes the flags produced there.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16749
// Helper for minI_rReg_ndd (APX): three-operand conditional move on the
// "greater" condition with a separate destination (dst is DEF only).
// Has no match rule of its own; it is instantiated from the expand block of
// minI_rReg_ndd.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16761
// Integer minimum, two-operand form (non-APX): matches (MinI dst src) with
// dst as both input and result. Expands into a signed compare of dst with
// src followed by cmovI_reg_g, which overwrites dst with src when dst is
// greater.
instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
16774
// Integer minimum, three-operand form (APX): matches (MinI src1 src2) into
// a separate dst. Expands into a signed compare of src1 with src2 followed
// by cmovI_reg_g_ndd.
// NOTE(review): Flag_ndd_demotable_opr1 presumably marks the node as
// eligible for demotion to the two-operand form when dst matches the first
// source -- confirm against the flag's definition.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}
16789
// Max Instructions
//
// Helper for maxI_rReg (non-APX): conditional move "if less, dst = src".
// Has no match rule of its own; it is instantiated from the expand block of
// maxI_rReg and consumes the flags produced there.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16801
// Helper for maxI_rReg_ndd (APX): three-operand conditional move on the
// "less" condition with a separate destination (dst is DEF only).
// Has no match rule of its own; it is instantiated from the expand block of
// maxI_rReg_ndd.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
16813
// Integer maximum, two-operand form (non-APX): matches (MaxI dst src) with
// dst as both input and result. Expands into a signed compare of dst with
// src followed by cmovI_reg_l, which overwrites dst with src when dst is
// less.
instruct maxI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
16826
// Integer maximum, three-operand form (APX): matches (MaxI src1 src2) into
// a separate dst. Expands into a signed compare of src1 with src2 followed
// by cmovI_reg_l_ndd.
// NOTE(review): Flag_ndd_demotable_opr1 presumably marks the node as
// eligible for demotion to the two-operand form when dst matches the first
// source -- confirm against the flag's definition.
instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}
16841
16842 // ============================================================================
16843 // Branch Instructions
16844
// Jump Direct - Label defines a relative address from JMP+1
// Unconditional jump for Goto, long form: declares a fixed size of 5 bytes
// and calls jmp() with its second argument false, which (per the inline
// comment) always selects the long encoding.
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}
16860
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch for If on signed flags (cmpOp / rFlagsReg), long form:
// declares a fixed size of 6 bytes; the condition code is taken from the
// cop operand's cmpcode.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16876
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon, but matches the CountedLoopEnd node that closes a
// counted loop. Long form with a fixed size of 6 bytes.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16892
// Jump Direct Conditional - using unsigned comparison
// Conditional branch for If on unsigned flags (cmpOpU / rFlagsRegU), long
// form with a fixed size of 6 bytes.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16907
// Conditional branch for If on rFlagsRegUCF flags with a cmpOpUCF condition,
// long form with a fixed size of 6 bytes. Declared cheaper (200) than the
// generic jmpConU (300).
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16921
// Conditional branch for If on rFlagsRegUCF flags with a cmpOpUCF2
// condition, which must be equal or notEqual. Two jumps are emitted so that
// the parity flag is taken into account:
//   notEqual: jump to the label on parity, then jump to it on notEqual;
//   equal:    skip over the branch on parity, otherwise jump on equal.
// No size() is declared for this long form.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u $labl\n\t"
      $$emit$$"j$cop,u $labl"
    } else {
      $$emit$$"jp,u done\n\t"
      $$emit$$"j$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Local short skip over the long je when parity is set.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      // Any other condition code is not expected for this operand class.
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
16953
// Jump Direct Conditional - using signed and unsigned comparison
// Conditional branch for If on rFlagsRegUCFE flags with a cmpOpUCFE
// condition, long form with a fixed size of 6 bytes.
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,su $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16968
// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
//
// Linear-scan variant, used when UseSecondarySupersTable is off. Operands
// are pinned to fixed registers (result in rdi, sub in rsi, super in rax);
// rcx and the flags are killed.

instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s miss\t\t# Missed: rdi not-zero\n\t"
            "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Fall-through is the hit path: clear the result. On a miss the helper
    // branches to 'miss', skipping the clear.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
17009
17010 // ============================================================================
17011 // Two versions of hashtable-based partialSubtypeCheck, both used when
17012 // we need to search for a super class in the secondary supers array.
17013 // The first is used when we don't know _a priori_ the class being
17014 // searched for. The second, far more common, is used when we do know:
17015 // this is used for instanceof, checkcast, and any case where C2 can
17016 // determine it by constant propagation.
17017
// Hashtable-based PartialSubtypeCheck for a superclass held in a register
// (not a compile-time constant). Used when UseSecondarySupersTable is on.
// Operands are pinned to fixed registers; four fixed temporaries and the
// flags are clobbered. The lookup itself is delegated to
// lookup_secondary_supers_table_var.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(1000);
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17036
// Hashtable-based PartialSubtypeCheck for a superclass known as a constant
// (matched as (Binary super_reg super_con)). Used when
// UseSecondarySupersTable is on. The hash slot is computed at code-emission
// time from the constant Klass*; the lookup is either inlined
// (InlineSecondarySupersTest) or done by calling the per-slot stub.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(700);  // smaller than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}

  ins_encode %{
    u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
    } else {
      // NOTE(review): the stub presumably takes its arguments in the fixed
      // registers this instruct pins its operands to -- confirm against the
      // stub generator.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
    }
  %}

  ins_pipe(pipe_slow);
%}
17061
17062 // ============================================================================
17063 // Branch Instructions -- short offset versions
17064 //
17065 // These instructions are used to replace jumps of a long offset (the default
17066 // match) with jumps of a shorter offset. These instructions are all tagged
17067 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17068 // match rules in general matching. Instead, the ADLC generates a conversion
17069 // method in the MachNode which can be used to do in-place replacement of the
17070 // long variant with the shorter variant. The compiler will determine if a
17071 // branch can be taken by the is_short_branch_offset() predicate in the machine
17072 // specific code section of the file.
17073
// Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: 2-byte jmpb, tagged with
// ins_short_branch so it is only substituted for the long form.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
17089
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: 2-byte jccb on signed flags, tagged
// with ins_short_branch.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17105
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd: 2-byte jccb matching
// CountedLoopEnd, tagged with ins_short_branch.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17121
// Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: 2-byte jccb on unsigned flags,
// tagged with ins_short_branch.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17137
// Short-offset replacement for jmpConUCF: 2-byte jccb on rFlagsRegUCF flags,
// tagged with ins_short_branch.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17152
// Short-offset replacement for jmpConUCF2: the same two-jump sequence
// (parity check plus equal/notEqual branch) built from two 2-byte jccb
// instructions, hence size(4). Tagged with ins_short_branch.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s $labl\n\t"
      $$emit$$"j$cop,u,s $labl"
    } else {
      $$emit$$"jp,u,s done\n\t"
      $$emit$$"j$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Skip the je when parity is set.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      // Any other condition code is not expected for this operand class.
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17186
// Jump Direct Conditional - using signed and unsigned comparison
// Short-offset replacement for jmpConUCFE: 2-byte jccb on rFlagsRegUCFE
// flags, tagged with ins_short_branch.
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,sus $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17202
17203 // ============================================================================
17204 // inlined locking and unlocking
17205
// Inlined monitor enter: matches (FastLock object box) and produces its
// outcome in the flags register cr. box is pinned to rbx and is consumed and
// clobbered (USE_KILL); rax and tmp are scratch. The work is delegated to
// fast_lock, which also receives r15_thread.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17216
// Inlined monitor exit: matches (FastUnlock object rax_reg) and produces its
// outcome in the flags register cr. The second input is pinned to rax and is
// consumed and clobbered (USE_KILL); tmp is scratch. The work is delegated
// to fast_unlock, which also receives r15_thread.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);
  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17227
17228
17229 // ============================================================================
17230 // Safepoint Instructions
// Safepoint poll: matches (SafePoint poll) and emits "testl rax, [poll]",
// a read of the address held in the poll register. The instruction is
// tagged with a poll_type relocation and the flags are killed.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the poll starts so the assert below can verify that the
    // emitted bytes are recognized as a safepoint poll.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17247
// MaskAll from a long source into an opmask register (kReg): delegates to
// vector_maskall_operation with the node's vector length as the mask length.
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
17257
// MaskAll from an int source for vectors longer than 32 elements: the int is
// first widened into a long temporary with movslq, then handled like
// mask_all_evexL via vector_maskall_operation.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
17270
17271 // ============================================================================
17272 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Matches CallStaticJava. The encoding is composed of three encoding
// classes run in order: clear_avx, Java_Static_Call(meth), call_epilog.
// The instruction requests 4-byte alignment.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17287
// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Matches CallDynamicJava. Per the format string, rax is loaded with
// Universe::non_oop_word() before the call. The encoding runs clear_avx,
// Java_Dynamic_Call(meth) and call_epilog in order; the instruction requests
// 4-byte alignment.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17303
// Call Runtime Instruction
// Matches the ideal CallRuntime node. The encoding runs the enc_classes
// clear_avx and then Java_To_Runtime(meth); both are defined outside this
// section.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17315
// Call runtime without safepoint
// Matches the ideal CallLeaf node. Uses the same encoding list as
// CallRuntimeDirect: clear_avx followed by Java_To_Runtime(meth).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17327
// Call runtime without safepoint and with vector arguments
// Matches the ideal CallLeafVector node. Unlike the other call rules in this
// section, the encoding list here does NOT start with clear_avx; only
// Java_To_Runtime(meth) is emitted.
// NOTE(review): presumably clear_avx is omitted so the vector argument
// registers are left untouched -- confirm against the clear_avx enc_class.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17339
// Call runtime without safepoint
// Matches the ideal CallLeafNoFP node. The encoding list is identical to
// CallLeafDirect: clear_avx followed by Java_To_Runtime(meth).
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17351
// Return Instruction
// Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
//         for safepoint patching
// NOTE(review): the encoding in this rule emits only `ret(0)`; no nop is
// emitted here, so the notice above may be stale -- confirm before relying
// on it.
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    // Plain near return; the argument 0 is passed straight to ret().
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}
17366
// Tail Call ("interprocedural jump"): transfers control from a runtime stub
// into Java code with an indirect jmp. The return address is left in place,
// so the jump target eventually returns to our caller (contrast with the
// TailJump rule below, which pops the return address first).
// rbp must not be chosen for 'jump_target': the MachEpilogNode emitted just
// above the TailCall has already restored rbp to the caller's state.
// The method pointer is pinned to rbx by the operand class.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);
  ins_cost(300);

  format %{ "jmp $jump_target\t# rbx holds method" %}
  ins_encode %{
    Register target = $jump_target$$Register;
    __ jmp(target);
  %}
  ins_pipe(pipe_jmp);
%}
17384
// Tail Jump: discards the return address (popped into rdx) and then jumps
// indirectly to the target. The TailCall rule above, by contrast, leaves the
// return address on the stack. The exception oop operand is pinned to rax.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);
  ins_cost(300);

  format %{ "popq rdx\t# pop return address\n\t"
            "jmp $jump_target" %}
  ins_encode %{
    Register target  = $jump_target$$Register;
    Register discard = as_Register(RDX_enc);
    __ popq(discard);   // drop the return address
    __ jmp(target);
  %}
  ins_pipe(pipe_jmp);
%}
17400
// Forward exception: jumps (does not call) to the stub returned by
// StubRoutines::forward_exception_entry().
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp forward_exception_stub" %}
  ins_encode %{
    address stub_entry = StubRoutines::forward_exception_entry();
    __ jump(RuntimeAddress(stub_entry), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17412
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
// The result operand is pinned to rax (rax_RegP); the rule declares size(0)
// and an empty encoding, so it only tells the allocator where the oop lives.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // Only a descriptive format string is provided; nothing is encoded.
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
17426
// Rethrow exception:
// The exception oop arrives in the first argument position; control is then
// transferred with a JUMP (not a call) to OptoRuntime::rethrow_stub().
instruct RethrowException()
%{
  match(Rethrow);

  format %{ "jmp rethrow_stub" %}
  ins_encode %{
    address stub_entry = OptoRuntime::rethrow_stub();
    __ jump(RuntimeAddress(stub_entry), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17441
17442 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// Matches the ideal ThreadLocal node with the result pinned to r15
// (r15_RegP). The rule declares size(0) and an empty encoding, so no
// instruction is emitted for it.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
17455
// Float add, non-AVX form (UseAVX == 0): two-operand addss where $dst is
// both the first input and the result.
instruct addF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));
  ins_cost(150);

  format %{ "addss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17467
// Float add, non-AVX form (UseAVX == 0) with the second input taken
// directly from memory (the LoadF is folded into the addss).
instruct addF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));
  ins_cost(150);

  format %{ "addss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17479
// Float add, non-AVX form (UseAVX == 0) with a float constant as second
// input; the constant is addressed through $constantaddress.
instruct addF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));
  ins_cost(150);

  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17490
// Float add, AVX form (UseAVX > 0): three-operand vaddss with a separate
// destination register.
instruct addF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));
  ins_cost(150);

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vaddss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17502
// Float add, AVX form (UseAVX > 0) with the second input taken directly
// from memory (the LoadF is folded into the vaddss).
instruct addF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));
  ins_cost(150);

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vaddss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17514
// Float add, AVX form (UseAVX > 0) with a float constant as second input;
// the constant is addressed through $constantaddress.
instruct addF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));
  ins_cost(150);

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vaddss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17526
// Double add, non-AVX form (UseAVX == 0): two-operand addsd where $dst is
// both the first input and the result.
instruct addD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));
  ins_cost(150);

  format %{ "addsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17538
// Double add, non-AVX form (UseAVX == 0) with the second input taken
// directly from memory (the LoadD is folded into the addsd).
instruct addD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "addsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17550
// Double add, non-AVX form (UseAVX == 0) with a double constant as second
// input; the constant is addressed through $constantaddress.
instruct addD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));
  ins_cost(150);

  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17561
// Double add, AVX form (UseAVX > 0): three-operand vaddsd with a separate
// destination register.
instruct addD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));
  ins_cost(150);

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vaddsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17573
// Double add, AVX form (UseAVX > 0) with the second input taken directly
// from memory (the LoadD is folded into the vaddsd).
instruct addD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));
  ins_cost(150);

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vaddsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17585
// Double add, AVX form (UseAVX > 0) with a double constant as second input;
// the constant is addressed through $constantaddress.
instruct addD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));
  ins_cost(150);

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vaddsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17597
// Float subtract, non-AVX form (UseAVX == 0): two-operand subss where $dst
// is both the first input and the result.
instruct subF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));
  ins_cost(150);

  format %{ "subss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17609
// Float subtract, non-AVX form (UseAVX == 0) with the second input taken
// directly from memory (the LoadF is folded into the subss).
instruct subF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));
  ins_cost(150);

  format %{ "subss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17621
// Float subtract, non-AVX form (UseAVX == 0) with a float constant as
// second input; the constant is addressed through $constantaddress.
instruct subF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));
  ins_cost(150);

  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17632
// Float subtract, AVX form (UseAVX > 0): three-operand vsubss with a
// separate destination register.
instruct subF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));
  ins_cost(150);

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vsubss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17644
// Float subtract, AVX form (UseAVX > 0) with the second input taken
// directly from memory (the LoadF is folded into the vsubss).
instruct subF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));
  ins_cost(150);

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vsubss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17656
// Float subtract, AVX form (UseAVX > 0) with a float constant as second
// input; the constant is addressed through $constantaddress.
instruct subF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));
  ins_cost(150);

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vsubss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17668
// Double subtract, non-AVX form (UseAVX == 0): two-operand subsd where $dst
// is both the first input and the result.
instruct subD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));
  ins_cost(150);

  format %{ "subsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17680
// Double subtract, non-AVX form (UseAVX == 0) with the second input taken
// directly from memory (the LoadD is folded into the subsd).
instruct subD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "subsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17692
// Double subtract, non-AVX form (UseAVX == 0) with a double constant as
// second input; the constant is addressed through $constantaddress.
instruct subD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));
  ins_cost(150);

  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17703
// Double subtract, AVX form (UseAVX > 0): three-operand vsubsd with a
// separate destination register.
instruct subD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));
  ins_cost(150);

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vsubsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17715
// Double subtract, AVX form (UseAVX > 0) with the second input taken
// directly from memory (the LoadD is folded into the vsubsd).
instruct subD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));
  ins_cost(150);

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vsubsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17727
// Double subtract, AVX form (UseAVX > 0) with a double constant as second
// input; the constant is addressed through $constantaddress.
instruct subD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));
  ins_cost(150);

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vsubsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17739
// Float multiply, non-AVX form (UseAVX == 0): two-operand mulss where $dst
// is both the first input and the result.
instruct mulF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));
  ins_cost(150);

  format %{ "mulss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ mulss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17751
// Float multiply, non-AVX form (UseAVX == 0) with the second input taken
// directly from memory (the LoadF is folded into the mulss).
instruct mulF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));
  ins_cost(150);

  format %{ "mulss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17763
// Float multiply, non-AVX form (UseAVX == 0) with a float constant as
// second input; the constant is addressed through $constantaddress.
instruct mulF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));
  ins_cost(150);

  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17774
// Float multiply, AVX form (UseAVX > 0): three-operand vmulss with a
// separate destination register.
instruct mulF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));
  ins_cost(150);

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vmulss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17786
// Float multiply, AVX form (UseAVX > 0) with the second input taken
// directly from memory (the LoadF is folded into the vmulss).
instruct mulF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));
  ins_cost(150);

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vmulss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17798
// Float multiply, AVX form (UseAVX > 0) with a float constant as second
// input; the constant is addressed through $constantaddress.
instruct mulF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));
  ins_cost(150);

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vmulss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17810
// Double multiply, non-AVX form (UseAVX == 0): two-operand mulsd where $dst
// is both the first input and the result.
instruct mulD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));
  ins_cost(150);

  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ mulsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17822
// Double multiply, non-AVX form (UseAVX == 0) with the second input taken
// directly from memory (the LoadD is folded into the mulsd).
instruct mulD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(150);

  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17834
// Double multiply, non-AVX form (UseAVX == 0) with a double constant as
// second input; the constant is addressed through $constantaddress.
instruct mulD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));
  ins_cost(150);

  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17845
// Double multiply, AVX form (UseAVX > 0): three-operand vmulsd with a
// separate destination register.
instruct mulD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));
  ins_cost(150);

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vmulsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17857
// Double multiply, AVX form (UseAVX > 0) with the second input taken
// directly from memory (the LoadD is folded into the vmulsd).
instruct mulD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));
  ins_cost(150);

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vmulsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17869
// Double multiply, AVX form (UseAVX > 0) with a double constant as second
// input; the constant is addressed through $constantaddress.
instruct mulD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));
  ins_cost(150);

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vmulsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17881
// Float divide, non-AVX form (UseAVX == 0): two-operand divss where $dst is
// both the first input and the result.
instruct divF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));
  ins_cost(150);

  format %{ "divss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ divss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17893
// Float divide, non-AVX form (UseAVX == 0) with the second input taken
// directly from memory (the LoadF is folded into the divss).
instruct divF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));
  ins_cost(150);

  format %{ "divss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17905
// Float divide, non-AVX form (UseAVX == 0) with a float constant as second
// input; the constant is addressed through $constantaddress.
instruct divF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));
  ins_cost(150);

  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17916
// Float divide, AVX form (UseAVX > 0): three-operand vdivss with a separate
// destination register.
instruct divF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));
  ins_cost(150);

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vdivss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17928
// Float divide, AVX form (UseAVX > 0) with the second input taken directly
// from memory (the LoadF is folded into the vdivss).
instruct divF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));
  ins_cost(150);

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vdivss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17940
// Float divide, AVX form (UseAVX > 0) with a float constant as second
// input; the constant is addressed through $constantaddress.
instruct divF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));
  ins_cost(150);

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vdivss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17952
// Double divide, non-AVX form (UseAVX == 0): two-operand divsd where $dst is
// both the first input and the result.
instruct divD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));
  ins_cost(150);

  format %{ "divsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ divsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17964
// Double divide, non-AVX form (UseAVX == 0) with the second input taken
// directly from memory (the LoadD is folded into the divsd).
instruct divD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));
  ins_cost(150);

  format %{ "divsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17976
// Double divide, non-AVX form (UseAVX == 0) with a double constant as
// second input; the constant is addressed through $constantaddress.
instruct divD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));
  ins_cost(150);

  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17987
// Double divide, AVX form (UseAVX > 0): three-operand vdivsd with a separate
// destination register.
instruct divD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));
  ins_cost(150);

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vdivsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17999
// Double divide, AVX form (UseAVX > 0) with the second input taken directly
// from memory (the LoadD is folded into the vdivsd).
instruct divD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));
  ins_cost(150);

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vdivsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18011
// Double divide, AVX form (UseAVX > 0) with a double constant as second
// input; the constant is addressed through $constantaddress.
instruct divD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));
  ins_cost(150);

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vdivsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18023
// Float absolute value, non-AVX form (UseAVX == 0): ANDs $dst in place with
// the constant at float_signmask() (sign masking).
instruct absF_reg(regF dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));

  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ andps(val, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18034
// Float absolute value, AVX form (UseAVX > 0): $dst = $src AND the constant
// at float_signmask(), issued as a 128-bit vandps.
instruct absF_reg_reg(vlRegF dst, vlRegF src)
%{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));

  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    const int vec_enc = Assembler::AVX_128bit;
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandps(result, input, ExternalAddress(float_signmask()), vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
18047
// Double absolute value, non-AVX form (UseAVX == 0): ANDs $dst in place with
// the constant at double_signmask() (sign masking).
instruct absD_reg(regD dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));

  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ andpd(val, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18059
// Double absolute value, AVX form (UseAVX > 0): $dst = $src AND the constant
// at double_signmask(), issued as a 128-bit vandpd.
instruct absD_reg_reg(vlRegD dst, vlRegD src)
%{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));

  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    const int vec_enc = Assembler::AVX_128bit;
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandpd(result, input, ExternalAddress(double_signmask()), vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
18073
// Float negate, non-AVX form (UseAVX == 0): XORs $dst in place with the
// constant at float_signflip() (sign flipping).
instruct negF_reg(regF dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));

  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ xorps(val, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18084
// Float negate, AVX form (UseAVX > 0): delegates to vnegatess with the
// constant at float_signflip() and a separate destination register.
instruct negF_reg_reg(vlRegF dst, vlRegF src)
%{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));

  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatess(result, input, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18096
// Double negate, non-AVX form (UseAVX == 0): XORs $dst in place with the
// constant at double_signflip() (sign flipping).
instruct negD_reg(regD dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));

  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ xorpd(val, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18108
// Double negate, AVX form (UseAVX > 0): delegates to vnegatesd with the
// constant at double_signflip() and a separate destination register.
instruct negD_reg_reg(vlRegD dst, vlRegD src)
%{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));

  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatesd(result, input, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18121
// Float square root.
// For best performance sqrtss wants its destination register to be
// pre-initialized, so the only rule defined is the one where the input has
// already been loaded into $dst; source and destination are the same register.
instruct sqrtF_reg(regF dst)
%{
  match(Set dst (SqrtF dst));

  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ sqrtss(val, val);
  %}
  ins_pipe(pipe_slow);
%}
18132
// Double square root.
// For best performance sqrtsd wants its destination register to be
// pre-initialized, so the only rule defined is the one where the input has
// already been loaded into $dst; source and destination are the same register.
instruct sqrtD_reg(regD dst)
%{
  match(Set dst (SqrtD dst));

  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ sqrtsd(val, val);
  %}
  ins_pipe(pipe_slow);
%}
18143
// ConvF2HF from an XMM register into a general-purpose register; delegates
// to flt_to_flt16 and needs one XMM TEMP.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp)
%{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);
  ins_cost(125);

  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    Register    half_out = $dst$$Register;
    XMMRegister flt_in   = $src$$XMMRegister;
    XMMRegister scratch  = $tmp$$XMMRegister;
    __ flt_to_flt16(half_out, flt_in, scratch);
  %}
  ins_pipe( pipe_slow );
%}
18154
// ConvF2HF folded into a StoreC, available only with UseAVX > 2 and AVX512VL.
// The value 0x1 is placed in the opmask TEMP (via the integer TEMP) and the
// masked evcvtps2ph then writes directly to memory.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp)
%{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  match(Set mem (StoreC mem (ConvF2HF src)));
  effect(TEMP ktmp, TEMP rtmp);

  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    Register  mask_bits = $rtmp$$Register;
    KRegister lane_mask = $ktmp$$KRegister;
    // Build an opmask holding the value 0x1.
    __ movl(mask_bits, 0x1);
    __ kmovwl(lane_mask, mask_bits);
    __ evcvtps2ph($mem$$Address, lane_mask, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
18167
// VectorCastF2HF, register to register: vcvtps2ph with immediate 0x04, using
// the vector length encoding derived from the $src operand.
instruct vconvF2HF(vec dst, vec src)
%{
  match(Set dst (VectorCastF2HF src));

  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    int src_vec_enc = vector_length_encoding(this, $src);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vcvtps2ph(result, input, 0x04, src_vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
18177
// VectorCastF2HF folded into a StoreVector, selected only when the store
// writes at least 16 bytes: vcvtps2ph with a memory destination.
instruct vconvF2HF_mem_reg(memory mem, vec src)
%{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));

  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    int src_vec_enc = vector_length_encoding(this, $src);
    XMMRegister input = $src$$XMMRegister;
    __ vcvtps2ph($mem$$Address, input, 0x04, src_vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
18188
// ConvHF2F from a general-purpose register into an XMM register; delegates
// to flt16_to_flt.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src)
%{
  match(Set dst (ConvHF2F src));

  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    XMMRegister flt_out = $dst$$XMMRegister;
    Register    half_in = $src$$Register;
    __ flt16_to_flt(flt_out, half_in);
  %}
  ins_pipe( pipe_slow );
%}
18197
// VectorCastHF2F with the LoadVector folded in: vcvtph2ps reads its source
// directly from memory, using this node's vector length encoding.
instruct vconvHF2F_reg_mem(vec dst, memory mem)
%{
  match(Set dst (VectorCastHF2F (LoadVector mem)));

  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    int dst_vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vcvtph2ps(result, $mem$$Address, dst_vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
18207
// VectorCastHF2F, register to register: vcvtph2ps using this node's vector
// length encoding.
instruct vconvHF2F(vec dst, vec src)
%{
  match(Set dst (VectorCastHF2F src));

  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    int dst_vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vcvtph2ps(result, input, dst_vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
18218
18219 // ---------------------------------------- VectorReinterpret ------------------------------------
// VectorReinterpret between opmask types of equal vector length: input and
// output share the same register ($dst), so nothing is emitted.
instruct reinterpret_mask(kReg dst)
%{
  predicate(n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // same length: dst == src
  match(Set dst (VectorReinterpret dst));

  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // intentionally empty: no code is generated for this rule
  %}
  ins_pipe( pipe_slow );
%}
18231
// VectorReinterpret of an opmask with T_SHORT elements into an opmask with
// T_BYTE elements, selected (UseAVX > 2) when the two masks have different
// vector lengths. The source mask is first expanded into the vector TEMP
// $xtmp with evpmovm2w, then evpmovb2m rebuilds the byte mask from $xtmp.
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // short mask -> byte mask, vector lengths differ
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    // Sizes in bytes of the vectors the two masks describe; they must agree.
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz , "src and dst size mismatch");
    // Both instructions use the encoding derived from that byte size.
    int vlen_enc = vector_length_encoding(src_sz);
    __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18251
// VectorReinterpret of an opmask with T_INT or T_FLOAT elements into an
// opmask with T_BYTE elements, selected (UseAVX > 2) when the two masks have
// different vector lengths. The source mask is first expanded into the vector
// TEMP $xtmp with evpmovm2d, then evpmovb2m rebuilds the byte mask from $xtmp.
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // int/float mask -> byte mask, vector lengths differ
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    // Sizes in bytes of the vectors the two masks describe; they must agree.
    // T_INT is used for the source element size in both the int and float case.
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz , "src and dst size mismatch");
    // Both instructions use the encoding derived from that byte size.
    int vlen_enc = vector_length_encoding(src_sz);
    __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18272
// VectorReinterpret of an opmask with T_LONG or T_DOUBLE elements into an
// opmask with T_BYTE elements, selected (UseAVX > 2) when the two masks have
// different vector lengths. The source mask is first expanded into the vector
// TEMP $xtmp with evpmovm2q, then evpmovb2m rebuilds the byte mask from $xtmp.
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // long/double mask -> byte mask, vector lengths differ
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    // Sizes in bytes of the vectors the two masks describe; they must agree.
    // T_LONG is used for the source element size in both the long and double case.
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz , "src and dst size mismatch");
    // Both instructions use the encoding derived from that byte size.
    int vlen_enc = vector_length_encoding(src_sz);
    __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18293
// VectorReinterpret between non-mask vector types of equal size in bytes:
// input and output share the same register ($dst), so nothing is emitted.
instruct reinterpret(vec dst)
%{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // same size: dst == src
  match(Set dst (VectorReinterpret dst));

  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // intentionally empty: no code is generated for this rule
  %}
  ins_pipe( pipe_slow );
%}
18305
// Widening reinterpret for the non-AVX case (UseAVX == 0). The result is at most
// 16 bytes and the source 4 or 8 bytes: dst is loaded with the 32-bit or 64-bit
// mask constant and then ANDed with src.
// NOTE(review): the mask constants presumably keep only the low 32/64 bits - confirm.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    XMMRegister result = $dst$$XMMRegister;
    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    // dst is declared TEMP, so it can receive the mask before src is folded in.
    switch (src_vlen_in_bytes) {
      case 4:
        __ movdqu(result, ExternalAddress(vector_32_bit_mask()), noreg);
        break;
      default:
        assert(src_vlen_in_bytes == 8, "");
        __ movdqu(result, ExternalAddress(vector_64_bit_mask()), noreg);
        break;
    }
    __ pand(result, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18328
// AVX widening reinterpret of a 4-byte non-mask source vector: a single vpand of
// src with the 32-bit mask constant produces dst.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow = $src$$XMMRegister;
    // NOTE(review): the literal 0 is the vector-length argument, presumably the 128-bit encoding - confirm.
    __ vpand(widened, narrow, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}
18342
18343
// AVX widening reinterpret of a non-mask source vector larger than 4 bytes.
// The move instruction is chosen by the source size (8, 16 or 32 bytes); any
// other size is unexpected.
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    XMMRegister to = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    if (src_bytes == 8) {
      __ movq(to, from);
    } else if (src_bytes == 16) {
      __ movdqu(to, from);
    } else if (src_bytes == 32) {
      __ vmovdqu(to, from);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18362
// Narrowing reinterpret of a non-mask vector (source larger than result).
// The move instruction is chosen by the result size (4, 8, 16 or 32 bytes).
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_encode %{
    const int dst_bytes = Matcher::vector_length_in_bytes(this);
    XMMRegister to = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    if (dst_bytes == 4) {
      __ movfltz(to, from);
    } else if (dst_bytes == 8) {
      __ movq(to, from);
    } else if (dst_bytes == 16) {
      __ movdqu(to, from);
    } else if (dst_bytes == 32) {
      __ vmovdqu(to, from);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18380
18381 // ----------------------------------------------------------------------------------------------------
18382
// Scalar double rounding with an immediate rounding mode, register source.
// Requires UseSSE >= 4 (asserted in the encoding).
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  format %{ "roundsd $dst,$src" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    // Without AVX, a destination distinct from the source is zeroed first.
    // NOTE(review): presumably this breaks a dependency on the old dst value - confirm.
    const bool clear_result_first = (UseAVX == 0) && (result != input);
    if (clear_result_first) {
      __ pxor(result, result);
    }
    __ roundsd(result, input, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}
18396
// Scalar double rounding of a double constant: the operand is read from the
// constant table. Requires UseSSE >= 4 (asserted in the encoding).
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result = $dst$$XMMRegister;
    __ roundsd(result, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
18407
// Packed double rounding for vectors of fewer than 8 lanes, register source.
// Emits vroundpd; UseAVX > 0 is asserted in the encoding.
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    __ vroundpd(result, input, $rmode$$constant, enc);
  %}
  ins_pipe( pipe_slow );
%}
18419
// Packed double rounding for 8-lane vectors, register source. Emits vrndscalepd
// with the 512-bit encoding; UseAVX > 2 is asserted in the encoding.
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    __ vrndscalepd(result, input, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18430
// Packed double rounding for vectors of fewer than 8 lanes with the vector load
// folded into the instruction (memory operand). UseAVX > 0 is asserted.
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vroundpd(result, $mem$$Address, $rmode$$constant, enc);
  %}
  ins_pipe( pipe_slow );
%}
18442
// Packed double rounding for 8-lane vectors with the vector load folded into the
// instruction (memory operand). Uses the 512-bit encoding; UseAVX > 2 is asserted.
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister result = $dst$$XMMRegister;
    __ vrndscalepd(result, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18453
// Matches the OnSpinWait node and emits a single pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
18467
// Fused multiply-add on doubles: c = a * b + c. The accumulator c is both an
// input and the result register. UseFMA is asserted in the encoding.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister acc = $c$$XMMRegister;
    __ fmad(acc, $a$$XMMRegister, $b$$XMMRegister, acc);
  %}
  ins_pipe( pipe_slow );
%}
18479
// Fused multiply-add on floats: c = a * b + c. The accumulator c is both an
// input and the result register. UseFMA is asserted in the encoding.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister acc = $c$$XMMRegister;
    __ fmaf(acc, $a$$XMMRegister, $b$$XMMRegister, acc);
  %}
  ins_pipe( pipe_slow );
%}
18491
18492 // ====================VECTOR INSTRUCTIONS=====================================
18493
18494 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Placeholder rule for a vec -> legVec register-class move. It has an empty
// format and its encoding is ShouldNotReachHere(), i.e. it is never expected to
// be emitted.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18503
// Placeholder rule for a legVec -> vec register-class move. It has an empty
// format and its encoding is ShouldNotReachHere(), i.e. it is never expected to
// be emitted.
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18512
18513 // ============================================================================
18514
// Generic vector load: delegates to load_vector with the element type and the
// vector size in bytes of this node.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "load_vector $dst,$mem" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    XMMRegister loaded = $dst$$XMMRegister;
    __ load_vector(elem_bt, loaded, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}
18526
// Generic vector store. The store instruction is picked by the size in bytes of
// the stored vector (4, 8, 16, 32 or 64); any other size is unexpected.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src\n\t" %}
  ins_encode %{
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    XMMRegister value = $src$$XMMRegister;
    if (src_bytes == 4) {
      __ movdl($mem$$Address, value);
    } else if (src_bytes == 8) {
      __ movq($mem$$Address, value);
    } else if (src_bytes == 16) {
      __ movdqu($mem$$Address, value);
    } else if (src_bytes == 32) {
      __ vmovdqu($mem$$Address, value);
    } else if (src_bytes == 64) {
      __ evmovdqul($mem$$Address, value, Assembler::AVX_512bit);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18544
18545 // ---------------------------------------- Gather ------------------------------------
18546
18547 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18548
// Unmasked gather of non-subword elements when AVX512VL is not available and the
// vector is at most 32 bytes. A vector temp is filled by comparing it with itself
// and passed to vgather as the mask; the base address is materialized into tmp.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    const int enc = vector_length_encoding(this);
    XMMRegister all_lanes = $mask$$XMMRegister;
    Register base = $tmp$$Register;
    // Self-equality compare: every lane of the mask temp compares equal.
    __ vpcmpeqd(all_lanes, all_lanes, all_lanes, enc);
    __ lea(base, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, base, $idx$$XMMRegister, all_lanes, enc);
  %}
  ins_pipe( pipe_slow );
%}
18565
18566
// Unmasked EVEX gather of non-subword elements (AVX512VL available, or a 64-byte
// vector). The opmask temp is set up with kxnorwl of the register with itself,
// the base address is materialized into tmp, and evgather performs the load.
// The format string now prints the allocated opmask temp instead of the literal
// text "ktmp".
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // x XNOR x: sets the bits of the opmask temp so every lane is gathered.
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18582
// Masked EVEX gather of non-subword elements (AVX512VL available, or a 64-byte
// vector). The mask is copied into an opmask temp, dst is zeroed, the base
// address is materialized into tmp, and evgather performs the load.
// The format string now prints the allocated opmask temp instead of the literal
// text "ktmp".
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    // Clear dst before the masked gather.
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18603
// Unmasked gather of subword elements for vectors of at most 8 bytes. The base
// address is materialized into tmp and the work is delegated to vgather8b.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    Register base = $tmp$$Register;
    __ lea(base, $mem$$Address);
    __ vgather8b(elem_bt, $dst$$XMMRegister, base, $idx_base$$Register, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18617
// Unmasked gather of subword elements for vectors larger than 8 bytes. The base
// address goes into tmp, the index base is copied into a temp, and the work is
// delegated to vgather_subword with noreg for the mask and mask-index arguments.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int lane_count = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    Register base = $tmp$$Register;
    Register idx_cursor = $idx_base_temp$$Register;
    __ lea(base, $mem$$Address);
    // The helper is handed a copy of the index base rather than the input register.
    __ movptr(idx_cursor, $idx_base$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, base, idx_cursor, noreg, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, lane_count, enc);
  %}
  ins_pipe( pipe_slow );
%}
18635
// Masked gather of subword elements for vectors of at most 8 bytes when AVX512BW
// is available (opmask register mask). The mask index temp is zeroed, the base
// address goes into tmp, the opmask is copied into a general register, and the
// work is delegated to vgather8b_masked.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    Register bit_pos = $mask_idx$$Register;
    Register base = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ xorq(bit_pos, bit_pos);
    __ lea(base, $mem$$Address);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, base, $idx_base$$Register, mask_bits, bit_pos, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18651
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512BW is available (opmask register mask). The mask index temp is zeroed,
// the base address goes into tmp, the index base is copied into a temp, the
// opmask is copied into a general register, and vgather_subword does the work.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int lane_count = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    Register bit_pos = $mask_idx$$Register;
    Register base = $tmp$$Register;
    Register idx_cursor = $idx_base_temp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ xorq(bit_pos, bit_pos);
    __ lea(base, $mem$$Address);
    __ movptr(idx_cursor, $idx_base$$Register);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, base, idx_cursor, mask_bits, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, bit_pos, $length$$Register, lane_count, enc);
  %}
  ins_pipe( pipe_slow );
%}
18671
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512VL+BW is not available (vector register mask). The vector mask is turned
// into a bit mask with vpmovmskb; for T_SHORT the bits are compacted with pextl
// using the 0x55555555 selector. The work is then delegated to vgather8b_masked.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    Register bit_pos = $mask_idx$$Register;
    Register base = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ lea(base, $mem$$Address);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, enc);
    if (elem_bt == T_SHORT) {
      // bit_pos briefly holds the selector that keeps every other mask bit.
      __ movl(bit_pos, 0x55555555);
      __ pextl(mask_bits, mask_bits, bit_pos);
    }
    __ xorl(bit_pos, bit_pos);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, base, $idx_base$$Register, mask_bits, bit_pos, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18691
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512VL+BW is not available (vector register mask). The vector mask is turned
// into a bit mask with vpmovmskb; for T_SHORT the bits are compacted with pextl
// using the 0x55555555 selector. The work is then delegated to vgather_subword.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int lane_count = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    Register bit_pos = $mask_idx$$Register;
    Register base = $tmp$$Register;
    Register idx_cursor = $idx_base_temp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ lea(base, $mem$$Address);
    __ movptr(idx_cursor, $idx_base$$Register);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, enc);
    if (elem_bt == T_SHORT) {
      // bit_pos briefly holds the selector that keeps every other mask bit.
      __ movl(bit_pos, 0x55555555);
      __ pextl(mask_bits, mask_bits, bit_pos);
    }
    __ xorl(bit_pos, bit_pos);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, base, idx_cursor, mask_bits, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, bit_pos, $length$$Register, lane_count, enc);
  %}
  ins_pipe( pipe_slow );
%}
18715
18716 // ====================Scatter=======================================
18717
18718 // Scatter INT, LONG, FLOAT, DOUBLE
18719
// Unmasked scatter of non-subword elements (UseAVX > 2, vectors of at least 16
// bytes). The opmask temp is loaded from the all-bits-set constant, the base
// address is materialized into tmp, and evscatter performs the store.
// The format string now prints the allocated opmask temp instead of a
// hard-coded "k2".
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    // Enable every lane by loading the opmask temp from the all-bits-set constant.
    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18738
// Masked scatter of non-subword elements (vectors of at least 16 bytes). The
// mask is copied into an opmask temp, the base address is materialized into tmp,
// and evscatter performs the store.
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int enc = vector_length_encoding(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    KRegister live_mask = $ktmp$$KRegister;
    Register base = $tmp$$Register;
    // Note: the scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is moved to a temporary.
    __ kmovwl(live_mask, $mask$$KRegister);
    __ lea(base, $mem$$Address);
    __ evscatter(elem_bt, base, $idx$$XMMRegister, live_mask, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
18756
18757 // ====================REPLICATE=======================================
18758
// Broadcast a byte held in a general register to every lane of a byte vector.
//  - UseAVX < 2:  movdl + punpcklbw + pshuflw, plus punpcklqdq for 16 lanes.
//  - UseAVX >= 2: evpbroadcastb straight from the general register when the
//                 vector has 64 lanes or AVX512VL+BW is available, otherwise
//                 movdl followed by vpbroadcastb.
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    XMMRegister vdst = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    if (UseAVX < 2) {
      assert(UseAVX < 2, "");
      __ movdl(vdst, scalar);
      __ punpcklbw(vdst, vdst);
      __ pshuflw(vdst, vdst, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq(vdst, vdst);
      }
    } else {
      const int enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb(vdst, scalar, enc);
      } else {
        __ movdl(vdst, scalar);
        __ vpbroadcastb(vdst, vdst, enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18788
// Broadcast a byte loaded from memory to every lane of a byte vector
// (UseAVX >= 2): a single vpbroadcastb with a memory operand.
instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister vdst = $dst$$XMMRegister;
    __ vpbroadcastb(vdst, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
18799
18800 // ====================ReplicateS=======================================
18801
// Broadcast a short held in a general register to every lane of a short vector.
//  - UseAVX < 2:  movdl + pshuflw, plus punpcklqdq for 8 lanes.
//  - UseAVX >= 2: evpbroadcastw straight from the general register when the
//                 vector has 32 lanes or AVX512VL+BW is available, otherwise
//                 movdl followed by vpbroadcastw.
instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    XMMRegister vdst = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    if (UseAVX < 2) {
      assert(UseAVX < 2, "");
      __ movdl(vdst, scalar);
      __ pshuflw(vdst, vdst, 0x00);
      if (vlen >= 8) {
        assert(vlen == 8, "");
        __ punpcklqdq(vdst, vdst);
      }
    } else {
      if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
        __ evpbroadcastw(vdst, scalar, enc);
      } else {
        __ movdl(vdst, scalar);
        __ vpbroadcastw(vdst, vdst, enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18829
// Broadcast an immH constant to every 16-bit lane: the constant is moved into a
// general register temp and broadcast with evpbroadcastw. AVX512-FP16 support
// and a T_SHORT element type are asserted in the encoding.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
    const int enc = vector_length_encoding(this);
    Register half_bits = $rtmp$$Register;
    __ movl(half_bits, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, half_bits, enc);
  %}
  ins_pipe( pipe_slow );
%}
18843
// Broadcast a value held in a float register to every 16-bit lane (AVX512-FP16,
// T_SHORT elements): evmovw moves it into a general register temp, then
// evpbroadcastw broadcasts it.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    Register half_bits = $rtmp$$Register;
    __ evmovw(half_bits, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, half_bits, enc);
  %}
  ins_pipe( pipe_slow );
%}
18856
// Broadcast a short loaded from memory to every lane of a short vector
// (UseAVX >= 2): a single vpbroadcastw with a memory operand.
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister vdst = $dst$$XMMRegister;
    __ vpbroadcastw(vdst, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
18867
18868 // ====================ReplicateI=======================================
18869
// Broadcast an int held in a general register to every lane of an int vector.
// With 16 lanes or AVX512VL, evpbroadcastd reads the general register directly.
// Otherwise the scalar is first moved into dst with movdl and then broadcast
// with vpbroadcastd (AVX2) or pshufd.
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    XMMRegister vdst = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    if (lanes == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastd(vdst, scalar, enc);
    } else {
      // Both remaining variants start from the scalar in the low lane of dst.
      const bool has_avx2 = VM_Version::supports_avx2();
      __ movdl(vdst, scalar);
      if (has_avx2) {
        __ vpbroadcastd(vdst, vdst, enc);
      } else {
        __ pshufd(vdst, vdst, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18889
// Broadcast an int loaded from memory to every lane of an int vector:
// vpbroadcastd with AVX2, else vbroadcastss with AVX, else movdl + pshufd.
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister vdst = $dst$$XMMRegister;
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd(vdst, $mem$$Address, enc);
    } else {
      if (VM_Version::supports_avx()) {
        __ vbroadcastss(vdst, $mem$$Address, enc);
      } else {
        __ movdl(vdst, $mem$$Address);
        __ pshufd(vdst, vdst, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18907
// Broadcast an int immediate to a non-long integral vector by loading a
// replicated constant from the constant table. The number of replicated bytes
// is 4 with AVX, 8 with SSE3 only and 16 otherwise; it is divided by the element
// size to obtain the replication count passed to vreplicate_imm.
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constantaddress argument is deliberately left
    // self-contained (no locals of this body); presumably it is evaluated
    // outside this encoding - confirm before hoisting anything out of it.
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                                   type2aelembytes(Matcher::vector_element_basic_type(this))));
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(elem_bt, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18922
// Broadcast integer zero to a non-long integral vector by XORing dst with
// itself: vpxor with the vector length encoding when EVEX is supported without
// AVX512VL, plain pxor otherwise.
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister zeroed = $dst$$XMMRegister;
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor(zeroed, zeroed);
    } else {
      __ vpxor(zeroed, zeroed, zeroed, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18938
// Broadcast the int constant -1 to a non-long integral vector via vallones.
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    XMMRegister ones = $dst$$XMMRegister;
    const int enc = vector_length_encoding(this);
    __ vallones(ones, enc);
  %}
  ins_pipe( pipe_slow );
%}
18949
18950 // ====================ReplicateL=======================================
18951
// Broadcast a long (8 bytes) held in a general register to every lane of a long
// vector. With 8 lanes or AVX512VL, evpbroadcastq reads the general register
// directly. Otherwise the scalar is first moved into dst with movdq and then
// broadcast with vpbroadcastq (AVX2) or punpcklqdq.
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    XMMRegister vdst = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    if (lanes == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq(vdst, scalar, enc);
    } else {
      // Both remaining variants start from the scalar in the low lane of dst.
      const bool has_avx2 = VM_Version::supports_avx2();
      __ movdq(vdst, scalar);
      if (has_avx2) {
        __ vpbroadcastq(vdst, vdst, enc);
      } else {
        __ punpcklqdq(vdst, vdst);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18972
// Broadcast a long loaded from memory to every lane of a long vector:
// vpbroadcastq with AVX2, else movddup with SSE3, else movq + punpcklqdq.
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister vdst = $dst$$XMMRegister;
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq(vdst, $mem$$Address, enc);
    } else {
      if (VM_Version::supports_sse3()) {
        __ movddup(vdst, $mem$$Address);
      } else {
        __ movq(vdst, $mem$$Address);
        __ punpcklqdq(vdst, vdst);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18990
// Broadcast a long (8 byte) immediate to a long vector by loading a replicated
// constant from the constant table; the constant holds one copy with SSE3 and
// two copies otherwise.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constantaddress argument is deliberately left
    // self-contained (no locals of this body); presumably it is evaluated
    // outside this encoding - confirm before hoisting anything out of it.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    XMMRegister vdst = $dst$$XMMRegister;
    __ load_constant_vector(T_LONG, vdst, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
19003
// Broadcast long zero to a long vector by XORing dst with itself: vpxor with the
// vector length encoding when EVEX is supported without AVX512VL, plain pxor
// otherwise.
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister zeroed = $dst$$XMMRegister;
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor(zeroed, zeroed);
    } else {
      __ vpxor(zeroed, zeroed, zeroed, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19018
19019 instruct ReplL_M1(vec dst, immL_M1 con) %{
19020 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19021 match(Set dst (Replicate con));
19022 format %{ "vallones $dst" %}
19023 ins_encode %{
19024 int vector_len = vector_length_encoding(this);
19025 __ vallones($dst$$XMMRegister, vector_len);
19026 %}
19027 ins_pipe( pipe_slow );
19028 %}
19029
19030 // ====================ReplicateF=======================================
19031
19032 instruct vReplF_reg(vec dst, vlRegF src) %{
19033 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19034 match(Set dst (Replicate src));
19035 format %{ "replicateF $dst,$src" %}
19036 ins_encode %{
19037 uint vlen = Matcher::vector_length(this);
19038 int vlen_enc = vector_length_encoding(this);
19039 if (vlen <= 4) {
19040 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19041 } else if (VM_Version::supports_avx2()) {
19042 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19043 } else {
19044 assert(vlen == 8, "sanity");
19045 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19046 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19047 }
19048 %}
19049 ins_pipe( pipe_slow );
19050 %}
19051
19052 instruct ReplF_reg(vec dst, vlRegF src) %{
19053 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19054 match(Set dst (Replicate src));
19055 format %{ "replicateF $dst,$src" %}
19056 ins_encode %{
19057 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19058 %}
19059 ins_pipe( pipe_slow );
19060 %}
19061
19062 instruct ReplF_mem(vec dst, memory mem) %{
19063 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19064 match(Set dst (Replicate (LoadF mem)));
19065 format %{ "replicateF $dst,$mem" %}
19066 ins_encode %{
19067 int vlen_enc = vector_length_encoding(this);
19068 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19069 %}
19070 ins_pipe( pipe_slow );
19071 %}
19072
19073 // Replicate float scalar immediate to be vector by loading from const table.
19074 instruct ReplF_imm(vec dst, immF con) %{
19075 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19076 match(Set dst (Replicate con));
19077 format %{ "replicateF $dst,$con" %}
19078 ins_encode %{
19079 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19080 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19081 int vlen = Matcher::vector_length_in_bytes(this);
19082 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19083 %}
19084 ins_pipe( pipe_slow );
19085 %}
19086
19087 instruct ReplF_zero(vec dst, immF0 zero) %{
19088 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19089 match(Set dst (Replicate zero));
19090 format %{ "replicateF $dst,$zero" %}
19091 ins_encode %{
19092 int vlen_enc = vector_length_encoding(this);
19093 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19094 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19095 } else {
19096 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19097 }
19098 %}
19099 ins_pipe( fpu_reg_reg );
19100 %}
19101
19102 // ====================ReplicateD=======================================
19103
19104 // Replicate double (8 bytes) scalar to be vector
19105 instruct vReplD_reg(vec dst, vlRegD src) %{
19106 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19107 match(Set dst (Replicate src));
19108 format %{ "replicateD $dst,$src" %}
19109 ins_encode %{
19110 uint vlen = Matcher::vector_length(this);
19111 int vlen_enc = vector_length_encoding(this);
19112 if (vlen <= 2) {
19113 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19114 } else if (VM_Version::supports_avx2()) {
19115 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19116 } else {
19117 assert(vlen == 4, "sanity");
19118 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19119 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19120 }
19121 %}
19122 ins_pipe( pipe_slow );
19123 %}
19124
19125 instruct ReplD_reg(vec dst, vlRegD src) %{
19126 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19127 match(Set dst (Replicate src));
19128 format %{ "replicateD $dst,$src" %}
19129 ins_encode %{
19130 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19131 %}
19132 ins_pipe( pipe_slow );
19133 %}
19134
19135 instruct ReplD_mem(vec dst, memory mem) %{
19136 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19137 match(Set dst (Replicate (LoadD mem)));
19138 format %{ "replicateD $dst,$mem" %}
19139 ins_encode %{
19140 if (Matcher::vector_length(this) >= 4) {
19141 int vlen_enc = vector_length_encoding(this);
19142 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19143 } else {
19144 __ movddup($dst$$XMMRegister, $mem$$Address);
19145 }
19146 %}
19147 ins_pipe( pipe_slow );
19148 %}
19149
19150 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19151 instruct ReplD_imm(vec dst, immD con) %{
19152 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19153 match(Set dst (Replicate con));
19154 format %{ "replicateD $dst,$con" %}
19155 ins_encode %{
19156 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19157 int vlen = Matcher::vector_length_in_bytes(this);
19158 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19159 %}
19160 ins_pipe( pipe_slow );
19161 %}
19162
19163 instruct ReplD_zero(vec dst, immD0 zero) %{
19164 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19165 match(Set dst (Replicate zero));
19166 format %{ "replicateD $dst,$zero" %}
19167 ins_encode %{
19168 int vlen_enc = vector_length_encoding(this);
19169 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19170 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19171 } else {
19172 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19173 }
19174 %}
19175 ins_pipe( fpu_reg_reg );
19176 %}
19177
19178 // ====================VECTOR INSERT=======================================
19179
19180 instruct insert(vec dst, rRegI val, immU8 idx) %{
19181 predicate(Matcher::vector_length_in_bytes(n) < 32);
19182 match(Set dst (VectorInsert (Binary dst val) idx));
19183 format %{ "vector_insert $dst,$val,$idx" %}
19184 ins_encode %{
19185 assert(UseSSE >= 4, "required");
19186 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19187
19188 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19189
19190 assert(is_integral_type(elem_bt), "");
19191 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19192
19193 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19194 %}
19195 ins_pipe( pipe_slow );
19196 %}
19197
19198 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19199 predicate(Matcher::vector_length_in_bytes(n) == 32);
19200 match(Set dst (VectorInsert (Binary src val) idx));
19201 effect(TEMP vtmp);
19202 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19203 ins_encode %{
19204 int vlen_enc = Assembler::AVX_256bit;
19205 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19206 int elem_per_lane = 16/type2aelembytes(elem_bt);
19207 int log2epr = log2(elem_per_lane);
19208
19209 assert(is_integral_type(elem_bt), "sanity");
19210 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19211
19212 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19213 uint y_idx = ($idx$$constant >> log2epr) & 1;
19214 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19215 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19216 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19217 %}
19218 ins_pipe( pipe_slow );
19219 %}
19220
19221 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19222 predicate(Matcher::vector_length_in_bytes(n) == 64);
19223 match(Set dst (VectorInsert (Binary src val) idx));
19224 effect(TEMP vtmp);
19225 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19226 ins_encode %{
19227 assert(UseAVX > 2, "sanity");
19228
19229 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19230 int elem_per_lane = 16/type2aelembytes(elem_bt);
19231 int log2epr = log2(elem_per_lane);
19232
19233 assert(is_integral_type(elem_bt), "");
19234 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19235
19236 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19237 uint y_idx = ($idx$$constant >> log2epr) & 3;
19238 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19239 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19240 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19241 %}
19242 ins_pipe( pipe_slow );
19243 %}
19244
19245 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19246 predicate(Matcher::vector_length(n) == 2);
19247 match(Set dst (VectorInsert (Binary dst val) idx));
19248 format %{ "vector_insert $dst,$val,$idx" %}
19249 ins_encode %{
19250 assert(UseSSE >= 4, "required");
19251 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19252 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19253
19254 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19255 %}
19256 ins_pipe( pipe_slow );
19257 %}
19258
19259 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19260 predicate(Matcher::vector_length(n) == 4);
19261 match(Set dst (VectorInsert (Binary src val) idx));
19262 effect(TEMP vtmp);
19263 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19264 ins_encode %{
19265 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19266 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19267
19268 uint x_idx = $idx$$constant & right_n_bits(1);
19269 uint y_idx = ($idx$$constant >> 1) & 1;
19270 int vlen_enc = Assembler::AVX_256bit;
19271 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19272 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19273 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19274 %}
19275 ins_pipe( pipe_slow );
19276 %}
19277
19278 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19279 predicate(Matcher::vector_length(n) == 8);
19280 match(Set dst (VectorInsert (Binary src val) idx));
19281 effect(TEMP vtmp);
19282 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19283 ins_encode %{
19284 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19285 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19286
19287 uint x_idx = $idx$$constant & right_n_bits(1);
19288 uint y_idx = ($idx$$constant >> 1) & 3;
19289 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19290 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19291 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19292 %}
19293 ins_pipe( pipe_slow );
19294 %}
19295
19296 instruct insertF(vec dst, regF val, immU8 idx) %{
19297 predicate(Matcher::vector_length(n) < 8);
19298 match(Set dst (VectorInsert (Binary dst val) idx));
19299 format %{ "vector_insert $dst,$val,$idx" %}
19300 ins_encode %{
19301 assert(UseSSE >= 4, "sanity");
19302
19303 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19304 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19305
19306 uint x_idx = $idx$$constant & right_n_bits(2);
19307 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19308 %}
19309 ins_pipe( pipe_slow );
19310 %}
19311
19312 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19313 predicate(Matcher::vector_length(n) >= 8);
19314 match(Set dst (VectorInsert (Binary src val) idx));
19315 effect(TEMP vtmp);
19316 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19317 ins_encode %{
19318 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19319 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19320
19321 int vlen = Matcher::vector_length(this);
19322 uint x_idx = $idx$$constant & right_n_bits(2);
19323 if (vlen == 8) {
19324 uint y_idx = ($idx$$constant >> 2) & 1;
19325 int vlen_enc = Assembler::AVX_256bit;
19326 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19327 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19328 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19329 } else {
19330 assert(vlen == 16, "sanity");
19331 uint y_idx = ($idx$$constant >> 2) & 3;
19332 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19333 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19334 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19335 }
19336 %}
19337 ins_pipe( pipe_slow );
19338 %}
19339
19340 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19341 predicate(Matcher::vector_length(n) == 2);
19342 match(Set dst (VectorInsert (Binary dst val) idx));
19343 effect(TEMP tmp);
19344 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19345 ins_encode %{
19346 assert(UseSSE >= 4, "sanity");
19347 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19348 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19349
19350 __ movq($tmp$$Register, $val$$XMMRegister);
19351 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19352 %}
19353 ins_pipe( pipe_slow );
19354 %}
19355
19356 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19357 predicate(Matcher::vector_length(n) == 4);
19358 match(Set dst (VectorInsert (Binary src val) idx));
19359 effect(TEMP vtmp, TEMP tmp);
19360 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19361 ins_encode %{
19362 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19363 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19364
19365 uint x_idx = $idx$$constant & right_n_bits(1);
19366 uint y_idx = ($idx$$constant >> 1) & 1;
19367 int vlen_enc = Assembler::AVX_256bit;
19368 __ movq($tmp$$Register, $val$$XMMRegister);
19369 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19370 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19371 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19372 %}
19373 ins_pipe( pipe_slow );
19374 %}
19375
19376 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19377 predicate(Matcher::vector_length(n) == 8);
19378 match(Set dst (VectorInsert (Binary src val) idx));
19379 effect(TEMP tmp, TEMP vtmp);
19380 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19381 ins_encode %{
19382 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19383 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19384
19385 uint x_idx = $idx$$constant & right_n_bits(1);
19386 uint y_idx = ($idx$$constant >> 1) & 3;
19387 __ movq($tmp$$Register, $val$$XMMRegister);
19388 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19389 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19390 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19391 %}
19392 ins_pipe( pipe_slow );
19393 %}
19394
19395 // ====================REDUCTION ARITHMETIC=======================================
19396
19397 // =======================Int Reduction==========================================
19398
19399 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19400 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19401 match(Set dst (AddReductionVI src1 src2));
19402 match(Set dst (MulReductionVI src1 src2));
19403 match(Set dst (AndReductionV src1 src2));
19404 match(Set dst ( OrReductionV src1 src2));
19405 match(Set dst (XorReductionV src1 src2));
19406 match(Set dst (MinReductionV src1 src2));
19407 match(Set dst (MaxReductionV src1 src2));
19408 match(Set dst (UMinReductionV src1 src2));
19409 match(Set dst (UMaxReductionV src1 src2));
19410 effect(TEMP vtmp1, TEMP vtmp2);
19411 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19412 ins_encode %{
19413 int opcode = this->ideal_Opcode();
19414 int vlen = Matcher::vector_length(this, $src2);
19415 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19416 %}
19417 ins_pipe( pipe_slow );
19418 %}
19419
19420 // =======================Long Reduction==========================================
19421
19422 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19423 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19424 match(Set dst (AddReductionVL src1 src2));
19425 match(Set dst (MulReductionVL src1 src2));
19426 match(Set dst (AndReductionV src1 src2));
19427 match(Set dst ( OrReductionV src1 src2));
19428 match(Set dst (XorReductionV src1 src2));
19429 match(Set dst (MinReductionV src1 src2));
19430 match(Set dst (MaxReductionV src1 src2));
19431 match(Set dst (UMinReductionV src1 src2));
19432 match(Set dst (UMaxReductionV src1 src2));
19433 effect(TEMP vtmp1, TEMP vtmp2);
19434 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19435 ins_encode %{
19436 int opcode = this->ideal_Opcode();
19437 int vlen = Matcher::vector_length(this, $src2);
19438 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19439 %}
19440 ins_pipe( pipe_slow );
19441 %}
19442
19443 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19444 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19445 match(Set dst (AddReductionVL src1 src2));
19446 match(Set dst (MulReductionVL src1 src2));
19447 match(Set dst (AndReductionV src1 src2));
19448 match(Set dst ( OrReductionV src1 src2));
19449 match(Set dst (XorReductionV src1 src2));
19450 match(Set dst (MinReductionV src1 src2));
19451 match(Set dst (MaxReductionV src1 src2));
19452 match(Set dst (UMinReductionV src1 src2));
19453 match(Set dst (UMaxReductionV src1 src2));
19454 effect(TEMP vtmp1, TEMP vtmp2);
19455 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19456 ins_encode %{
19457 int opcode = this->ideal_Opcode();
19458 int vlen = Matcher::vector_length(this, $src2);
19459 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19460 %}
19461 ins_pipe( pipe_slow );
19462 %}
19463
19464 // =======================Float Reduction==========================================
19465
19466 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19467 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19468 match(Set dst (AddReductionVF dst src));
19469 match(Set dst (MulReductionVF dst src));
19470 effect(TEMP dst, TEMP vtmp);
19471 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19472 ins_encode %{
19473 int opcode = this->ideal_Opcode();
19474 int vlen = Matcher::vector_length(this, $src);
19475 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19476 %}
19477 ins_pipe( pipe_slow );
19478 %}
19479
19480 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19481 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19482 match(Set dst (AddReductionVF dst src));
19483 match(Set dst (MulReductionVF dst src));
19484 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19485 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19486 ins_encode %{
19487 int opcode = this->ideal_Opcode();
19488 int vlen = Matcher::vector_length(this, $src);
19489 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19490 %}
19491 ins_pipe( pipe_slow );
19492 %}
19493
19494 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19495 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19496 match(Set dst (AddReductionVF dst src));
19497 match(Set dst (MulReductionVF dst src));
19498 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19499 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19500 ins_encode %{
19501 int opcode = this->ideal_Opcode();
19502 int vlen = Matcher::vector_length(this, $src);
19503 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19504 %}
19505 ins_pipe( pipe_slow );
19506 %}
19507
19508
19509 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19510 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19511 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19512 // src1 contains reduction identity
19513 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19514 match(Set dst (AddReductionVF src1 src2));
19515 match(Set dst (MulReductionVF src1 src2));
19516 effect(TEMP dst);
19517 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19518 ins_encode %{
19519 int opcode = this->ideal_Opcode();
19520 int vlen = Matcher::vector_length(this, $src2);
19521 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19522 %}
19523 ins_pipe( pipe_slow );
19524 %}
19525
19526 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19527 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19528 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19529 // src1 contains reduction identity
19530 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19531 match(Set dst (AddReductionVF src1 src2));
19532 match(Set dst (MulReductionVF src1 src2));
19533 effect(TEMP dst, TEMP vtmp);
19534 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19535 ins_encode %{
19536 int opcode = this->ideal_Opcode();
19537 int vlen = Matcher::vector_length(this, $src2);
19538 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19539 %}
19540 ins_pipe( pipe_slow );
19541 %}
19542
19543 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19544 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19545 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19546 // src1 contains reduction identity
19547 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19548 match(Set dst (AddReductionVF src1 src2));
19549 match(Set dst (MulReductionVF src1 src2));
19550 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19551 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19552 ins_encode %{
19553 int opcode = this->ideal_Opcode();
19554 int vlen = Matcher::vector_length(this, $src2);
19555 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19556 %}
19557 ins_pipe( pipe_slow );
19558 %}
19559
19560 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19561 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19562 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19563 // src1 contains reduction identity
19564 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19565 match(Set dst (AddReductionVF src1 src2));
19566 match(Set dst (MulReductionVF src1 src2));
19567 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19568 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19569 ins_encode %{
19570 int opcode = this->ideal_Opcode();
19571 int vlen = Matcher::vector_length(this, $src2);
19572 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19573 %}
19574 ins_pipe( pipe_slow );
19575 %}
19576
19577 // =======================Double Reduction==========================================
19578
19579 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19580 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19581 match(Set dst (AddReductionVD dst src));
19582 match(Set dst (MulReductionVD dst src));
19583 effect(TEMP dst, TEMP vtmp);
19584 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19585 ins_encode %{
19586 int opcode = this->ideal_Opcode();
19587 int vlen = Matcher::vector_length(this, $src);
19588 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19589 %}
19590 ins_pipe( pipe_slow );
19591 %}
19592
19593 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19594 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19595 match(Set dst (AddReductionVD dst src));
19596 match(Set dst (MulReductionVD dst src));
19597 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19598 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19599 ins_encode %{
19600 int opcode = this->ideal_Opcode();
19601 int vlen = Matcher::vector_length(this, $src);
19602 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19603 %}
19604 ins_pipe( pipe_slow );
19605 %}
19606
19607 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19608 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19609 match(Set dst (AddReductionVD dst src));
19610 match(Set dst (MulReductionVD dst src));
19611 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19612 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19613 ins_encode %{
19614 int opcode = this->ideal_Opcode();
19615 int vlen = Matcher::vector_length(this, $src);
19616 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19617 %}
19618 ins_pipe( pipe_slow );
19619 %}
19620
19621 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19622 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19623 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19624 // src1 contains reduction identity
19625 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19626 match(Set dst (AddReductionVD src1 src2));
19627 match(Set dst (MulReductionVD src1 src2));
19628 effect(TEMP dst);
19629 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19630 ins_encode %{
19631 int opcode = this->ideal_Opcode();
19632 int vlen = Matcher::vector_length(this, $src2);
19633 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19634 %}
19635 ins_pipe( pipe_slow );
19636 %}
19637
19638 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19639 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19640 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19641 // src1 contains reduction identity
19642 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19643 match(Set dst (AddReductionVD src1 src2));
19644 match(Set dst (MulReductionVD src1 src2));
19645 effect(TEMP dst, TEMP vtmp);
19646 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19647 ins_encode %{
19648 int opcode = this->ideal_Opcode();
19649 int vlen = Matcher::vector_length(this, $src2);
19650 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19651 %}
19652 ins_pipe( pipe_slow );
19653 %}
19654
19655 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19656 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19657 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19658 // src1 contains reduction identity
19659 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19660 match(Set dst (AddReductionVD src1 src2));
19661 match(Set dst (MulReductionVD src1 src2));
19662 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19663 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19664 ins_encode %{
19665 int opcode = this->ideal_Opcode();
19666 int vlen = Matcher::vector_length(this, $src2);
19667 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19668 %}
19669 ins_pipe( pipe_slow );
19670 %}
19671
19672 // =======================Byte Reduction==========================================
19673
19674 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19675 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19676 match(Set dst (AddReductionVI src1 src2));
19677 match(Set dst (AndReductionV src1 src2));
19678 match(Set dst ( OrReductionV src1 src2));
19679 match(Set dst (XorReductionV src1 src2));
19680 match(Set dst (MinReductionV src1 src2));
19681 match(Set dst (MaxReductionV src1 src2));
19682 match(Set dst (UMinReductionV src1 src2));
19683 match(Set dst (UMaxReductionV src1 src2));
19684 effect(TEMP vtmp1, TEMP vtmp2);
19685 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19686 ins_encode %{
19687 int opcode = this->ideal_Opcode();
19688 int vlen = Matcher::vector_length(this, $src2);
19689 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19690 %}
19691 ins_pipe( pipe_slow );
19692 %}
19693
// Byte-element vector reduction selected when AVX512BW IS supported.
// Same set of matched reductions and same reduceB helper call as the
// non-AVX512BW rule; the vector operands use the 'vec' operand class here.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst (OrReductionV   src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The ideal opcode tells the helper which reduction to emit.
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceB(ideal_op, lane_cnt,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19713
19714 // =======================Short Reduction==========================================
19715
// Short-element vector reduction. Matches add, mul, and/or/xor and
// signed/unsigned min/max reductions whose vector input (in(2), i.e. src2)
// has T_SHORT elements, and delegates code emission to reduceS.
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // in(2) is the vector operand src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT);
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst (OrReductionV   src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceS(ideal_op, lane_cnt,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19736
// =======================Byte Mul Reduction=====================================
19738
// Multiply reduction over byte vectors of at most 32 elements.
// dst is declared TEMP in addition to the two vector temporaries; emission
// is delegated to mulreduceB.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  // in(2) is the vector operand src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ mulreduceB(ideal_op, lane_cnt,
                  $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19752
// Multiply reduction over byte vectors of exactly 64 elements.
// Identical to the <= 32 element rule except that the vector operands are
// taken from the legVec operand class.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // in(2) is the vector operand src2.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ mulreduceB(ideal_op, lane_cnt,
                  $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19766
19767 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
// Min/Max reduction of a 2-element float vector, pre-AVX10.2.
// Only matches when the scalar input is the constant +Inf (for Min) or
// -Inf (for Max); that immediate (src1) is matched but never passed to the
// emitter, and 'false' is handed to reduceFloatMinMax as its third argument
// (presumably "dst holds no incoming value" -- confirm against the helper).
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(ideal_op, lane_cnt, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19789
// Min/Max reduction of a float vector with 4 or more elements, pre-AVX10.2.
// Same matching conditions as the 2-element rule (scalar input must be the
// constant +Inf for Min / -Inf for Max), but one extra vector temporary
// (xmm_0) is reserved and passed to reduceFloatMinMax.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(ideal_op, lane_cnt, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19810
// Min/Max reduction of a 2-element float vector, pre-AVX10.2, where the
// scalar input is an arbitrary value living in dst (dst is both the first
// input of the matched node and its result). 'true' is passed as the third
// argument of reduceFloatMinMax (presumably "dst carries a valid incoming
// value" -- confirm against the helper).
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, lane_cnt, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19829
19830
// Min/Max reduction of a float vector with 4 or more elements, pre-AVX10.2,
// with the scalar input held in dst (dst is input and result of the match).
// Reserves the additional temporary xmm_0 compared with the 2-element rule.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, lane_cnt, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19849
// Min/Max reduction of a 2-element float vector on AVX10.2 hardware.
// Matches only when the scalar input is the constant +Inf (Min) or -Inf
// (Max); that immediate is not forwarded to the emitter. Only one vector
// temporary is needed: the three leading temp slots of reduceFloatMinMax
// are filled with xnoreg.
// The format label now reads "vector_minmax2F_reduction" so the printed
// name identifies the element type and 2-lane shape, consistent with the
// other 2-element float/double min/max rules in this file.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(ideal_op, lane_cnt, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19867
// Min/Max reduction of a float vector with 4 or more elements on AVX10.2
// hardware. Matches only when the scalar input is the constant +Inf (Min)
// or -Inf (Max); that immediate is not forwarded to the emitter. Two vector
// temporaries are used; the three leading temp slots of reduceFloatMinMax
// are filled with xnoreg.
// The format label now reads "vector_minmaxF_reduction", consistent with
// the naming used by the other float/double min/max rules in this file.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(ideal_op, lane_cnt, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19885
// Min/Max reduction of a 2-element float vector on AVX10.2 hardware, with
// the scalar input held in dst (dst is input and result of the match).
// 'true' is passed as the third argument of reduceFloatMinMax and the three
// leading temp slots are xnoreg; only xtmp1 is reserved.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, lane_cnt, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19901
// Min/Max reduction of a float vector with 4 or more elements on AVX10.2
// hardware, with the scalar input held in dst (dst is input and result of
// the match). 'true' is passed as the third argument of reduceFloatMinMax;
// the three leading temp slots are xnoreg and two temporaries are reserved.
// Fix: the format label previously said "vector_minmax2F_reduction", the
// name of the 2-element rule, although this rule only matches vectors of
// length >= 4. It now prints "vector_minmaxF_reduction".
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, lane_cnt, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19917
//--------------------Min/Max Double Reduction --------------------
// Min/Max reduction of a 2-element double vector, pre-AVX10.2.
// Only matches when the scalar input is the constant +Inf (for Min) or
// -Inf (for Max); that immediate (src1) is matched but not passed to
// reduceDoubleMinMax, which is called with 'false' as its third argument.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, lane_cnt, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19939
// Min/Max reduction of a double vector with 4 or more elements,
// pre-AVX10.2. Same matching conditions as the 2-element rule (scalar input
// must be the constant +Inf for Min / -Inf for Max); a fifth temporary
// (tmp5) is reserved and passed to reduceDoubleMinMax.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, lane_cnt, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19960
19961
// Min/Max reduction of a 2-element double vector, pre-AVX10.2, with the
// scalar input held in dst (dst is both input and result of the match).
// reduceDoubleMinMax is called with 'true' as its third argument.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, lane_cnt, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19980
// Min/Max reduction of a double vector with 4 or more elements,
// pre-AVX10.2, with the scalar input held in dst (input and result of the
// match). Uses five vector temporaries and passes 'true' as the third
// argument of reduceDoubleMinMax.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, lane_cnt, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19999
// Min/Max reduction of a 2-element double vector on AVX10.2 hardware.
// Matches only when the scalar input is the constant +Inf (Min) or -Inf
// (Max); the immediate is not forwarded to the emitter. The three leading
// temp slots of reduceDoubleMinMax are xnoreg; only xtmp1 is reserved.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, lane_cnt, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20017
// Min/Max reduction of a double vector with 4 or more elements on AVX10.2
// hardware. Matches only when the scalar input is the constant +Inf (Min)
// or -Inf (Max). Two vector temporaries are reserved; the three leading
// temp slots of reduceDoubleMinMax are xnoreg.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, lane_cnt, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20035
20036
// Min/Max reduction of a 2-element double vector on AVX10.2 hardware, with
// the scalar input held in dst (input and result of the match).
// reduceDoubleMinMax receives 'true' as its third argument, xnoreg for the
// three leading temp slots and xtmp1 as the only real temporary.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, lane_cnt, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20052
// Min/Max reduction of a double vector with 4 or more elements on AVX10.2
// hardware, with the scalar input held in dst (input and result of the
// match). Two temporaries are reserved; the three leading temp slots of
// reduceDoubleMinMax are xnoreg and its third argument is 'true'.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, lane_cnt, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20068
20069 // ====================VECTOR ARITHMETIC=======================================
20070
20071 // --------------------------------- ADD --------------------------------------
20072
20073 // Bytes vector add
// Non-AVX form (UseAVX == 0): dst is both the first addend and the result,
// emitted as a two-operand paddb.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // accumulator / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // second addend
    __ paddb(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20083
// AVX form (UseAVX > 0): three-operand register add of packed bytes,
// emitted as vpaddb with the vector-length encoding of this node.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20094
// AVX form with the second addend taken directly from memory
// (AddVB src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20106
20107 // Shorts/Chars vector add
// Non-AVX form (UseAVX == 0): dst is both the first addend and the result,
// emitted as a two-operand paddw.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // accumulator / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // second addend
    __ paddw(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20117
// AVX form (UseAVX > 0): three-operand register add of packed shorts/chars,
// emitted as vpaddw with the vector-length encoding of this node.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20128
// AVX form with the second addend taken directly from memory
// (AddVS src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20140
20141 // Integers vector add
// Non-AVX form (UseAVX == 0): dst is both the first addend and the result,
// emitted as a two-operand paddd.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // accumulator / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // second addend
    __ paddd(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20151
// AVX form (UseAVX > 0): three-operand register add of packed ints,
// emitted as vpaddd with the vector-length encoding of this node.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20162
20163
// AVX form with the second addend taken directly from memory
// (AddVI src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20175
20176 // Longs vector add
// Non-AVX form (UseAVX == 0): dst is both the first addend and the result,
// emitted as a two-operand paddq.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // accumulator / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // second addend
    __ paddq(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20186
// AVX form (UseAVX > 0): three-operand register add of packed longs,
// emitted as vpaddq with the vector-length encoding of this node.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20197
// AVX form with the second addend taken directly from memory
// (AddVL src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20209
20210 // Floats vector add
// Non-AVX form (UseAVX == 0): dst is both the first addend and the result,
// emitted as a two-operand addps.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // accumulator / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // second addend
    __ addps(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20220
// AVX form (UseAVX > 0): three-operand register add of packed floats,
// emitted as vaddps with the vector-length encoding of this node.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20231
// AVX form with the second addend taken directly from memory
// (AddVF src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20243
20244 // Doubles vector add
// Non-AVX form (UseAVX == 0): dst is both the first addend and the result,
// emitted as a two-operand addpd.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // accumulator / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // second addend
    __ addpd(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20254
// AVX form (UseAVX > 0): three-operand register add of packed doubles,
// emitted as vaddpd with the vector-length encoding of this node.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20265
// AVX form with the second addend taken directly from memory
// (AddVD src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20277
20278 // --------------------------------- SUB --------------------------------------
20279
20280 // Bytes vector sub
// Non-AVX form (UseAVX == 0): dst is both the minuend and the result,
// emitted as a two-operand psubb.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // minuend / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // subtrahend
    __ psubb(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20290
// AVX form (UseAVX > 0): three-operand register subtract of packed bytes
// (src1 - src2), emitted as vpsubb with this node's vector-length encoding.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20301
// AVX form with the subtrahend taken directly from memory
// (SubVB src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20313
20314 // Shorts/Chars vector sub
// Non-AVX form (UseAVX == 0): dst is both the minuend and the result,
// emitted as a two-operand psubw.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // minuend / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // subtrahend
    __ psubw(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20324
20325
// AVX form (UseAVX > 0): three-operand register subtract of packed
// shorts/chars (src1 - src2), emitted as vpsubw with this node's
// vector-length encoding.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20336
// AVX form with the subtrahend taken directly from memory
// (SubVS src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20348
20349 // Integers vector sub
// Non-AVX form (UseAVX == 0): dst is both the minuend and the result,
// emitted as a two-operand psubd.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // minuend / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // subtrahend
    __ psubd(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20359
// AVX form (UseAVX > 0): three-operand register subtract of packed ints
// (src1 - src2), emitted as vpsubd with this node's vector-length encoding.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20370
// AVX form with the subtrahend taken directly from memory
// (SubVI src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20382
20383 // Longs vector sub
// Non-AVX form (UseAVX == 0): dst is both the minuend and the result,
// emitted as a two-operand psubq.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // minuend / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // subtrahend
    __ psubq(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20393
// AVX form (UseAVX > 0): three-operand register subtract of packed longs
// (src1 - src2), emitted as vpsubq with this node's vector-length encoding.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20404
20405
// AVX form with the subtrahend taken directly from memory
// (SubVL src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20417
20418 // Floats vector sub
// Non-AVX form (UseAVX == 0): dst is both the minuend and the result,
// emitted as a two-operand subps.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;  // minuend / result
    const XMMRegister rhs_reg = $src$$XMMRegister;  // subtrahend
    __ subps(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
20428
// AVX form (UseAVX > 0): three-operand register subtract of packed floats
// (src1 - src2), emitted as vsubps with this node's vector-length encoding.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20439
// AVX form with the subtrahend taken directly from memory
// (SubVF src (LoadVector mem)). Only selected when the vector input
// n->in(1) is wider than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20451
20452 // Doubles vector sub
// Packed double subtract, two-operand form used when UseAVX == 0.
// dst is both the first input and the result (dst = dst - src); emits subpd.
20453 instruct vsubD(vec dst, vec src) %{
20454 predicate(UseAVX == 0);
20455 match(Set dst (SubVD dst src));
20456 format %{ "subpd $dst,$src\t! sub packedD" %}
20457 ins_encode %{
20458 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20459 %}
20460 ins_pipe( pipe_slow );
20461 %}
20462
// Packed double subtract, three-operand register form (dst = src1 - src2).
// Selected only when UseAVX > 0; emits vsubpd at this node's vector length
// encoding.
20463 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20464 predicate(UseAVX > 0);
20465 match(Set dst (SubVD src1 src2));
20466 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20467 ins_encode %{
20468 int vlen_enc = vector_length_encoding(this);
20469 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20470 %}
20471 ins_pipe( pipe_slow );
20472 %}
20473
// Packed double subtract with the subtrahend folded in from a LoadVector
// (dst = src - [mem]). Requires UseAVX > 0 and that n->in(1) is wider
// than 8 bytes.
20474 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20475 predicate((UseAVX > 0) &&
20476 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20477 match(Set dst (SubVD src (LoadVector mem)));
20478 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20479 ins_encode %{
20480 int vlen_enc = vector_length_encoding(this);
20481 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20482 %}
20483 ins_pipe( pipe_slow );
20484 %}
20485
20486 // --------------------------------- MUL --------------------------------------
20487
20488 // Byte vector mul
// Byte vector multiply for vectors of at most 8 bytes.
// There is no packed byte multiply used here; the bytes are widened to
// 16-bit words, multiplied as words, and narrowed back to bytes.
// dst and xtmp are both declared TEMP because each is overwritten while the
// source operands are still being read.
20489 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20490 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20491 match(Set dst (MulVB src1 src2));
20492 effect(TEMP dst, TEMP xtmp);
20493 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20494 ins_encode %{
20495 assert(UseSSE > 3, "required");
// Sign-extend each byte of both inputs to a 16-bit word.
20496 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20497 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
// Word-wise product; only the low byte of each word is the byte result.
20498 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
// Shift left then logically right by 8 to zero the high byte of each word.
20499 __ psllw($dst$$XMMRegister, 8);
20500 __ psrlw($dst$$XMMRegister, 8);
// Narrow the words back to bytes.
20501 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20502 %}
20503 ins_pipe( pipe_slow );
20504 %}
20505
// Byte vector multiply for vectors wider than 8 bytes when UseAVX == 0.
// Each 16-bit word holds two bytes; the odd-index (high) and even-index
// (low) bytes are multiplied separately with word multiplies and the two
// partial results are OR-ed together.
// dst and xtmp are TEMP: both are written before src1/src2 are last read.
20506 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20507 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20508 match(Set dst (MulVB src1 src2));
20509 effect(TEMP dst, TEMP xtmp);
20510 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20511 ins_encode %{
20512 assert(UseSSE > 3, "required");
20513 // Odd-index elements
// Shift the high byte of each word down, multiply, then shift the low byte
// of the product back up into the high-byte position.
20514 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20515 __ psrlw($dst$$XMMRegister, 8);
20516 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20517 __ psrlw($xtmp$$XMMRegister, 8);
20518 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20519 __ psllw($dst$$XMMRegister, 8);
20520 // Even-index elements
// Multiply whole words, then keep only the low byte of each product by
// shifting left and logically right by 8.
20521 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20522 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20523 __ psllw($xtmp$$XMMRegister, 8);
20524 __ psrlw($xtmp$$XMMRegister, 8);
20525 // Combine
20526 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20527 %}
20528 ins_pipe( pipe_slow );
20529 %}
20530
// Byte vector multiply for vectors wider than 8 bytes when UseAVX > 0.
// Same odd/even split as the non-AVX sequence, but using three-operand
// AVX instructions: the partial products are built in xtmp1/xtmp2 and dst is
// written only by the final vpor.
20531 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20532 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20533 match(Set dst (MulVB src1 src2));
20534 effect(TEMP xtmp1, TEMP xtmp2);
20535 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20536 ins_encode %{
20537 int vlen_enc = vector_length_encoding(this);
20538 // Odd-index elements
// High bytes are shifted down, multiplied, and the product's low byte is
// shifted back into the high-byte position (result in xtmp2).
20539 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20540 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20541 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20542 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20543 // Even-index elements
// Whole-word product with the high byte of each word cleared (result in xtmp1).
20544 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20545 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20546 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20547 // Combine
20548 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20549 %}
20550 ins_pipe( pipe_slow );
20551 %}
20552
20553 // Shorts/Chars vector mul
// Packed short/char multiply, two-operand form used when UseAVX == 0.
// dst is both the first input and the result (dst = dst * src); emits pmullw.
20554 instruct vmulS(vec dst, vec src) %{
20555 predicate(UseAVX == 0);
20556 match(Set dst (MulVS dst src));
20557 format %{ "pmullw $dst,$src\t! mul packedS" %}
20558 ins_encode %{
20559 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20560 %}
20561 ins_pipe( pipe_slow );
20562 %}
20563
// Packed short/char multiply, three-operand register form
// (dst = src1 * src2). Selected only when UseAVX > 0; emits vpmullw at this
// node's vector length encoding.
20564 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20565 predicate(UseAVX > 0);
20566 match(Set dst (MulVS src1 src2));
20567 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20568 ins_encode %{
20569 int vlen_enc = vector_length_encoding(this);
20570 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20571 %}
20572 ins_pipe( pipe_slow );
20573 %}
20574
// Packed short/char multiply with the second operand folded in from a
// LoadVector (dst = src * [mem]). Requires UseAVX > 0 and that n->in(1) is
// wider than 8 bytes.
20575 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20576 predicate((UseAVX > 0) &&
20577 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20578 match(Set dst (MulVS src (LoadVector mem)));
20579 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20580 ins_encode %{
20581 int vlen_enc = vector_length_encoding(this);
20582 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20583 %}
20584 ins_pipe( pipe_slow );
20585 %}
20586
20587 // Integers vector mul
// Packed int multiply, two-operand form used when UseAVX == 0.
// dst is both the first input and the result (dst = dst * src); emits pmulld,
// which the encoding asserts is only reached with UseSSE > 3.
20588 instruct vmulI(vec dst, vec src) %{
20589 predicate(UseAVX == 0);
20590 match(Set dst (MulVI dst src));
20591 format %{ "pmulld $dst,$src\t! mul packedI" %}
20592 ins_encode %{
20593 assert(UseSSE > 3, "required");
20594 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20595 %}
20596 ins_pipe( pipe_slow );
20597 %}
20598
// Packed int multiply, three-operand register form (dst = src1 * src2).
// Selected only when UseAVX > 0; emits vpmulld at this node's vector length
// encoding.
20599 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20600 predicate(UseAVX > 0);
20601 match(Set dst (MulVI src1 src2));
20602 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20603 ins_encode %{
20604 int vlen_enc = vector_length_encoding(this);
20605 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20606 %}
20607 ins_pipe( pipe_slow );
20608 %}
20609
// Packed int multiply with the second operand folded in from a LoadVector
// (dst = src * [mem]). Requires UseAVX > 0 and that n->in(1) is wider
// than 8 bytes.
20610 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20611 predicate((UseAVX > 0) &&
20612 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20613 match(Set dst (MulVI src (LoadVector mem)));
20614 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20615 ins_encode %{
20616 int vlen_enc = vector_length_encoding(this);
20617 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20618 %}
20619 ins_pipe( pipe_slow );
20620 %}
20621
20622 // Longs vector mul
// Packed long multiply via a single evpmullq (dst = src1 * src2).
// Applies to 64-byte vectors when avx512dq is supported, or to any vector
// length when avx512vldq is supported. The encoding asserts UseAVX > 2.
20623 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20624 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20625 VM_Version::supports_avx512dq()) ||
20626 VM_Version::supports_avx512vldq());
20627 match(Set dst (MulVL src1 src2));
20628 ins_cost(500);
20629 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20630 ins_encode %{
20631 assert(UseAVX > 2, "required");
20632 int vlen_enc = vector_length_encoding(this);
20633 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20634 %}
20635 ins_pipe( pipe_slow );
20636 %}
20637
// Packed long multiply via evpmullq with the second operand folded in from
// a LoadVector (dst = src * [mem]).
// Applies to 64-byte vectors when avx512dq is supported, or to vectors
// wider than 8 bytes when avx512vldq is supported. The encoding asserts
// UseAVX > 2.
20638 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20639 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20640 VM_Version::supports_avx512dq()) ||
20641 (Matcher::vector_length_in_bytes(n) > 8 &&
20642 VM_Version::supports_avx512vldq()));
20643 match(Set dst (MulVL src (LoadVector mem)));
20644 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20645 ins_cost(500);
20646 ins_encode %{
20647 assert(UseAVX > 2, "required");
20648 int vlen_enc = vector_length_encoding(this);
20649 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20650 %}
20651 ins_pipe( pipe_slow );
20652 %}
20653
// Packed long multiply when UseAVX == 0, synthesized from 32-bit multiplies.
// With each 64-bit lane split as a = a_hi:a_lo and b = b_hi:b_lo, the low
// 64 bits of a*b are ((a_lo*b_hi + a_hi*b_lo) << 32) + a_lo*b_lo.
// dst and xtmp are TEMP: both are written before src1/src2 are last read.
20654 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20655 predicate(UseAVX == 0);
20656 match(Set dst (MulVL src1 src2));
20657 ins_cost(500);
20658 effect(TEMP dst, TEMP xtmp);
20659 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20660 ins_encode %{
20661 assert(VM_Version::supports_sse4_1(), "required");
20662 // Get the lo-hi products; only their lower 32 bits matter
// pshufd with 0xB1 swaps the two 32-bit halves of every 64-bit lane, so the
// pmulld below pairs each half of src1 with the opposite half of src2.
20663 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20664 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
// Add the two cross products of each lane, then move the sum to the upper half.
20665 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20666 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20667 __ psllq($dst$$XMMRegister, 32);
20668 // Get the lo-lo products
20669 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20670 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20671 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20672 %}
20673 ins_pipe( pipe_slow );
20674 %}
20675
// Packed long multiply when UseAVX > 0 but no native 64-bit packed multiply
// applies: 64-byte vectors without avx512dq, or narrower vectors without
// avx512vldq. The product is synthesized from 32-bit multiplies:
// ((a_lo*b_hi + a_hi*b_lo) << 32) + a_lo*b_lo per 64-bit lane.
// Intermediate values live in xtmp1/xtmp2; dst is written only by the
// final vpaddq.
20676 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20677 predicate(UseAVX > 0 &&
20678 ((Matcher::vector_length_in_bytes(n) == 64 &&
20679 !VM_Version::supports_avx512dq()) ||
20680 (Matcher::vector_length_in_bytes(n) < 64 &&
20681 !VM_Version::supports_avx512vldq())));
20682 match(Set dst (MulVL src1 src2));
20683 effect(TEMP xtmp1, TEMP xtmp2);
20684 ins_cost(500);
20685 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20686 ins_encode %{
20687 int vlen_enc = vector_length_encoding(this);
20688 // Get the lo-hi products; only their lower 32 bits matter
// vpshufd with 0xB1 swaps the two 32-bit halves of every 64-bit lane.
20689 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20690 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
// Sum the two cross products of each lane and shift the sum to the upper half.
20691 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20692 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20693 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20694 // Get the lo-lo products
20695 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20696 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20697 %}
20698 ins_pipe( pipe_slow );
20699 %}
20700
// Packed long multiply specialised for the case where the MulVL node
// reports has_uint_inputs(): a single vpmuludq is emitted.
// Requires UseAVX > 0; the rule carries ins_cost(100).
20701 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20702 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20703 match(Set dst (MulVL src1 src2));
20704 ins_cost(100);
20705 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20706 ins_encode %{
20707 int vlen_enc = vector_length_encoding(this);
20708 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20709 %}
20710 ins_pipe( pipe_slow );
20711 %}
20712
// Packed long multiply specialised for the case where the MulVL node
// reports has_int_inputs(): a single vpmuldq is emitted.
// Requires UseAVX > 0; the rule carries ins_cost(100).
20713 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20714 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20715 match(Set dst (MulVL src1 src2));
20716 ins_cost(100);
20717 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20718 ins_encode %{
20719 int vlen_enc = vector_length_encoding(this);
20720 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20721 %}
20722 ins_pipe( pipe_slow );
20723 %}
20724
20725 // Floats vector mul
// Packed float multiply, two-operand form used when UseAVX == 0.
// dst is both the first input and the result (dst = dst * src); emits mulps.
20726 instruct vmulF(vec dst, vec src) %{
20727 predicate(UseAVX == 0);
20728 match(Set dst (MulVF dst src));
20729 format %{ "mulps $dst,$src\t! mul packedF" %}
20730 ins_encode %{
20731 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20732 %}
20733 ins_pipe( pipe_slow );
20734 %}
20735
// Packed float multiply, three-operand register form (dst = src1 * src2).
// Selected only when UseAVX > 0; emits vmulps at this node's vector length
// encoding.
20736 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20737 predicate(UseAVX > 0);
20738 match(Set dst (MulVF src1 src2));
20739 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20740 ins_encode %{
20741 int vlen_enc = vector_length_encoding(this);
20742 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20743 %}
20744 ins_pipe( pipe_slow );
20745 %}
20746
// Packed float multiply with the second operand folded in from a LoadVector
// (dst = src * [mem]). Requires UseAVX > 0 and that n->in(1) is wider
// than 8 bytes.
20747 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20748 predicate((UseAVX > 0) &&
20749 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20750 match(Set dst (MulVF src (LoadVector mem)));
20751 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20752 ins_encode %{
20753 int vlen_enc = vector_length_encoding(this);
20754 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20755 %}
20756 ins_pipe( pipe_slow );
20757 %}
20758
20759 // Doubles vector mul
// Packed double multiply, two-operand form used when UseAVX == 0.
// dst is both the first input and the result (dst = dst * src); emits mulpd.
20760 instruct vmulD(vec dst, vec src) %{
20761 predicate(UseAVX == 0);
20762 match(Set dst (MulVD dst src));
20763 format %{ "mulpd $dst,$src\t! mul packedD" %}
20764 ins_encode %{
20765 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20766 %}
20767 ins_pipe( pipe_slow );
20768 %}
20769
// Packed double multiply, three-operand register form (dst = src1 * src2).
// Selected only when UseAVX > 0; emits vmulpd at this node's vector length
// encoding.
20770 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20771 predicate(UseAVX > 0);
20772 match(Set dst (MulVD src1 src2));
20773 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20774 ins_encode %{
20775 int vlen_enc = vector_length_encoding(this);
20776 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20777 %}
20778 ins_pipe( pipe_slow );
20779 %}
20780
// Packed double multiply with the second operand folded in from a
// LoadVector (dst = src * [mem]). Requires UseAVX > 0 and that n->in(1) is
// wider than 8 bytes.
20781 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20782 predicate((UseAVX > 0) &&
20783 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20784 match(Set dst (MulVD src (LoadVector mem)));
20785 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20786 ins_encode %{
20787 int vlen_enc = vector_length_encoding(this);
20788 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20789 %}
20790 ins_pipe( pipe_slow );
20791 %}
20792
20793 // --------------------------------- DIV --------------------------------------
20794
20795 // Floats vector div
// Packed float divide, two-operand form used when UseAVX == 0.
// dst is both the dividend and the result (dst = dst / src); emits divps.
20796 instruct vdivF(vec dst, vec src) %{
20797 predicate(UseAVX == 0);
20798 match(Set dst (DivVF dst src));
20799 format %{ "divps $dst,$src\t! div packedF" %}
20800 ins_encode %{
20801 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20802 %}
20803 ins_pipe( pipe_slow );
20804 %}
20805
// Packed float divide, three-operand register form (dst = src1 / src2).
// Selected only when UseAVX > 0; emits vdivps at this node's vector length
// encoding.
20806 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20807 predicate(UseAVX > 0);
20808 match(Set dst (DivVF src1 src2));
20809 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20810 ins_encode %{
20811 int vlen_enc = vector_length_encoding(this);
20812 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20813 %}
20814 ins_pipe( pipe_slow );
20815 %}
20816
// Packed float divide with the divisor folded in from a LoadVector
// (dst = src / [mem]). Requires UseAVX > 0 and that n->in(1) is wider
// than 8 bytes.
20817 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20818 predicate((UseAVX > 0) &&
20819 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20820 match(Set dst (DivVF src (LoadVector mem)));
20821 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20822 ins_encode %{
20823 int vlen_enc = vector_length_encoding(this);
20824 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20825 %}
20826 ins_pipe( pipe_slow );
20827 %}
20828
20829 // Doubles vector div
// Packed double divide, two-operand form used when UseAVX == 0.
// dst is both the dividend and the result (dst = dst / src); emits divpd.
20830 instruct vdivD(vec dst, vec src) %{
20831 predicate(UseAVX == 0);
20832 match(Set dst (DivVD dst src));
20833 format %{ "divpd $dst,$src\t! div packedD" %}
20834 ins_encode %{
20835 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20836 %}
20837 ins_pipe( pipe_slow );
20838 %}
20839
// Packed double divide, three-operand register form (dst = src1 / src2).
// Selected only when UseAVX > 0; emits vdivpd at this node's vector length
// encoding.
20840 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20841 predicate(UseAVX > 0);
20842 match(Set dst (DivVD src1 src2));
20843 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20844 ins_encode %{
20845 int vlen_enc = vector_length_encoding(this);
20846 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20847 %}
20848 ins_pipe( pipe_slow );
20849 %}
20850
// Packed double divide with the divisor folded in from a LoadVector
// (dst = src / [mem]). Requires UseAVX > 0 and that n->in(1) is wider
// than 8 bytes.
20851 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20852 predicate((UseAVX > 0) &&
20853 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20854 match(Set dst (DivVD src (LoadVector mem)));
20855 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20856 ins_encode %{
20857 int vlen_enc = vector_length_encoding(this);
20858 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20859 %}
20860 ins_pipe( pipe_slow );
20861 %}
20862
20863 // ------------------------------ MinMax ---------------------------------------
20864
20865 // Byte, Short, Int vector Min/Max
// Integral (non-long) vector Min/Max, two-operand form used when
// UseAVX == 0. One rule serves both MinV and MaxV: the ideal opcode and the
// element type are read at emit time and handed to pminmax, which picks the
// instruction. dst is both the first input and the result.
20866 instruct minmax_reg_sse(vec dst, vec src) %{
20867 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20868 UseAVX == 0);
20869 match(Set dst (MinV dst src));
20870 match(Set dst (MaxV dst src));
20871 format %{ "vector_minmax $dst,$src\t! " %}
20872 ins_encode %{
20873 assert(UseSSE >= 4, "required");
20874
20875 int opcode = this->ideal_Opcode();
20876 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20877 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20878 %}
20879 ins_pipe( pipe_slow );
20880 %}
20881
// Integral (non-long) vector Min/Max, three-operand form used when
// UseAVX > 0. One rule serves both MinV and MaxV: the ideal opcode, element
// type and vector length encoding are passed to vpminmax.
20882 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20883 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20884 UseAVX > 0);
20885 match(Set dst (MinV src1 src2));
20886 match(Set dst (MaxV src1 src2));
20887 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20888 ins_encode %{
20889 int opcode = this->ideal_Opcode();
20890 int vlen_enc = vector_length_encoding(this);
20891 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20892
20893 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20894 %}
20895 ins_pipe( pipe_slow );
20896 %}
20897
20898 // Long vector Min/Max
// Long vector Min/Max for 16-byte vectors when UseAVX == 0.
// Uses the pminmax overload that takes an extra temporary register; dst and
// tmp are both declared TEMP.
// NOTE(review): tmp uses the rxmm0 operand class, presumably pinning it to
// xmm0 for the underlying instruction sequence - confirm against pminmax.
// NOTE(review): the MaxV rule lists its inputs as (src dst) while the MinV
// rule uses (dst src); this is reproduced as found.
20899 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20900 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20901 UseAVX == 0);
20902 match(Set dst (MinV dst src));
20903 match(Set dst (MaxV src dst));
20904 effect(TEMP dst, TEMP tmp);
20905 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20906 ins_encode %{
20907 assert(UseSSE >= 4, "required");
20908
20909 int opcode = this->ideal_Opcode();
20910 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20911 assert(elem_bt == T_LONG, "sanity");
20912
20913 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20914 %}
20915 ins_pipe( pipe_slow );
20916 %}
20917
// Long vector Min/Max for vectors of at most 32 bytes when UseAVX > 0 and
// avx512vl is not supported. All operands use the legVec operand class and
// dst is declared TEMP. MinV and MaxV share the rule; the ideal opcode is
// forwarded to vpminmax.
20918 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20919 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20920 UseAVX > 0 && !VM_Version::supports_avx512vl());
20921 match(Set dst (MinV src1 src2));
20922 match(Set dst (MaxV src1 src2));
20923 effect(TEMP dst);
20924 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20925 ins_encode %{
20926 int vlen_enc = vector_length_encoding(this);
20927 int opcode = this->ideal_Opcode();
20928 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20929 assert(elem_bt == T_LONG, "sanity");
20930
20931 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20932 %}
20933 ins_pipe( pipe_slow );
20934 %}
20935
// Long vector Min/Max for 64-byte vectors, or for any vector length when
// avx512vl is supported. MinV and MaxV share the rule; the ideal opcode,
// element type and vector length encoding are forwarded to vpminmax.
// The encoding asserts UseAVX > 2.
20936 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20937 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20938 Matcher::vector_element_basic_type(n) == T_LONG);
20939 match(Set dst (MinV src1 src2));
20940 match(Set dst (MaxV src1 src2));
// Every operand needs its '$' prefix so the format output substitutes the
// operand instead of printing its name literally.
20941 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20942 ins_encode %{
20943 assert(UseAVX > 2, "required");
20944
20945 int vlen_enc = vector_length_encoding(this);
20946 int opcode = this->ideal_Opcode();
20947 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20948 assert(elem_bt == T_LONG, "sanity");
20949
20950 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20951 %}
20952 ins_pipe( pipe_slow );
20953 %}
20954
20955 // Float/Double vector Min/Max
// Float/double vector Min/Max when avx10_2 is supported.
// MinV and MaxV share the rule; the ideal opcode and element type are
// forwarded to vminmax_fp_avx10_2, with k0 passed in the mask-register
// argument position. No temporaries are declared.
20956 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20957 predicate(VM_Version::supports_avx10_2() &&
20958 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20959 match(Set dst (MinV a b));
20960 match(Set dst (MaxV a b));
20961 format %{ "vector_minmaxFP $dst, $a, $b" %}
20962 ins_encode %{
20963 int vlen_enc = vector_length_encoding(this);
20964 int opcode = this->ideal_Opcode();
20965 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20966 __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20967 %}
20968 ins_pipe( pipe_slow );
20969 %}
20970
20971 // Float/Double vector Min/Max
// Float/double vector Min/Max for vectors of at most 32 bytes when
// UseAVX > 0 and avx10_2 is not supported.
// All operands use the legVec operand class, and three vector temporaries
// (tmp, atmp, btmp) are handed to vminmax_fp together with the ideal
// opcode, element type and vector length encoding.
20972 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20973 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20974 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20975 UseAVX > 0);
20976 match(Set dst (MinV a b));
20977 match(Set dst (MaxV a b));
20978 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20979 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20980 ins_encode %{
20981 assert(UseAVX > 0, "required");
20982
20983 int opcode = this->ideal_Opcode();
20984 int vlen_enc = vector_length_encoding(this);
20985 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20986
20987 __ vminmax_fp(opcode, elem_bt,
20988 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20989 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20990 %}
20991 ins_pipe( pipe_slow );
20992 %}
20993
// Float/double vector Min/Max for 64-byte vectors when avx10_2 is not
// supported. Uses two vector temporaries (atmp, btmp) and one mask-register
// temporary (ktmp), all forwarded to evminmax_fp together with the ideal
// opcode, element type and vector length encoding. dst is also declared
// TEMP. The encoding asserts UseAVX > 2.
20994 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20995 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20996 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20997 match(Set dst (MinV a b));
20998 match(Set dst (MaxV a b));
20999 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
// List every declared temporary, including the mask register, so the
// format output shows all registers the sequence clobbers.
21000 format %{ "vector_minmaxFP $dst,$a,$b\t!using $ktmp, $atmp, $btmp as TEMP" %}
21001 ins_encode %{
21002 assert(UseAVX > 2, "required");
21003
21004 int opcode = this->ideal_Opcode();
21005 int vlen_enc = vector_length_encoding(this);
21006 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21007
21008 __ evminmax_fp(opcode, elem_bt,
21009 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21010 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21011 %}
21012 ins_pipe( pipe_slow );
21013 %}
21014
21015 // ------------------------------ Unsigned vector Min/Max ----------------------
21016
// Unsigned vector Min/Max, register form.
// Applies when avx512vl is supported, or when the element type is not
// T_LONG. UMinV and UMaxV share the rule; the ideal opcode and element type
// are forwarded to vpuminmax. The encoding asserts an integral element type.
21017 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21018 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21019 match(Set dst (UMinV a b));
21020 match(Set dst (UMaxV a b));
21021 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21022 ins_encode %{
21023 int opcode = this->ideal_Opcode();
21024 int vlen_enc = vector_length_encoding(this);
21025 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21026 assert(is_integral_type(elem_bt), "");
21027 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21028 %}
21029 ins_pipe( pipe_slow );
21030 %}
21031
// Unsigned vector Min/Max with the second operand folded in from a
// LoadVector. Applies when avx512vl is supported, or when the element type
// is not T_LONG. UMinV and UMaxV share the rule; the ideal opcode and
// element type are forwarded to the Address overload of vpuminmax.
21032 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21033 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21034 match(Set dst (UMinV a (LoadVector b)));
21035 match(Set dst (UMaxV a (LoadVector b)));
21036 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21037 ins_encode %{
21038 int opcode = this->ideal_Opcode();
21039 int vlen_enc = vector_length_encoding(this);
21040 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21041 assert(is_integral_type(elem_bt), "");
21042 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21043 %}
21044 ins_pipe( pipe_slow );
21045 %}
21046
// Unsigned long vector Min/Max when avx512vl is not supported.
// UMinV and UMaxV share the rule; the ideal opcode and two vector
// temporaries (xtmp1, xtmp2) are forwarded to vpuminmaxq.
21047 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21048 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21049 match(Set dst (UMinV a b));
21050 match(Set dst (UMaxV a b));
21051 effect(TEMP xtmp1, TEMP xtmp2);
// The temporaries need their '$' prefix so the format output shows the
// allocated registers rather than the literal operand names.
21052 format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21053 ins_encode %{
21054 int opcode = this->ideal_Opcode();
21055 int vlen_enc = vector_length_encoding(this);
21056 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21057 %}
21058 ins_pipe( pipe_slow );
21059 %}
21060
// Masked unsigned vector Min/Max, register form.
// Matches UMinV/UMaxV whose inputs are (Binary dst src2) plus a kReg mask,
// so dst is both the first source and the destination. The ideal opcode and
// element type are forwarded to evmasked_op.
// NOTE(review): the literal `true` argument is presumably the merge flag of
// evmasked_op - confirm against its declaration.
21061 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21062 match(Set dst (UMinV (Binary dst src2) mask));
21063 match(Set dst (UMaxV (Binary dst src2) mask));
21064 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21065 ins_encode %{
21066 int vlen_enc = vector_length_encoding(this);
21067 BasicType bt = Matcher::vector_element_basic_type(this);
21068 int opc = this->ideal_Opcode();
21069 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21070 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21071 %}
21072 ins_pipe( pipe_slow );
21073 %}
21074
// Masked unsigned vector Min/Max with the second operand folded in from a
// LoadVector. dst is both the first source and the destination; the ideal
// opcode and element type are forwarded to the Address overload of
// evmasked_op.
// NOTE(review): the literal `true` argument is presumably the merge flag of
// evmasked_op - confirm against its declaration.
21075 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21076 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21077 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21078 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21079 ins_encode %{
21080 int vlen_enc = vector_length_encoding(this);
21081 BasicType bt = Matcher::vector_element_basic_type(this);
21082 int opc = this->ideal_Opcode();
21083 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21084 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21085 %}
21086 ins_pipe( pipe_slow );
21087 %}
21088
21089 // --------------------------------- Signum/CopySign ---------------------------
21090
// Scalar float signum.
// dst is both the input value and the result; the `zero` and `one` operands
// arrive as the Binary input of the SignumF node and are passed through to
// signum_fp along with the ideal opcode. The condition flags are killed.
21091 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21092 match(Set dst (SignumF dst (Binary zero one)));
21093 effect(KILL cr);
21094 format %{ "signumF $dst, $dst" %}
21095 ins_encode %{
21096 int opcode = this->ideal_Opcode();
21097 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21098 %}
21099 ins_pipe( pipe_slow );
21100 %}
21101
// Scalar double signum.
// dst is both the input value and the result; the `zero` and `one` operands
// arrive as the Binary input of the SignumD node and are passed through to
// signum_fp along with the ideal opcode. The condition flags are killed.
21102 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21103 match(Set dst (SignumD dst (Binary zero one)));
21104 effect(KILL cr);
21105 format %{ "signumD $dst, $dst" %}
21106 ins_encode %{
21107 int opcode = this->ideal_Opcode();
21108 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21109 %}
21110 ins_pipe( pipe_slow );
21111 %}
21112
// Vector float/double signum for vectors of at most 32 bytes when avx512vl
// is not supported. SignumVF and SignumVD share the rule; the ideal opcode
// is forwarded to vector_signum_avx together with the zero/one vectors and
// one vector temporary. dst is also declared TEMP.
21113 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21114 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21115 match(Set dst (SignumVF src (Binary zero one)));
21116 match(Set dst (SignumVD src (Binary zero one)));
21117 effect(TEMP dst, TEMP xtmp1);
21118 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21119 ins_encode %{
21120 int opcode = this->ideal_Opcode();
21121 int vec_enc = vector_length_encoding(this);
21122 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21123 $xtmp1$$XMMRegister, vec_enc);
21124 %}
21125 ins_pipe( pipe_slow );
21126 %}
21127
// Vector float/double signum when avx512vl is supported or the vector is
// 64 bytes wide. SignumVF and SignumVD share the rule; the ideal opcode is
// forwarded to vector_signum_evex together with the zero/one vectors and a
// mask-register temporary. dst is also declared TEMP.
21128 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21129 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21130 match(Set dst (SignumVF src (Binary zero one)));
21131 match(Set dst (SignumVD src (Binary zero one)));
21132 effect(TEMP dst, TEMP ktmp1);
21133 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21134 ins_encode %{
21135 int opcode = this->ideal_Opcode();
21136 int vec_enc = vector_length_encoding(this);
21137 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21138 $ktmp1$$KRegister, vec_enc);
21139 %}
21140 ins_pipe( pipe_slow );
21141 %}
21142
21143 // ---------------------------------------
21144 // For copySign use 0xE4 as the imm8 truth table for vpternlog
21145 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21146 // C (xmm2) is set to 0x7FFFFFFF
21147 // Wherever xmm2 is 0, we want to pick from B (sign)
21148 // Wherever xmm2 is 1, we want to pick from A (src)
21149 //
21150 // A B C Result
21151 // 0 0 0 0
21152 // 0 0 1 0
21153 // 0 1 0 1
21154 // 0 1 1 0
21155 // 1 0 0 0
21156 // 1 0 1 1
21157 // 1 1 0 1
21158 // 1 1 1 1
21159 //
21160 // Result going from high bit to low bit is 0b11100100 = 0xE4
21161 // ---------------------------------------
21162
// Scalar float copySign: dst keeps its magnitude and takes the sign of src.
// A 0x7FFFFFFF mask (every bit except the sign bit) is built in tmp2, moved
// into tmp1, and a single 128-bit vpternlogd with immediate 0xE4 then
// selects dst bits where the mask bit is 1 and src bits where it is 0.
// NOTE(review): this rule has no predicate; availability of vpternlogd is
// presumably guaranteed elsewhere before CopySignF is matched - confirm.
21163 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21164 match(Set dst (CopySignF dst src));
21165 effect(TEMP tmp1, TEMP tmp2);
21166 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21167 ins_encode %{
21168 __ movl($tmp2$$Register, 0x7FFFFFFF);
21169 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21170 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21171 %}
21172 ins_pipe( pipe_slow );
21173 %}
21174
// Scalar double copySign: dst keeps its magnitude and takes the sign of src.
// The CopySignD node carries (Binary src zero); the immD `zero` operand is
// matched but not referenced by the encoding.
// A 0x7FFFFFFFFFFFFFFF mask (every bit except the sign bit) is built in
// tmp2, moved into tmp1, and a single 128-bit vpternlogq with immediate 0xE4
// selects dst bits where the mask bit is 1 and src bits where it is 0.
21175 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21176 match(Set dst (CopySignD dst (Binary src zero)));
21177 ins_cost(100);
21178 effect(TEMP tmp1, TEMP tmp2);
21179 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21180 ins_encode %{
21181 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21182 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21183 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21184 %}
21185 ins_pipe( pipe_slow );
21186 %}
21187
21188 //----------------------------- CompressBits/ExpandBits ------------------------
21189
// Int CompressBits with the mask in a register: emits pextl (parallel bit
// extract). Selected only when the node's bottom type is an int type.
21190 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21191 predicate(n->bottom_type()->isa_int());
21192 match(Set dst (CompressBits src mask));
21193 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21194 ins_encode %{
21195 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21196 %}
21197 ins_pipe( pipe_slow );
21198 %}
21199
// Int ExpandBits with the mask in a register: emits pdepl (parallel bit
// deposit). Selected only when the node's bottom type is an int type.
21200 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21201 predicate(n->bottom_type()->isa_int());
21202 match(Set dst (ExpandBits src mask));
21203 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21204 ins_encode %{
21205 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21206 %}
21207 ins_pipe( pipe_slow );
21208 %}
21209
// Int CompressBits with the mask folded in from a LoadI: emits pextl with a
// memory operand. Selected only when the node's bottom type is an int type.
21210 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21211 predicate(n->bottom_type()->isa_int());
21212 match(Set dst (CompressBits src (LoadI mask)));
21213 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21214 ins_encode %{
21215 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21216 %}
21217 ins_pipe( pipe_slow );
21218 %}
21219
// Int ExpandBits with the mask folded in from a LoadI: emits pdepl with a
// memory operand. Selected only when the node's bottom type is an int type.
21220 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21221 predicate(n->bottom_type()->isa_int());
21222 match(Set dst (ExpandBits src (LoadI mask)));
21223 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21224 ins_encode %{
21225 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21226 %}
21227 ins_pipe( pipe_slow );
21228 %}
21229
21230 // --------------------------------- Sqrt --------------------------------------
21231
// Packed float square root, register form: emits vsqrtps at this node's
// vector length encoding. The rule has no predicate; the encoding asserts
// UseAVX > 0.
21232 instruct vsqrtF_reg(vec dst, vec src) %{
21233 match(Set dst (SqrtVF src));
21234 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21235 ins_encode %{
21236 assert(UseAVX > 0, "required");
21237 int vlen_enc = vector_length_encoding(this);
21238 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21239 %}
21240 ins_pipe( pipe_slow );
21241 %}
21242
// Packed float square root with the operand folded in from a LoadVector.
// Requires n->in(1) to be wider than 8 bytes; the encoding asserts
// UseAVX > 0 and emits vsqrtps with a memory operand.
21243 instruct vsqrtF_mem(vec dst, memory mem) %{
21244 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21245 match(Set dst (SqrtVF (LoadVector mem)));
21246 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21247 ins_encode %{
21248 assert(UseAVX > 0, "required");
21249 int vlen_enc = vector_length_encoding(this);
21250 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21251 %}
21252 ins_pipe( pipe_slow );
21253 %}
21254
// Double precision floating point vector sqrt
// SqrtVD with a register source: emits vsqrtpd at the vector length of this node.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    __ vsqrtpd(result, input, enc);
  %}
  ins_pipe( pipe_slow );
%}
21266
// SqrtVD with the LoadVector folded into the instruction. Only selected when
// the loaded vector is wider than 8 bytes.
instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    __ vsqrtpd(result, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
21278
21279 // ------------------------------ Shift ---------------------------------------
21280
21281 // Left and right shift count vectors are the same on x86
21282 // (only lowest bits of xmm reg are used for count).
// Materializes a shift count vector from a general register. One rule serves
// both LShiftCntV and RShiftCntV.
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    const XMMRegister count_vec = $dst$$XMMRegister;
    const Register    count_gpr = $cnt$$Register;
    __ movdl(count_vec, count_gpr);
  %}
  ins_pipe( pipe_slow );
%}
21292
21293 // Byte vector shift
// Non-variable shift of a byte vector with at most 8 lanes. The bytes are
// widened to words, shifted as words, masked with vector_short_to_byte_mask
// and packed back to bytes.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int ideal_op = this->ideal_Opcode();
    // The sign flag is cleared only for the unsigned right shift.
    const bool extend_signed = (ideal_op != Op_URShiftVB);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister words  = $tmp$$XMMRegister;

    __ vextendbw(extend_signed, words, $src$$XMMRegister);
    __ vshiftw(ideal_op, words, $shift$$XMMRegister);
    __ movdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(result, words);
    __ packuswb(result, result);
  %}
  ins_pipe( pipe_slow );
%}
21313
// Non-variable shift of a 16-lane byte vector when UseAVX <= 1. Each 8-byte
// half is widened to words and shifted separately, then both halves are
// masked and packed into dst.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int ideal_op = this->ideal_Opcode();
    const bool extend_signed = (ideal_op != Op_URShiftVB);
    const XMMRegister result   = $dst$$XMMRegister;
    const XMMRegister source   = $src$$XMMRegister;
    const XMMRegister count    = $shift$$XMMRegister;
    const XMMRegister lo_words = $tmp1$$XMMRegister;
    const XMMRegister hi_words = $tmp2$$XMMRegister;

    // Lower half of the source.
    __ vextendbw(extend_signed, lo_words, source);
    __ vshiftw(ideal_op, lo_words, count);

    // Upper half of the source, brought down with pshufd 0xE first.
    __ pshufd(hi_words, source, 0xE);
    __ vextendbw(extend_signed, hi_words, hi_words);
    __ vshiftw(ideal_op, hi_words, count);

    // Mask both word vectors and pack them to bytes in dst.
    __ movdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(hi_words, result);
    __ pand(result, lo_words);
    __ packuswb(result, hi_words);
  %}
  ins_pipe( pipe_slow );
%}
21338
// Non-variable shift of a 16-lane byte vector when UseAVX > 1. All 16 bytes are
// widened into one 256-bit word vector, shifted, masked, and the two 128-bit
// halves are packed back to bytes.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const bool extend_signed = (ideal_op != Op_URShiftVB);
    const int wide_enc = Assembler::AVX_256bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister words  = $tmp$$XMMRegister;

    __ vextendbw(extend_signed, words, $src$$XMMRegister, wide_enc);
    __ vshiftw(ideal_op, words, words, $shift$$XMMRegister, wide_enc);
    __ vpand(words, words, ExternalAddress(vector_short_to_byte_mask()), wide_enc, noreg);
    __ vextracti128_high(result, words);
    // The final pack works on 128-bit operands.
    __ vpackuswb(result, words, result, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21359
// Non-variable shift of a 32-lane byte vector. Each 128-bit half of the source
// is widened to a 256-bit word vector, shifted and masked; the results are
// packed and then reordered with vpermq 0xD8.
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    const int ideal_op = this->ideal_Opcode();
    const bool extend_signed = (ideal_op != Op_URShiftVB);
    const int enc = Assembler::AVX_256bit;
    const XMMRegister lo_words = $dst$$XMMRegister;
    const XMMRegister hi_words = $tmp$$XMMRegister;
    const XMMRegister source   = $src$$XMMRegister;
    const XMMRegister count    = $shift$$XMMRegister;

    __ vextracti128_high(hi_words, source);
    __ vextendbw(extend_signed, hi_words, hi_words, enc);
    __ vextendbw(extend_signed, lo_words, source, enc);
    __ vshiftw(ideal_op, hi_words, hi_words, count, enc);
    __ vshiftw(ideal_op, lo_words, lo_words, count, enc);
    __ vpand(hi_words, hi_words, ExternalAddress(vector_short_to_byte_mask()), enc, noreg);
    __ vpand(lo_words, lo_words, ExternalAddress(vector_short_to_byte_mask()), enc, noreg);
    __ vpackuswb(lo_words, lo_words, hi_words, enc);
    __ vpermq(lo_words, lo_words, 0xD8, enc);
  %}
  ins_pipe( pipe_slow );
%}
21384
// Non-variable shift of a 64-lane byte vector. Both 256-bit halves are widened
// to 512-bit word vectors, shifted, masked with a broadcast of
// vector_short_to_byte_mask, packed, and finally permuted with
// vector_byte_perm_mask.
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int ideal_op = this->ideal_Opcode();
    const bool extend_signed = (ideal_op != Op_URShiftVB);
    const int enc = Assembler::AVX_512bit;
    const XMMRegister result   = $dst$$XMMRegister;
    const XMMRegister source   = $src$$XMMRegister;
    const XMMRegister count    = $shift$$XMMRegister;
    const XMMRegister hi_words = $tmp1$$XMMRegister;
    const XMMRegister lo_words = $tmp2$$XMMRegister;

    __ vextracti64x4(hi_words, source, 1);
    __ vextendbw(extend_signed, hi_words, hi_words, enc);
    __ vextendbw(extend_signed, lo_words, source, enc);
    __ vshiftw(ideal_op, hi_words, hi_words, count, enc);
    __ vshiftw(ideal_op, lo_words, lo_words, count, enc);

    // dst temporarily holds the broadcast mask.
    __ vmovdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd(result, result, enc);
    __ vpand(hi_words, hi_words, result, enc);
    __ vpand(lo_words, lo_words, result, enc);
    __ vpackuswb(result, hi_words, lo_words, enc);

    // The second temp is reused to hold the permutation indices.
    __ evmovdquq(lo_words, ExternalAddress(vector_byte_perm_mask()), enc, noreg);
    __ vpermq(result, lo_words, result, enc);
  %}
  ins_pipe( pipe_slow );
%}
21412
// A logical right shift of a short vector produces an incorrect Java result
// for negative data, because Java code converts a short value into an int with
// sign extension before the shift. Char vectors are fine since chars are
// unsigned values.
// Shorts/Chars vector shift
// Non-variable shift of a short/char vector. With AVX the three-operand form
// is used; otherwise src is copied into dst with a move sized by the lane
// count and dst is shifted in place.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister count  = $shift$$XMMRegister;
    if (UseAVX <= 0) {
      int vlen = Matcher::vector_length(this);
      switch (vlen) {
        case 2:
          __ movflt(result, source);
          break;
        case 4:
          __ movdbl(result, source);
          break;
        default:
          assert (vlen == 8, "sanity");
          __ movdqu(result, source);
          break;
      }
      __ vshiftw(ideal_op, result, count);
    } else {
      const int enc = vector_length_encoding(this);
      __ vshiftw(ideal_op, result, source, count, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21447
// Integers vector shift
// Non-variable shift of an int vector. With AVX the three-operand form is
// used; otherwise src is copied into dst with a move sized by the lane count
// and dst is shifted in place.
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister count  = $shift$$XMMRegister;
    if (UseAVX <= 0) {
      int vlen = Matcher::vector_length(this);
      if (vlen != 2) {
        assert(vlen == 4, "sanity");
        __ movdqu(result, source);
      } else {
        __ movdbl(result, source);
      }
      __ vshiftd(ideal_op, result, count);
    } else {
      const int enc = vector_length_encoding(this);
      __ vshiftd(ideal_op, result, source, count, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21475
// Integers vector constant shift
// Shift of an int vector by an immediate count. With AVX the three-operand
// immediate form is used; otherwise src is copied into dst and dst is shifted
// in place.
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    if (UseAVX <= 0) {
      int vlen = Matcher::vector_length(this);
      if (vlen != 2) {
        assert(vlen == 4, "sanity");
        __ movdqu(result, source);
      } else {
        __ movdbl(result, source);
      }
      __ vshiftd_imm(ideal_op, result, $shift$$constant);
    } else {
      const int enc = vector_length_encoding(this);
      __ vshiftd_imm(ideal_op, result, source, $shift$$constant, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21501
21502 // Longs vector shift
// Non-variable left or unsigned-right shift of a long vector. With AVX the
// three-operand form is used; otherwise the two-lane source is copied into dst
// and dst is shifted in place.
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister count  = $shift$$XMMRegister;
    if (UseAVX <= 0) {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, source);
      __ vshiftq(ideal_op, result, count);
    } else {
      const int enc = vector_length_encoding(this);
      __ vshiftq(ideal_op, result, source, count, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21522
21523 // Longs vector constant shift
// Left or unsigned-right shift of a long vector by an immediate count. With AVX
// the three-operand immediate form is used; otherwise the two-lane source is
// copied into dst and dst is shifted in place.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    if (UseAVX <= 0) {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, source);
      __ vshiftq_imm(ideal_op, result, $shift$$constant);
    } else {
      const int enc = vector_length_encoding(this);
      __ vshiftq_imm(ideal_op, result, source, $shift$$constant, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21541
21542 // -------------------ArithmeticRightShift -----------------------------------
21543 // Long vector arithmetic right shift
// Non-variable arithmetic right shift of a long vector when UseAVX <= 2.
// Both the source and vector_long_sign_mask are shifted logically by the same
// count; the result is (shifted_src ^ shifted_mask) - shifted_mask.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    const XMMRegister result       = $dst$$XMMRegister;
    const XMMRegister source       = $src$$XMMRegister;
    const XMMRegister count        = $shift$$XMMRegister;
    const XMMRegister shifted_mask = $tmp$$XMMRegister;
    if (vlen != 2) {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      const int enc = Assembler::AVX_256bit;
      __ vpsrlq(result, source, count, enc);
      __ vmovdqu(shifted_mask, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq(shifted_mask, shifted_mask, count, enc);
      __ vpxor(result, result, shifted_mask, enc);
      __ vpsubq(result, result, shifted_mask, enc);
    } else {
      __ movdqu(result, source);
      __ psrlq(result, count);
      __ movdqu(shifted_mask, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq(shifted_mask, count);
      __ pxor(result, shifted_mask);
      __ psubq(result, shifted_mask);
    }
  %}
  ins_pipe( pipe_slow );
%}
21571
// Non-variable arithmetic right shift of a long vector when UseAVX > 2:
// a single evpsraq.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister count  = $shift$$XMMRegister;
    __ evpsraq(result, source, count, enc);
  %}
  ins_pipe( pipe_slow );
%}
21582
21583 // ------------------- Variable Shift -----------------------------
21584 // Byte variable shift
// Variable shift of a byte vector with at most 8 lanes when AVX512BW is not
// available. varshiftbw leaves a word result in dst, which is then packed to
// bytes.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int enc = Assembler::AVX_128bit;
    const XMMRegister result = $dst$$XMMRegister;
    __ varshiftbw(ideal_op, result, $src$$XMMRegister, $shift$$XMMRegister, enc, $vtmp$$XMMRegister);
    __ vpackuswb(result, result, result, enc);
  %}
  ins_pipe( pipe_slow );
%}
21604
// Variable shift of a 16-lane byte vector when AVX512BW is not available.
// The lower and upper 8 bytes are shifted separately as words and then packed
// together into dst.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int enc = Assembler::AVX_128bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    const XMMRegister upper  = $vtmp1$$XMMRegister;
    const XMMRegister aux    = $vtmp2$$XMMRegister;

    // Lower half: word result lands in dst, first temp is scratch.
    __ varshiftbw(ideal_op, result, source, counts, enc, upper);

    // Upper half: move the high quadwords of source and counts down, then
    // shift with the word result landing in the first temp.
    __ vpshufd(upper, source, 0xE, enc);
    __ vpshufd(aux, counts, 0xE, enc);
    __ varshiftbw(ideal_op, upper, upper, aux, enc, aux);

    // Narrow both word results to bytes and combine them in dst.
    __ vpackuswb(result, result, upper, enc);
  %}
  ins_pipe( pipe_slow );
%}
21632
// Variable shift of a 32-lane byte vector when AVX512BW is not available.
// Each 128-bit half is handled as two 8-byte groups shifted as words and
// packed; the upper 128-bit result is then inserted into dst.
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int enc = Assembler::AVX_128bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    const XMMRegister t1 = $vtmp1$$XMMRegister;
    const XMMRegister t2 = $vtmp2$$XMMRegister;
    const XMMRegister t3 = $vtmp3$$XMMRegister;
    const XMMRegister t4 = $vtmp4$$XMMRegister;

    // Lower 128 bits: result ends up in dst.
    __ varshiftbw(ideal_op, result, source, counts, enc, t1);
    __ vpshufd(t1, source, 0xE, enc);
    __ vpshufd(t2, counts, 0xE, enc);
    __ varshiftbw(ideal_op, t1, t1, t2, enc, t2);
    __ vpackuswb(result, result, t1, enc);

    // Upper 128 bits: result ends up in the first temp.
    __ vextracti128_high(t1, source);
    __ vextracti128_high(t2, counts);
    __ varshiftbw(ideal_op, t3, t1, t2, enc, t4);
    __ vpshufd(t1, t1, 0xE, enc);
    __ vpshufd(t2, t2, 0xE, enc);
    __ varshiftbw(ideal_op, t1, t1, t2, enc, t2);
    __ vpackuswb(t1, t3, t1, enc);

    // Combine: the first temp becomes the high 128 bits of dst.
    __ vinserti128(result, result, t1, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21668
// Variable shift of a byte vector with at most 32 lanes when AVX512BW is
// available: a single evarshiftb call with one temp.
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister source  = $src$$XMMRegister;
    const XMMRegister counts  = $shift$$XMMRegister;
    const XMMRegister scratch = $vtmp$$XMMRegister;
    __ evarshiftb(ideal_op, result, source, counts, enc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
21687
// Variable shift of a 64-lane byte vector when AVX512BW is available. The
// lower and upper 256-bit halves are shifted separately with evarshiftb and
// the upper result is inserted into dst.
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int half_enc = Assembler::AVX_256bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    const XMMRegister upper  = $vtmp1$$XMMRegister;
    const XMMRegister aux    = $vtmp2$$XMMRegister;

    // Lower 256 bits go straight into dst.
    __ evarshiftb(ideal_op, result, source, counts, half_enc, upper);

    // Upper 256 bits of source and counts are extracted and shifted in place.
    __ vextracti64x4_high(upper, source);
    __ vextracti64x4_high(aux, counts);
    __ evarshiftb(ideal_op, upper, upper, aux, half_enc, aux);

    __ vinserti64x4(result, result, upper, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21710
21711 // Short variable shift
// Variable shift of a short vector with at most 8 lanes when AVX512BW is not
// available. Shorts and shift counts are widened to ints in 256-bit registers,
// shifted with varshiftd, masked with vector_int_to_short_mask and packed
// back to shorts.
//
// The debug format string previously read "vector_var_shift_left_short" with a
// dangling line break although the rule covers left, right and unsigned right
// shifts; it now follows the naming of the byte variants and lists the temp.
// The bare vector-length literals are spelled with the Assembler constants
// (the original already mixed the literal 1 with AVX_256bit for these ops).
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // The sign flag is cleared only for the unsigned right shift.
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    // Pack the two 128-bit halves of the int result into 8 shorts.
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21736
// Variable shift of a 16-lane short vector when AVX512BW is not available.
// Each 128-bit half of the source is widened to ints, shifted with varshiftd
// and masked with vector_int_to_short_mask; the two results are packed and
// reordered with vpermq 0xD8.
//
// The debug format string previously read "vector_var_shift_left_short" with a
// dangling line break although the rule covers left, right and unsigned right
// shifts; it now follows the naming of the byte variants and lists the temps.
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // The sign flag is cleared only for the unsigned right shift.
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21772
// Variable shift of a short vector when AVX512BW is available. Without
// AVX512VL the operation is always encoded at 512 bits.
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int natural_enc = vector_length_encoding(this);
    int enc = Assembler::AVX_512bit;
    if (VM_Version::supports_avx512vl()) {
      enc = natural_enc;
    }
    __ varshiftw(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
21792
21793 //Integer variable shift
// Variable shift of an int vector: a single varshiftd at the vector length of
// this node.
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    __ varshiftd(ideal_op, result, source, counts, enc);
  %}
  ins_pipe( pipe_slow );
%}
21809
21810 //Long variable shift
// Variable left or unsigned-right shift of a long vector: a single varshiftq
// at the vector length of this node.
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    __ varshiftq(ideal_op, result, source, counts, enc);
  %}
  ins_pipe( pipe_slow );
%}
21825
21826 //Long variable right shift arithmetic
// Variable arithmetic right shift of a long vector with at most 4 lanes when
// UseAVX == 2. Uses the varshiftq overload that takes an extra temp register.
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister source  = $src$$XMMRegister;
    const XMMRegister counts  = $shift$$XMMRegister;
    const XMMRegister scratch = $vtmp$$XMMRegister;
    __ varshiftq(ideal_op, result, source, counts, enc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
21842
// Variable arithmetic right shift of a long vector when UseAVX > 2: a single
// varshiftq at the vector length of this node, no temp needed.
//
// The debug format string previously had the typo "vector_varfshift_long"; it
// now uses the same mnemonic as the other long variable-shift rules.
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21855
21856 // --------------------------------- AND --------------------------------------
21857
// Two-operand vector AND used when UseAVX == 0; dst is both input and output.
instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;
    const XMMRegister other = $src$$XMMRegister;
    __ pand(accum, other);
  %}
  ins_pipe( pipe_slow );
%}
21867
// Three-operand vector AND of two registers, used when UseAVX > 0.
instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    __ vpand(result, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
21878
// Vector AND with the second input folded in as a memory operand. Requires
// UseAVX > 0 and a first input wider than 8 bytes.
instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src$$XMMRegister;
    __ vpand(result, lhs, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
21890
21891 // --------------------------------- OR ---------------------------------------
21892
// Two-operand vector OR used when UseAVX == 0; dst is both input and output.
instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;
    const XMMRegister other = $src$$XMMRegister;
    __ por(accum, other);
  %}
  ins_pipe( pipe_slow );
%}
21902
// Three-operand vector OR of two registers, used when UseAVX > 0.
instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    __ vpor(result, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
21913
// Vector OR with the second input folded in as a memory operand. Requires
// UseAVX > 0 and a first input wider than 8 bytes.
instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src$$XMMRegister;
    __ vpor(result, lhs, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
21925
21926 // --------------------------------- XOR --------------------------------------
21927
// Two-operand vector XOR used when UseAVX == 0; dst is both input and output.
instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;
    const XMMRegister other = $src$$XMMRegister;
    __ pxor(accum, other);
  %}
  ins_pipe( pipe_slow );
%}
21937
// Three-operand vector XOR of two registers, used when UseAVX > 0.
instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    __ vpxor(result, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
21948
// Vector XOR with the second input folded in as a memory operand. Requires
// UseAVX > 0 and a first input wider than 8 bytes.
instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src$$XMMRegister;
    __ vpxor(result, lhs, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
21960
21961 // --------------------------------- VectorCast --------------------------------------
21962
// Byte vector cast to another element type. Selected when AVX512VL is
// available or the destination element type is not double.
instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    __ vconvert_b2x(target_bt, result, source, enc);
  %}
  ins_pipe( pipe_slow );
%}
21974
// Byte vector cast to double when AVX512VL is not available; operands use the
// legVec register class.
instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    __ vconvert_b2x(T_DOUBLE, result, source, enc);
  %}
  ins_pipe( pipe_slow );
%}
21985
// Short-to-byte vector cast for sources of at most 8 elements when the
// AVX-512 VL+BW path is not usable (UseAVX <= 2 or no avx512vlbw).
// The source is ANDed with the short-to-byte mask constant and then packed.
instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // Both instructions use the 128-bit encoding (numeric value 0).
    const int vlen_enc = Assembler::AVX_128bit;
    XMMRegister packed_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;

    __ vpand(packed_reg, input_reg, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb(packed_reg, packed_reg, packed_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22000
// Short-to-byte vector cast for a 16-element source when the AVX-512 VL+BW
// path is not usable. The masked source is split with vextracti128 and the
// two halves are packed together into dst; vtmp holds the extracted half.
instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // The mask is applied at the width of the source vector.
    const int src_vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    XMMRegister upper_reg  = $vtmp$$XMMRegister;

    __ vpand(result_reg, input_reg, ExternalAddress(vector_short_to_byte_mask()), src_vlen_enc, noreg);
    __ vextracti128(upper_reg, result_reg, 0x1);
    // Final pack is done with the 128-bit encoding (numeric value 0).
    __ vpackuswb(result_reg, result_reg, upper_reg, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
22018
// Short vector cast to byte, int, float, long or double.
// Selected when AVX-512 VL+BW is available (UseAVX > 2), or whenever the
// result vector is at least as large in bytes as the source vector.
instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        // The narrowing move is emitted with the source length encoding, so the
        // no-AVX512VL fallback has to adjust that encoding (as vcastItoX_evex
        // does); previously it updated the unused destination encoding instead.
        // NOTE(review): the predicate appears to make this branch unreachable,
        // since narrowing is only matched with avx512vlbw - confirm.
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        // Sign-extend to int first, then convert int to float in place.
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        // The intermediate int vector is emitted one encoding step below the
        // double result: 256-bit for a 512-bit result, 128-bit otherwise.
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
22057
// Narrowing int vector cast (to byte or short) for sources of at most 16
// bytes when UseAVX <= 2. The source is masked to the target width and
// packed once for short, twice for byte.
instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const int src_vlen_enc = vector_length_encoding(this, $src);
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    const bool narrow_to_byte = (to_elem_bt == T_BYTE);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;

    // Step 1: mask every int lane down to the target element width.
    if (narrow_to_byte) {
      __ vpand(result_reg, input_reg, ExternalAddress(vector_int_to_byte_mask()), src_vlen_enc, noreg);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand(result_reg, input_reg, ExternalAddress(vector_int_to_short_mask()), src_vlen_enc, noreg);
    }
    // Step 2: pack dwords to words (needed for both targets).
    __ vpackusdw(result_reg, result_reg, result_reg, src_vlen_enc);
    // Step 3: byte targets need one more pack, words to bytes.
    if (narrow_to_byte) {
      __ vpackuswb(result_reg, result_reg, result_reg, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22082
// Narrowing int vector cast (to byte or short) for a 32-byte source when
// UseAVX <= 2. The masked source is kept in vtmp, lane 1 is extracted into
// dst, and the two are packed together.
instruct vcastItoX(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX > 0, "required");

    const int src_vlen_enc = vector_length_encoding(this, $src);
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    const bool narrow_to_byte = (to_elem_bt == T_BYTE);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    XMMRegister masked_reg = $vtmp$$XMMRegister;

    // Mask every int lane down to the target element width.
    if (narrow_to_byte) {
      __ vpand(masked_reg, input_reg, ExternalAddress(vector_int_to_byte_mask()), src_vlen_enc, noreg);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand(masked_reg, input_reg, ExternalAddress(vector_int_to_short_mask()), src_vlen_enc, noreg);
    }
    // Common tail: extract lane 1, then pack dwords to words.
    __ vextracti128(result_reg, masked_reg, 0x1);
    __ vpackusdw(result_reg, masked_reg, result_reg, src_vlen_enc);
    // Byte targets need one more 128-bit pack, words to bytes.
    if (narrow_to_byte) {
      __ vpackuswb(result_reg, result_reg, result_reg, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
22110
// Int vector cast to byte, short, float, long or double.
// Selected when UseAVX > 2, or whenever the result vector is at least as
// large in bytes as the source vector.
instruct vcastItoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
    const int dst_vlen_enc = vector_length_encoding(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;

    // The narrowing moves take the source encoding; without AVX512VL they
    // are emitted with the 512-bit encoding instead.
    const bool is_narrowing = (dst_elem_bt == T_BYTE || dst_elem_bt == T_SHORT);
    if (is_narrowing && !VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }

    switch (dst_elem_bt) {
      case T_BYTE:
        __ evpmovdb(result_reg, input_reg, src_vlen_enc);
        break;
      case T_SHORT:
        __ evpmovdw(result_reg, input_reg, src_vlen_enc);
        break;
      case T_FLOAT:
        __ vcvtdq2ps(result_reg, input_reg, dst_vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxdq(result_reg, input_reg, dst_vlen_enc);
        break;
      case T_DOUBLE:
        __ vcvtdq2pd(result_reg, input_reg, dst_vlen_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
22150
// Long vector cast to byte or short when UseAVX <= 2.
// The source is first shuffled/permuted into dst, then masked and packed
// with 128-bit operations.
instruct vcastLtoBS(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
            UseAVX <= 2);
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
                                                      : ExternalAddress(vector_int_to_short_mask());
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;

    // Stage 1: size-dependent shuffle of the source into the result register.
    if (src_bytes <= 16) {
      __ vpshufd(result_reg, input_reg, 8, Assembler::AVX_128bit);
    } else {
      assert(src_bytes <= 32, "required");
      __ vpermilps(result_reg, input_reg, 8, Assembler::AVX_256bit);
      __ vpermpd(result_reg, result_reg, 8, Assembler::AVX_256bit);
    }

    // Stage 2: identical for both sizes - mask, then pack dwords to words.
    __ vpand(result_reg, result_reg, mask_addr, Assembler::AVX_128bit, noreg);
    __ vpackusdw(result_reg, result_reg, result_reg, Assembler::AVX_128bit);

    // Stage 3: byte targets need one more pack, words to bytes.
    if (to_elem_bt == T_BYTE) {
      __ vpackuswb(result_reg, result_reg, result_reg, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
22180
// Long vector cast to byte, short, int, float or double.
// Selected when UseAVX > 2 for every target type, and for int, float and
// double targets regardless of the AVX level.
instruct vcastLtoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT ||
             Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src\t!" %}
  ins_encode %{
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    int vlen_enc = vector_length_encoding(this, $src);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;

    switch (to_elem_bt) {
      case T_BYTE:
        // Without AVX512VL the narrowing move uses the 512-bit encoding.
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqb(result_reg, input_reg, vlen_enc);
        break;
      case T_SHORT:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqw(result_reg, input_reg, vlen_enc);
        break;
      case T_INT:
        // The strategy depends on the source size in bytes.
        switch (src_bytes) {
          case 8:
            // Plain register copy, skipped when source and result coincide.
            if (result_reg != input_reg) {
              __ movflt(result_reg, input_reg);
            }
            break;
          case 16:
            __ pshufd(result_reg, input_reg, 8);
            break;
          case 32:
            if (UseAVX > 2) {
              if (!VM_Version::supports_avx512vl()) {
                vlen_enc = Assembler::AVX_512bit;
              }
              __ evpmovqd(result_reg, input_reg, vlen_enc);
            } else {
              __ vpermilps(result_reg, input_reg, 8, vlen_enc);
              __ vpermpd(result_reg, result_reg, 8, vlen_enc);
            }
            break;
          default: // remaining size is 64 bytes
            __ evpmovqd(result_reg, input_reg, vlen_enc);
            break;
        }
        break;
      case T_FLOAT:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2ps(result_reg, input_reg, vlen_enc);
        break;
      case T_DOUBLE:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2pd(result_reg, input_reg, vlen_enc);
        break;

      default: assert(false, "%s", type2name(to_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22240
// Float vector cast to double, emitted as a single vcvtps2pd using the
// length encoding of the result.
instruct vcastFtoD_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2d $dst,$src\t!" %}
  ins_encode %{
    const int dst_vlen_enc = vector_length_encoding(this);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    __ vcvtps2pd(result_reg, input_reg, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22251
22252
// Float vector cast to an integral element type of at most 4 bytes, for
// sources below 64 bytes on CPUs without AVX10.2 and without AVX512VL.
// Needs four vector temporaries and kills the flags register.
instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    const int src_vlen_enc = vector_length_encoding(this, $src);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    XMMRegister tmp1_reg   = $xtmp1$$XMMRegister;
    XMMRegister tmp2_reg   = $xtmp2$$XMMRegister;
    XMMRegister tmp3_reg   = $xtmp3$$XMMRegister;
    XMMRegister tmp4_reg   = $xtmp4$$XMMRegister;
    // No scratch register is passed (noreg) for addressing the stub constant.
    // Since JDK-8292878 an explicit scratch register is no longer required for
    // loading addresses wider than 32 bits: stub constants live in the code
    // cache, and ReservedCodeCacheSize is currently capped at 2G. Targets may
    // raise that cap, but a code cache larger than 2G looks unreasonable in
    // practice, and with the cap in place one temporary register allocation is
    // saved, which in the limiting case can prevent spilling in blocks with
    // high register pressure.
    __ vector_castF2X_avx(to_elem_bt, result_reg, input_reg, tmp1_reg,
                          tmp2_reg, tmp3_reg, tmp4_reg,
                          ExternalAddress(vector_float_signflip()), noreg, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22278
// Float vector cast to an integral element type on CPUs without AVX10.2,
// when AVX512VL is supported or the source is 64 bytes wide.
// Long targets go through vector_castF2L_evex, everything else through
// vector_castF2X_evex.
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    XMMRegister tmp1_reg   = $xtmp1$$XMMRegister;
    XMMRegister tmp2_reg   = $xtmp2$$XMMRegister;
    KRegister   kmask1     = $ktmp1$$KRegister;
    KRegister   kmask2     = $ktmp2$$KRegister;

    if (to_elem_bt != T_LONG) {
      // Targets other than long: use the source length encoding and the
      // float sign-flip constant.
      const int src_vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, result_reg, input_reg, tmp1_reg,
                             tmp2_reg, kmask1, kmask2,
                             ExternalAddress(vector_float_signflip()), noreg, src_vlen_enc);
    } else {
      // Long target: use the result length encoding and the double
      // sign-flip constant.
      const int dst_vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex(result_reg, input_reg, tmp1_reg,
                             tmp2_reg, kmask1, kmask2,
                             ExternalAddress(vector_double_signflip()), noreg, dst_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22302
// Float vector cast to an integral element type on CPUs with AVX10.2,
// register source. No temporaries are declared.
instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // Long results use the encoding of the result vector; all other targets
    // use the encoding of the source vector.
    int vlen_enc;
    if (to_elem_bt == T_LONG) {
      vlen_enc = vector_length_encoding(this);
    } else {
      vlen_enc = vector_length_encoding(this, $src);
    }
    __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22315
// Float vector cast to an integral element type on CPUs with AVX10.2,
// with the source vector load (LoadVector) folded into the cast.
instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X (LoadVector src)));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen = Matcher::vector_length(this);
    // Long results use the encoding of the result vector. For other targets
    // the encoding is computed from the float source size in bytes
    // (element count times sizeof(jfloat)), as there is no source register.
    int vlen_enc;
    if (to_elem_bt == T_LONG) {
      vlen_enc = vector_length_encoding(this);
    } else {
      vlen_enc = vector_length_encoding(vlen * sizeof(jfloat));
    }
    __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22329
// Double vector cast to float, emitted as a single vcvtpd2ps using the
// length encoding of the source.
instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    const int src_vlen_enc = vector_length_encoding(this, $src);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    __ vcvtpd2ps(result_reg, input_reg, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22340
// Double vector cast to an integral element type, for sources below 64
// bytes on CPUs without AVX10.2 and without AVX512VL.
// Needs five vector temporaries and kills the flags register.
instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    const int src_vlen_enc = vector_length_encoding(this, $src);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    XMMRegister tmp1_reg   = $xtmp1$$XMMRegister;
    XMMRegister tmp2_reg   = $xtmp2$$XMMRegister;
    XMMRegister tmp3_reg   = $xtmp3$$XMMRegister;
    XMMRegister tmp4_reg   = $xtmp4$$XMMRegister;
    XMMRegister tmp5_reg   = $xtmp5$$XMMRegister;
    // The float sign-flip constant is passed, with no scratch register.
    __ vector_castD2X_avx(to_elem_bt, result_reg, input_reg, tmp1_reg,
                          tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg,
                          ExternalAddress(vector_float_signflip()), noreg, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22358
// Double vector cast to an integral element type on CPUs without AVX10.2,
// when AVX512VL is supported or the source is 64 bytes wide.
// Uses two vector and two opmask temporaries and kills the flags register.
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    const int src_vlen_enc = vector_length_encoding(this, $src);
    // The sign-flip constant depends on AVX512DQ support: the double variant
    // when it is available, the float variant otherwise.
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
                                                                ExternalAddress(vector_float_signflip());
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    XMMRegister tmp1_reg   = $xtmp1$$XMMRegister;
    XMMRegister tmp2_reg   = $xtmp2$$XMMRegister;
    KRegister   kmask1     = $ktmp1$$KRegister;
    KRegister   kmask2     = $ktmp2$$KRegister;
    __ vector_castD2X_evex(to_elem_bt, result_reg, input_reg, tmp1_reg,
                           tmp2_reg, kmask1, kmask2, signflip, noreg, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22376
// Double vector cast to an integral element type on CPUs with AVX10.2,
// register source. No temporaries are declared.
instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    const int src_vlen_enc = vector_length_encoding(this, $src);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    __ vector_castD2X_avx10_2(to_elem_bt, result_reg, input_reg, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22389
// Double vector cast to an integral element type on CPUs with AVX10.2,
// with the source vector load (LoadVector) folded into the cast.
instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X (LoadVector src)));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // There is no source register, so the encoding is computed from the
    // double source size in bytes: element count times sizeof(jdouble).
    const int elem_count = Matcher::vector_length(this);
    const int src_vlen_enc = vector_length_encoding(elem_count * sizeof(jdouble));
    __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22403
// Unsigned vector casts from byte, short or int sources. One rule covers
// all three ideal nodes; the source and target element types are looked up
// at emit time and handed to vector_unsigned_cast.
instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const int dst_vlen_enc = vector_length_encoding(this);
    const BasicType to_elem_bt   = Matcher::vector_element_basic_type(this);
    const BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    __ vector_unsigned_cast(result_reg, input_reg, dst_vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
22419
// RoundVF (float vector rounded to an int vector) for vectors below 64
// bytes on CPUs without AVX512VL. Uses a pointer register, four vector
// temporaries and kills the flags register.
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    // Constant-table entry whose value depends on EnableX86ECoreOpts.
    // NOTE(review): by its name this is the MXCSR value installed during
    // rounding - confirm against vector_round_float_avx.
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    const int vlen_enc = vector_length_encoding(this);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    Register    gpr_tmp    = $tmp$$Register;
    __ vector_round_float_avx(result_reg, input_reg,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                              gpr_tmp, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22436
// RoundVF (float vector rounded to an int vector) when AVX512VL is
// supported or the vector is 64 bytes wide. Uses a pointer register, two
// vector and two opmask temporaries, and kills the flags register.
instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
             Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    // Constant-table entry whose value depends on EnableX86ECoreOpts.
    // NOTE(review): by its name this is the MXCSR value installed during
    // rounding - confirm against vector_round_float_evex.
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    const int vlen_enc = vector_length_encoding(this);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    Register    gpr_tmp    = $tmp$$Register;
    __ vector_round_float_evex(result_reg, input_reg,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                               gpr_tmp, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
22453
// RoundVD (double vector rounded to a long vector). Uses a pointer
// register, two vector and two opmask temporaries, and kills the flags
// register.
instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    // Constant-table entry whose value depends on EnableX86ECoreOpts.
    // NOTE(review): by its name this is the MXCSR value installed during
    // rounding - confirm against vector_round_double_evex.
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    const int vlen_enc = vector_length_encoding(this);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister input_reg  = $src$$XMMRegister;
    Register    gpr_tmp    = $tmp$$Register;
    __ vector_round_double_evex(result_reg, input_reg,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                gpr_tmp, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
22468
22469 // --------------------------------- VectorMaskCmp --------------------------------------
22470
// Float/double vector compare producing a vector result (the node type is
// not a predicate mask), for first inputs of 8 to 32 bytes.
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    const bool is_float = (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT);
    Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister lhs_reg    = $src1$$XMMRegister;
    XMMRegister rhs_reg    = $src2$$XMMRegister;
    // Single-precision and double-precision inputs use different compares.
    if (is_float) {
      __ vcmpps(result_reg, lhs_reg, rhs_reg, fp_pred, src_vlen_enc);
    } else {
      __ vcmppd(result_reg, lhs_reg, rhs_reg, fp_pred, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22489
// Float/double vector compare on 64-byte inputs producing a vector result
// (the node type is not a predicate mask). The compare writes an opmask
// temporary, which then drives a masked load of the all-bits-set constant
// into dst.
instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_pvectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    const int vlen_enc = Assembler::AVX_512bit;
    const bool is_float = (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT);
    const bool merge = false;  // merge flag passed to the masked move
    Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister unmasked = k0; // The comparison itself is not being masked.
    KRegister cmp_mask = $ktmp$$KRegister;
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister lhs_reg    = $src1$$XMMRegister;
    XMMRegister rhs_reg    = $src2$$XMMRegister;

    if (is_float) {
      __ evcmpps(cmp_mask, unmasked, lhs_reg, rhs_reg, fp_pred, vlen_enc);
      __ evmovdqul(result_reg, cmp_mask, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
    } else {
      __ evcmppd(cmp_mask, unmasked, lhs_reg, rhs_reg, fp_pred, vlen_enc);
      __ evmovdquq(result_reg, cmp_mask, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22511
// Float/double vector compare whose result is a predicate mask: the
// comparison writes directly into the opmask register dst.
instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    const bool is_float = (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT);
    Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister unmasked   = k0; // The comparison itself is not being masked.
    KRegister result_msk = $dst$$KRegister;
    XMMRegister lhs_reg  = $src1$$XMMRegister;
    XMMRegister rhs_reg  = $src2$$XMMRegister;
    if (is_float) {
      __ evcmpps(result_msk, unmasked, lhs_reg, rhs_reg, fp_pred, src_vlen_enc);
    } else {
      __ evcmppd(result_msk, unmasked, lhs_reg, rhs_reg, fp_pred, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22530
// Signed integral vector compare (eq, lt or gt) producing a vector result,
// for first inputs of 4 to 32 bytes. No temporary is needed: xnoreg is
// passed in the temp slot of vpcmpCCW.
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    Assembler::Width elem_width = widthForType(Matcher::vector_element_basic_type(this, $src1));
    Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister lhs_reg    = $src1$$XMMRegister;
    XMMRegister rhs_reg    = $src2$$XMMRegister;
    __ vpcmpCCW(result_reg, lhs_reg, rhs_reg, xnoreg, int_pred, elem_width, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22550
// Signed integral vector compare (ne, le or ge) producing a vector result,
// for first inputs of 4 to 32 bytes. Unlike vcmp_direct, a vector
// temporary is handed to vpcmpCCW.
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    Assembler::Width elem_width = widthForType(Matcher::vector_element_basic_type(this, $src1));
    Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    XMMRegister result_reg  = $dst$$XMMRegister;
    XMMRegister lhs_reg     = $src1$$XMMRegister;
    XMMRegister rhs_reg     = $src2$$XMMRegister;
    XMMRegister scratch_reg = $xtmp$$XMMRegister;
    __ vpcmpCCW(result_reg, lhs_reg, rhs_reg, scratch_reg, int_pred, elem_width, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22571
// Unsigned integral vector compare producing a vector result, for first
// inputs of 4 to 32 bytes. Both inputs are XORed with a broadcast
// high-bit-set constant before being compared with vpcmpCCW.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    // Constant-table entry built by high_bit_set for the source element type.
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    Assembler::Width elem_width = widthForType(Matcher::vector_element_basic_type(this, $src1));
    Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister flip_reg   = $xtmp$$XMMRegister;

    // Broadcast the flip constant; the 128-bit case uses vmovddup, wider
    // vectors use vbroadcastsd.
    if (src_vlen_enc != Assembler::AVX_128bit) {
      __ vbroadcastsd(flip_reg, flip_bit, src_vlen_enc, noreg);
    } else {
      __ vmovddup(flip_reg, flip_bit, src_vlen_enc, noreg);
    }
    // Flip both inputs. The second XOR overwrites the constant, so it has to
    // come after the first one.
    __ vpxor(result_reg, flip_reg, $src1$$XMMRegister, src_vlen_enc);
    __ vpxor(flip_reg, flip_reg, $src2$$XMMRegister, src_vlen_enc);
    __ vpcmpCCW(result_reg, result_reg, flip_reg, flip_reg, int_pred, elem_width, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22598
// Integral vector compare on 64-byte inputs producing a vector result (the
// node type is not a predicate mask). Only int and long elements are
// handled: the compare writes an opmask temporary, which then drives a
// masked load of the all-bits-set constant into dst.
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int src_vlen_enc = vector_length_encoding(this, $src1);
    const BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
    const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
    const bool merge = false;
    Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    KRegister unmasked = k0; // The comparison itself is not being masked.
    KRegister cmp_mask = $ktmp$$KRegister;
    XMMRegister result_reg = $dst$$XMMRegister;
    XMMRegister lhs_reg    = $src1$$XMMRegister;
    XMMRegister rhs_reg    = $src2$$XMMRegister;

    if (src1_elem_bt == T_INT) {
      __ evpcmpd(cmp_mask, unmasked, lhs_reg, rhs_reg, int_pred, is_signed, src_vlen_enc);
      __ evmovdqul(result_reg, cmp_mask, ExternalAddress(vector_all_bits_set()), merge, src_vlen_enc, noreg);
    } else if (src1_elem_bt == T_LONG) {
      __ evpcmpq(cmp_mask, unmasked, lhs_reg, rhs_reg, int_pred, is_signed, src_vlen_enc);
      __ evmovdquq(result_reg, cmp_mask, ExternalAddress(vector_all_bits_set()), merge, src_vlen_enc, noreg);
    } else {
      assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22632
22633
// Integral vector compare whose result is a predicate mask: the comparison
// writes directly into the opmask register dst. Handles byte, short, int
// and long elements, signed or unsigned depending on the condition.
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");

    const int src_vlen_enc = vector_length_encoding(this, $src1);
    const BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
    const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
    Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    KRegister result_msk = $dst$$KRegister;
    XMMRegister lhs_reg  = $src1$$XMMRegister;
    XMMRegister rhs_reg  = $src2$$XMMRegister;

    // The comparison is not masked (k0); pick the variant that matches the
    // source element size.
    switch (src1_elem_bt) {
      case T_BYTE:
        __ evpcmpb(result_msk, k0, lhs_reg, rhs_reg, int_pred, is_signed, src_vlen_enc);
        break;
      case T_SHORT:
        __ evpcmpw(result_msk, k0, lhs_reg, rhs_reg, int_pred, is_signed, src_vlen_enc);
        break;
      case T_INT:
        __ evpcmpd(result_msk, k0, lhs_reg, rhs_reg, int_pred, is_signed, src_vlen_enc);
        break;
      case T_LONG:
        __ evpcmpq(result_msk, k0, lhs_reg, rhs_reg, int_pred, is_signed, src_vlen_enc);
        break;
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22671
22672 // Extract
22673
22674 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22675 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22676 match(Set dst (ExtractI src idx));
22677 match(Set dst (ExtractS src idx));
22678 match(Set dst (ExtractB src idx));
22679 format %{ "extractI $dst,$src,$idx\t!" %}
22680 ins_encode %{
22681 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22682
22683 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22684 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22685 %}
22686 ins_pipe( pipe_slow );
22687 %}
22688
22689 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22690 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22691 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22692 match(Set dst (ExtractI src idx));
22693 match(Set dst (ExtractS src idx));
22694 match(Set dst (ExtractB src idx));
22695 effect(TEMP vtmp);
22696 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22697 ins_encode %{
22698 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22699
22700 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22701 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22702 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22703 %}
22704 ins_pipe( pipe_slow );
22705 %}
22706
22707 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22708 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22709 match(Set dst (ExtractL src idx));
22710 format %{ "extractL $dst,$src,$idx\t!" %}
22711 ins_encode %{
22712 assert(UseSSE >= 4, "required");
22713 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22714
22715 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22716 %}
22717 ins_pipe( pipe_slow );
22718 %}
22719
22720 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22721 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22722 Matcher::vector_length(n->in(1)) == 8); // src
22723 match(Set dst (ExtractL src idx));
22724 effect(TEMP vtmp);
22725 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22726 ins_encode %{
22727 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22728
22729 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22730 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22731 %}
22732 ins_pipe( pipe_slow );
22733 %}
22734
22735 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22736 predicate(Matcher::vector_length(n->in(1)) <= 4);
22737 match(Set dst (ExtractF src idx));
22738 effect(TEMP dst, TEMP vtmp);
22739 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22740 ins_encode %{
22741 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22742
22743 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22744 %}
22745 ins_pipe( pipe_slow );
22746 %}
22747
22748 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22749 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22750 Matcher::vector_length(n->in(1)/*src*/) == 16);
22751 match(Set dst (ExtractF src idx));
22752 effect(TEMP vtmp);
22753 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22754 ins_encode %{
22755 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22756
22757 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22758 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22759 %}
22760 ins_pipe( pipe_slow );
22761 %}
22762
22763 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22764 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22765 match(Set dst (ExtractD src idx));
22766 format %{ "extractD $dst,$src,$idx\t!" %}
22767 ins_encode %{
22768 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22769
22770 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22771 %}
22772 ins_pipe( pipe_slow );
22773 %}
22774
22775 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22776 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22777 Matcher::vector_length(n->in(1)) == 8); // src
22778 match(Set dst (ExtractD src idx));
22779 effect(TEMP vtmp);
22780 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22781 ins_encode %{
22782 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22783
22784 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22785 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22786 %}
22787 ins_pipe( pipe_slow );
22788 %}
22789
22790 // --------------------------------- Vector Blend --------------------------------------
22791
22792 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22793 predicate(UseAVX == 0);
22794 match(Set dst (VectorBlend (Binary dst src) mask));
22795 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22796 effect(TEMP tmp);
22797 ins_encode %{
22798 assert(UseSSE >= 4, "required");
22799
22800 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22801 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22802 }
22803 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22804 %}
22805 ins_pipe( pipe_slow );
22806 %}
22807
22808 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22809 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22810 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22811 Matcher::vector_length_in_bytes(n) <= 32 &&
22812 is_integral_type(Matcher::vector_element_basic_type(n)));
22813 match(Set dst (VectorBlend (Binary src1 src2) mask));
22814 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22815 ins_encode %{
22816 int vlen_enc = vector_length_encoding(this);
22817 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22818 %}
22819 ins_pipe( pipe_slow );
22820 %}
22821
22822 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22823 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22824 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22825 Matcher::vector_length_in_bytes(n) <= 32 &&
22826 !is_integral_type(Matcher::vector_element_basic_type(n)));
22827 match(Set dst (VectorBlend (Binary src1 src2) mask));
22828 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22829 ins_encode %{
22830 int vlen_enc = vector_length_encoding(this);
22831 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22832 %}
22833 ins_pipe( pipe_slow );
22834 %}
22835
22836 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22837 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22838 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22839 Matcher::vector_length_in_bytes(n) <= 32);
22840 match(Set dst (VectorBlend (Binary src1 src2) mask));
22841 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22842 effect(TEMP vtmp, TEMP dst);
22843 ins_encode %{
22844 int vlen_enc = vector_length_encoding(this);
22845 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22846 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22847 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22848 %}
22849 ins_pipe( pipe_slow );
22850 %}
22851
22852 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22853 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22854 n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
22855 match(Set dst (VectorBlend (Binary src1 src2) mask));
22856 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22857 effect(TEMP ktmp);
22858 ins_encode %{
22859 int vlen_enc = Assembler::AVX_512bit;
22860 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22861 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22862 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22863 %}
22864 ins_pipe( pipe_slow );
22865 %}
22866
22867
22868 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22869 predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
22870 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22871 VM_Version::supports_avx512bw()));
22872 match(Set dst (VectorBlend (Binary src1 src2) mask));
22873 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22874 ins_encode %{
22875 int vlen_enc = vector_length_encoding(this);
22876 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22877 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22878 %}
22879 ins_pipe( pipe_slow );
22880 %}
22881
22882 // --------------------------------- ABS --------------------------------------
22883 // a = |a|
22884 instruct vabsB_reg(vec dst, vec src) %{
22885 match(Set dst (AbsVB src));
22886 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22887 ins_encode %{
22888 uint vlen = Matcher::vector_length(this);
22889 if (vlen <= 16) {
22890 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22891 } else {
22892 int vlen_enc = vector_length_encoding(this);
22893 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22894 }
22895 %}
22896 ins_pipe( pipe_slow );
22897 %}
22898
22899 instruct vabsS_reg(vec dst, vec src) %{
22900 match(Set dst (AbsVS src));
22901 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22902 ins_encode %{
22903 uint vlen = Matcher::vector_length(this);
22904 if (vlen <= 8) {
22905 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22906 } else {
22907 int vlen_enc = vector_length_encoding(this);
22908 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22909 }
22910 %}
22911 ins_pipe( pipe_slow );
22912 %}
22913
22914 instruct vabsI_reg(vec dst, vec src) %{
22915 match(Set dst (AbsVI src));
22916 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22917 ins_encode %{
22918 uint vlen = Matcher::vector_length(this);
22919 if (vlen <= 4) {
22920 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22921 } else {
22922 int vlen_enc = vector_length_encoding(this);
22923 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22924 }
22925 %}
22926 ins_pipe( pipe_slow );
22927 %}
22928
22929 instruct vabsL_reg(vec dst, vec src) %{
22930 match(Set dst (AbsVL src));
22931 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22932 ins_encode %{
22933 assert(UseAVX > 2, "required");
22934 int vlen_enc = vector_length_encoding(this);
22935 if (!VM_Version::supports_avx512vl()) {
22936 vlen_enc = Assembler::AVX_512bit;
22937 }
22938 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22939 %}
22940 ins_pipe( pipe_slow );
22941 %}
22942
22943 // --------------------------------- ABSNEG --------------------------------------
22944
22945 instruct vabsnegF(vec dst, vec src) %{
22946 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22947 match(Set dst (AbsVF src));
22948 match(Set dst (NegVF src));
22949 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22950 ins_cost(150);
22951 ins_encode %{
22952 int opcode = this->ideal_Opcode();
22953 int vlen = Matcher::vector_length(this);
22954 if (vlen == 2) {
22955 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22956 } else {
22957 assert(vlen == 8 || vlen == 16, "required");
22958 int vlen_enc = vector_length_encoding(this);
22959 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22960 }
22961 %}
22962 ins_pipe( pipe_slow );
22963 %}
22964
22965 instruct vabsneg4F(vec dst) %{
22966 predicate(Matcher::vector_length(n) == 4);
22967 match(Set dst (AbsVF dst));
22968 match(Set dst (NegVF dst));
22969 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22970 ins_cost(150);
22971 ins_encode %{
22972 int opcode = this->ideal_Opcode();
22973 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22974 %}
22975 ins_pipe( pipe_slow );
22976 %}
22977
22978 instruct vabsnegD(vec dst, vec src) %{
22979 match(Set dst (AbsVD src));
22980 match(Set dst (NegVD src));
22981 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22982 ins_encode %{
22983 int opcode = this->ideal_Opcode();
22984 uint vlen = Matcher::vector_length(this);
22985 if (vlen == 2) {
22986 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22987 } else {
22988 int vlen_enc = vector_length_encoding(this);
22989 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22990 }
22991 %}
22992 ins_pipe( pipe_slow );
22993 %}
22994
22995 //------------------------------------- VectorTest --------------------------------------------
22996
22997 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22998 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22999 match(Set cr (VectorTest src1 src2));
23000 effect(TEMP vtmp);
23001 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
23002 ins_encode %{
23003 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23004 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23005 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23006 %}
23007 ins_pipe( pipe_slow );
23008 %}
23009
23010 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23011 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23012 match(Set cr (VectorTest src1 src2));
23013 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23014 ins_encode %{
23015 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23016 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23017 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23018 %}
23019 ins_pipe( pipe_slow );
23020 %}
23021
23022 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23023 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23024 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23025 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23026 match(Set cr (VectorTest src1 src2));
23027 effect(TEMP tmp);
23028 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23029 ins_encode %{
23030 uint masklen = Matcher::vector_length(this, $src1);
23031 __ kmovwl($tmp$$Register, $src1$$KRegister);
23032 __ andl($tmp$$Register, (1 << masklen) - 1);
23033 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23034 %}
23035 ins_pipe( pipe_slow );
23036 %}
23037
23038 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23039 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23040 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23041 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23042 match(Set cr (VectorTest src1 src2));
23043 effect(TEMP tmp);
23044 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23045 ins_encode %{
23046 uint masklen = Matcher::vector_length(this, $src1);
23047 __ kmovwl($tmp$$Register, $src1$$KRegister);
23048 __ andl($tmp$$Register, (1 << masklen) - 1);
23049 %}
23050 ins_pipe( pipe_slow );
23051 %}
23052
23053 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23054 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23055 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23056 match(Set cr (VectorTest src1 src2));
23057 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23058 ins_encode %{
23059 uint masklen = Matcher::vector_length(this, $src1);
23060 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23061 %}
23062 ins_pipe( pipe_slow );
23063 %}
23064
23065 //------------------------------------- LoadMask --------------------------------------------
23066
23067 instruct loadMask(legVec dst, legVec src) %{
23068 predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23069 match(Set dst (VectorLoadMask src));
23070 effect(TEMP dst);
23071 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23072 ins_encode %{
23073 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23074 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23075 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23076 %}
23077 ins_pipe( pipe_slow );
23078 %}
23079
23080 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23081 predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
23082 match(Set dst (VectorLoadMask src));
23083 effect(TEMP xtmp);
23084 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23085 ins_encode %{
23086 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23087 true, Assembler::AVX_512bit);
23088 %}
23089 ins_pipe( pipe_slow );
23090 %}
23091
23092 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23093 predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
23094 match(Set dst (VectorLoadMask src));
23095 effect(TEMP xtmp);
23096 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23097 ins_encode %{
23098 int vlen_enc = vector_length_encoding(in(1));
23099 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23100 false, vlen_enc);
23101 %}
23102 ins_pipe( pipe_slow );
23103 %}
23104
23105 //------------------------------------- StoreMask --------------------------------------------
23106
23107 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23108 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23109 match(Set dst (VectorStoreMask src size));
23110 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23111 ins_encode %{
23112 int vlen = Matcher::vector_length(this);
23113 if (vlen <= 16 && UseAVX <= 2) {
23114 assert(UseSSE >= 3, "required");
23115 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23116 } else {
23117 assert(UseAVX > 0, "required");
23118 int src_vlen_enc = vector_length_encoding(this, $src);
23119 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23120 }
23121 %}
23122 ins_pipe( pipe_slow );
23123 %}
23124
23125 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23126 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23127 match(Set dst (VectorStoreMask src size));
23128 effect(TEMP_DEF dst, TEMP xtmp);
23129 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23130 ins_encode %{
23131 int vlen_enc = Assembler::AVX_128bit;
23132 int vlen = Matcher::vector_length(this);
23133 if (vlen <= 8) {
23134 assert(UseSSE >= 3, "required");
23135 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23136 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23137 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23138 } else {
23139 assert(UseAVX > 0, "required");
23140 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23141 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23142 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23143 }
23144 %}
23145 ins_pipe( pipe_slow );
23146 %}
23147
23148 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23149 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23150 match(Set dst (VectorStoreMask src size));
23151 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23152 effect(TEMP_DEF dst, TEMP xtmp);
23153 ins_encode %{
23154 int vlen_enc = Assembler::AVX_128bit;
23155 int vlen = Matcher::vector_length(this);
23156 if (vlen <= 4) {
23157 assert(UseSSE >= 3, "required");
23158 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23159 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23160 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23161 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23162 } else {
23163 assert(UseAVX > 0, "required");
23164 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23165 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23166 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23167 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23168 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23169 }
23170 %}
23171 ins_pipe( pipe_slow );
23172 %}
23173
23174 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23175 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23176 match(Set dst (VectorStoreMask src size));
23177 effect(TEMP_DEF dst, TEMP xtmp);
23178 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23179 ins_encode %{
23180 assert(UseSSE >= 3, "required");
23181 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23182 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23183 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23184 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23185 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23186 %}
23187 ins_pipe( pipe_slow );
23188 %}
23189
23190 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23191 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23192 match(Set dst (VectorStoreMask src size));
23193 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23194 effect(TEMP_DEF dst, TEMP vtmp);
23195 ins_encode %{
23196 int vlen_enc = Assembler::AVX_128bit;
23197 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23198 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23199 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23200 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23201 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23202 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23203 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23204 %}
23205 ins_pipe( pipe_slow );
23206 %}
23207
23208 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23209 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23210 match(Set dst (VectorStoreMask src size));
23211 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23212 ins_encode %{
23213 int src_vlen_enc = vector_length_encoding(this, $src);
23214 int dst_vlen_enc = vector_length_encoding(this);
23215 if (!VM_Version::supports_avx512vl()) {
23216 src_vlen_enc = Assembler::AVX_512bit;
23217 }
23218 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23219 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23220 %}
23221 ins_pipe( pipe_slow );
23222 %}
23223
23224 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23225 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23226 match(Set dst (VectorStoreMask src size));
23227 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23228 ins_encode %{
23229 int src_vlen_enc = vector_length_encoding(this, $src);
23230 int dst_vlen_enc = vector_length_encoding(this);
23231 if (!VM_Version::supports_avx512vl()) {
23232 src_vlen_enc = Assembler::AVX_512bit;
23233 }
23234 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23235 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23236 %}
23237 ins_pipe( pipe_slow );
23238 %}
23239
23240 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23241 predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
23242 match(Set dst (VectorStoreMask mask size));
23243 effect(TEMP_DEF dst);
23244 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23245 ins_encode %{
23246 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23247 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23248 false, Assembler::AVX_512bit, noreg);
23249 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23250 %}
23251 ins_pipe( pipe_slow );
23252 %}
23253
23254 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23255 predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
23256 match(Set dst (VectorStoreMask mask size));
23257 effect(TEMP_DEF dst);
23258 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23259 ins_encode %{
23260 int dst_vlen_enc = vector_length_encoding(this);
23261 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23262 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23263 %}
23264 ins_pipe( pipe_slow );
23265 %}
23266
23267 instruct vmaskcast_evex(kReg dst) %{
23268 match(Set dst (VectorMaskCast dst));
23269 ins_cost(0);
23270 format %{ "vector_mask_cast $dst" %}
23271 ins_encode %{
23272 // empty
23273 %}
23274 ins_pipe(empty);
23275 %}
23276
23277 instruct vmaskcast(vec dst) %{
23278 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23279 match(Set dst (VectorMaskCast dst));
23280 ins_cost(0);
23281 format %{ "vector_mask_cast $dst" %}
23282 ins_encode %{
23283 // empty
23284 %}
23285 ins_pipe(empty);
23286 %}
23287
23288 instruct vmaskcast_avx(vec dst, vec src) %{
23289 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23290 match(Set dst (VectorMaskCast src));
23291 format %{ "vector_mask_cast $dst, $src" %}
23292 ins_encode %{
23293 int vlen = Matcher::vector_length(this);
23294 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23295 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23296 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23297 %}
23298 ins_pipe(pipe_slow);
23299 %}
23300
23301 //-------------------------------- Load Iota Indices ----------------------------------
23302
23303 instruct loadIotaIndices(vec dst, immI_0 src) %{
23304 match(Set dst (VectorLoadConst src));
23305 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23306 ins_encode %{
23307 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23308 BasicType bt = Matcher::vector_element_basic_type(this);
23309 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23310 %}
23311 ins_pipe( pipe_slow );
23312 %}
23313
23314 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23315 match(Set dst (PopulateIndex src1 src2));
23316 effect(TEMP dst, TEMP vtmp);
23317 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23318 ins_encode %{
23319 assert($src2$$constant == 1, "required");
23320 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23321 int vlen_enc = vector_length_encoding(this);
23322 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23323 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23324 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23325 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23326 %}
23327 ins_pipe( pipe_slow );
23328 %}
23329
23330 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23331 match(Set dst (PopulateIndex src1 src2));
23332 effect(TEMP dst, TEMP vtmp);
23333 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23334 ins_encode %{
23335 assert($src2$$constant == 1, "required");
23336 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23337 int vlen_enc = vector_length_encoding(this);
23338 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23339 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23340 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23341 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23342 %}
23343 ins_pipe( pipe_slow );
23344 %}
23345
23346 //-------------------------------- Rearrange ----------------------------------
23347
23348 // LoadShuffle/Rearrange for Byte
23349 instruct rearrangeB(vec dst, vec shuffle) %{
23350 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23351 Matcher::vector_length(n) < 32);
23352 match(Set dst (VectorRearrange dst shuffle));
23353 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23354 ins_encode %{
23355 assert(UseSSE >= 4, "required");
23356 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23357 %}
23358 ins_pipe( pipe_slow );
23359 %}
23360
// Byte rearrange for 32-element vectors when AVX512_VBMI is not available.
// The source is byte-shuffled twice, once as-is (entries from the same
// 128-bit lane) and once with its two 128-bit halves swapped (entries from
// the other lane); a byte blend then picks between the two results.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int   enc256       = Assembler::AVX_256bit;
    XMMRegister result_xmm   = $dst$$XMMRegister;
    XMMRegister input_xmm    = $src$$XMMRegister;
    XMMRegister selector_xmm = $shuffle$$XMMRegister;
    XMMRegister swapped_xmm  = $vtmp1$$XMMRegister;
    XMMRegister blend_xmm    = $vtmp2$$XMMRegister;

    // swapped_xmm = input with its 128-bit halves exchanged
    __ vperm2i128(swapped_xmm, input_xmm, input_xmm, 1);
    // Candidates taken from the opposite 128-bit lane
    __ vpshufb(swapped_xmm, swapped_xmm, selector_xmm, enc256);
    // Candidates taken from the same 128-bit lane
    __ vpshufb(result_xmm, input_xmm, selector_xmm, enc256);
    // Blend mask: high bit ends up set for entries that come from the other lane
    __ vpaddb(blend_xmm, selector_xmm, ExternalAddress(vector_byte_shufflemask()), enc256, noreg);
    // Select per byte between the two candidate vectors
    __ vpblendvb(result_xmm, result_xmm, swapped_xmm, blend_xmm, enc256);
  %}
  ins_pipe( pipe_slow );
%}
23382
23383
// Byte rearrange for vectors of more than 32 elements on targets without
// AVX512_VBMI. The whole sequence is produced by the rearrange_bytes macro
// assembler routine, which receives three vector temporaries, a general
// purpose temporary and an opmask temporary.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  // "\t!" introduces the TEMP list, as in the other rearrange formats
  // (previously the '!' was glued to $src and the tab followed it).
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23398
// Byte rearrange for vectors of 32 or more elements when AVX512_VBMI is
// supported: a single vpermb at the node's vector length encoding.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int   enc          = vector_length_encoding(this);
    XMMRegister result_xmm   = $dst$$XMMRegister;
    XMMRegister selector_xmm = $shuffle$$XMMRegister;
    XMMRegister input_xmm    = $src$$XMMRegister;
    __ vpermb(result_xmm, selector_xmm, input_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23410
23411 // LoadShuffle/Rearrange for Short
23412
// Short shuffle load for targets without AVX512BW. Only a byte shuffle
// instruction is available there, so the short shuffle is converted into a
// byte shuffle: every index is doubled, copied into both bytes of its
// element, and vector_short_shufflemask() is added to form the alternate
// byte index.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    XMMRegister byte_idx_xmm  = $dst$$XMMRegister;
    XMMRegister short_idx_xmm = $src$$XMMRegister;
    XMMRegister scratch_xmm   = $vtmp$$XMMRegister;

    if (UseAVX != 0) {
      // AVX path: three-operand forms, no explicit register copies needed
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);

      // scratch = 2 * short index (byte index of the element)
      __ vpsllw(scratch_xmm, short_idx_xmm, 1, vlen_enc);

      // Replicate the byte index into both bytes of each short
      __ vpsllw(byte_idx_xmm, scratch_xmm, 8, vlen_enc);
      __ vpor(byte_idx_xmm, byte_idx_xmm, scratch_xmm, vlen_enc);

      // Add the short shuffle mask to obtain the alternate byte index
      __ vpaddb(byte_idx_xmm, byte_idx_xmm, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    } else {
      // SSE path: destructive two-operand forms
      assert(vlen_in_bytes <= 16, "required");

      // scratch = 2 * short index (byte index of the element)
      __ movdqu(scratch_xmm, short_idx_xmm);
      __ psllw(scratch_xmm, 1);

      // Replicate the byte index into both bytes of each short
      __ movdqu(byte_idx_xmm, scratch_xmm);
      __ psllw(byte_idx_xmm, 8);
      __ por(byte_idx_xmm, scratch_xmm);

      // Add the short shuffle mask to obtain the alternate byte index
      __ movdqu(scratch_xmm, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb(byte_idx_xmm, scratch_xmm);
    }
  %}
  ins_pipe( pipe_slow );
%}
23453
// Short rearrange for vectors of at most 8 elements without AVX512BW.
// dst is shuffled in place with pshufb using $shuffle as the selector.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister data_xmm     = $dst$$XMMRegister;
    XMMRegister selector_xmm = $shuffle$$XMMRegister;
    __ pshufb(data_xmm, selector_xmm);
  %}
  ins_pipe( pipe_slow );
%}
23465
// Short rearrange for 16-element vectors without AVX512BW. Same scheme as
// the 256-bit byte variant: shuffle the source and a half-swapped copy of
// the source with vpshufb, then blend the two results per byte.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int   enc256       = Assembler::AVX_256bit;
    XMMRegister result_xmm   = $dst$$XMMRegister;
    XMMRegister input_xmm    = $src$$XMMRegister;
    XMMRegister selector_xmm = $shuffle$$XMMRegister;
    XMMRegister swapped_xmm  = $vtmp1$$XMMRegister;
    XMMRegister blend_xmm    = $vtmp2$$XMMRegister;

    // swapped_xmm = input with its 128-bit halves exchanged
    __ vperm2i128(swapped_xmm, input_xmm, input_xmm, 1);
    // Candidates taken from the opposite 128-bit lane
    __ vpshufb(swapped_xmm, swapped_xmm, selector_xmm, enc256);
    // Candidates taken from the same 128-bit lane
    __ vpshufb(result_xmm, input_xmm, selector_xmm, enc256);
    // Blend mask: high bit ends up set for entries that come from the other lane
    __ vpaddb(blend_xmm, selector_xmm, ExternalAddress(vector_byte_shufflemask()), enc256, noreg);
    // Select per byte between the two candidate vectors
    __ vpblendvb(result_xmm, result_xmm, swapped_xmm, blend_xmm, enc256);
  %}
  ins_pipe( pipe_slow );
%}
23487
// Short rearrange with AVX512BW: a single vpermw. When AVX512VL is not
// supported the instruction is always emitted with the 512-bit encoding,
// regardless of the node's own vector length.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int natural_enc = vector_length_encoding(this);
    const int vlen_enc    = VM_Version::supports_avx512vl() ? natural_enc : Assembler::AVX_512bit;
    XMMRegister result_xmm   = $dst$$XMMRegister;
    XMMRegister selector_xmm = $shuffle$$XMMRegister;
    XMMRegister input_xmm    = $src$$XMMRegister;
    __ vpermw(result_xmm, selector_xmm, input_xmm, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23502
23503 // LoadShuffle/Rearrange for Integer and Float
23504
// Int/float shuffle load for 4-element vectors on SSE-only targets
// (UseAVX == 0). Only a byte shuffle instruction is available there, so the
// int shuffle is turned into a byte shuffle: each index is duplicated,
// multiplied by 4, replicated to all four bytes of its element, and
// vector_int_shufflemask() is added to produce the per-byte indices.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister out_xmm  = $dst$$XMMRegister;
    XMMRegister in_xmm   = $src$$XMMRegister;
    XMMRegister work_xmm = $vtmp$$XMMRegister;

    // work = every index duplicated into both words of its element, times 4
    __ movdqu(work_xmm, in_xmm);
    __ pshuflw(work_xmm, work_xmm, 0xA0);
    __ pshufhw(work_xmm, work_xmm, 0xA0);
    __ psllw(work_xmm, 2);

    // Second duplication: work now holds 4 copies of each byte index
    __ movdqu(out_xmm, work_xmm);
    __ psllw(out_xmm, 8);
    __ por(work_xmm, out_xmm);

    // out = int shuffle mask (3,2,1,0 per element) + replicated byte index
    __ movdqu(out_xmm, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb(out_xmm, work_xmm);
  %}
  ins_pipe( pipe_slow );
%}
23534
// Int/float rearrange on SSE-only targets (UseAVX == 0): dst is shuffled in
// place with pshufb using $shuffle as the selector.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister data_xmm     = $dst$$XMMRegister;
    XMMRegister selector_xmm = $shuffle$$XMMRegister;
    __ pshufb(data_xmm, selector_xmm);
  %}
  ins_pipe( pipe_slow );
%}
23546
// Int/float rearrange when AVX is enabled (UseAVX > 0). Instruction
// selection is left to the vector_rearrange_int_float macro assembler
// routine, which is given the element type and the vector length encoding.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int       enc     = vector_length_encoding(this);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    XMMRegister result_xmm   = $dst$$XMMRegister;
    XMMRegister selector_xmm = $shuffle$$XMMRegister;
    XMMRegister input_xmm    = $src$$XMMRegister;
    __ vector_rearrange_int_float(elem_bt, result_xmm, selector_xmm, input_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23559
23560 // LoadShuffle/Rearrange for Long and Double
23561
// Long/double shuffle load for vectors of fewer than 8 elements without
// AVX512VL. Only a double word shuffle instruction is available there, so
// the long shuffle is converted into a double word shuffle: each index is
// doubled, copied to both halves of its element, and
// vector_long_shufflemask() is added to form the alternate index.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int   enc           = vector_length_encoding(this);
    XMMRegister dword_idx_xmm = $dst$$XMMRegister;
    XMMRegister long_idx_xmm  = $src$$XMMRegister;
    XMMRegister scratch_xmm   = $vtmp$$XMMRegister;

    // scratch = 2 * long index (double word index of the element)
    __ vpsllq(scratch_xmm, long_idx_xmm, 1, enc);

    // Replicate the double word index into both halves of each long
    __ vpsllq(dword_idx_xmm, scratch_xmm, 32, enc);
    __ vpor(dword_idx_xmm, dword_idx_xmm, scratch_xmm, enc);

    // Add the long shuffle mask to obtain the alternate double word index
    __ vpaddd(dword_idx_xmm, dword_idx_xmm, ExternalAddress(vector_long_shufflemask()), enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23587
// Long/double rearrange for vectors of fewer than 8 elements without
// AVX512VL: emitted as a double word permute (vpermd) with $shuffle as the
// index operand.
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int   enc          = vector_length_encoding(this);
    XMMRegister result_xmm   = $dst$$XMMRegister;
    XMMRegister selector_xmm = $shuffle$$XMMRegister;
    XMMRegister input_xmm    = $src$$XMMRegister;
    __ vpermd(result_xmm, selector_xmm, input_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23601
// Long/double rearrange for 8-element vectors, or for any length when
// AVX512VL is supported: emitted as vpermq. A 128-bit length encoding is
// promoted to 256-bit before the instruction is emitted.
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int natural_enc = vector_length_encoding(this);
    const int vlen_enc    = (natural_enc == Assembler::AVX_128bit) ? Assembler::AVX_256bit : natural_enc;
    XMMRegister result_xmm   = $dst$$XMMRegister;
    XMMRegister selector_xmm = $shuffle$$XMMRegister;
    XMMRegister input_xmm    = $src$$XMMRegister;
    __ vpermq(result_xmm, selector_xmm, input_xmm, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23618
23619 // --------------------------------- FMA --------------------------------------
23620 // a * b + c
23621
// Packed float fused multiply-add, all operands in registers:
// c = a * b + c. The accumulator c is both an input and the result.
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const int   enc     = vector_length_encoding(this);
    XMMRegister acc_xmm = $c$$XMMRegister;
    XMMRegister lhs_xmm = $a$$XMMRegister;
    XMMRegister rhs_xmm = $b$$XMMRegister;
    __ vfmaf(acc_xmm, lhs_xmm, rhs_xmm, acc_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23633
// Packed float fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only matched when the first input of the node is
// wider than 8 bytes.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const int   enc     = vector_length_encoding(this);
    XMMRegister acc_xmm = $c$$XMMRegister;
    XMMRegister lhs_xmm = $a$$XMMRegister;
    __ vfmaf(acc_xmm, lhs_xmm, $b$$Address, acc_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23646
// Packed double fused multiply-add, all operands in registers:
// c = a * b + c. The accumulator c is both an input and the result.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const int   enc     = vector_length_encoding(this);
    XMMRegister acc_xmm = $c$$XMMRegister;
    XMMRegister lhs_xmm = $a$$XMMRegister;
    XMMRegister rhs_xmm = $b$$XMMRegister;
    __ vfmad(acc_xmm, lhs_xmm, rhs_xmm, acc_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23658
// Packed double fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only matched when the first input of the node is
// wider than 8 bytes.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    const int   enc     = vector_length_encoding(this);
    XMMRegister acc_xmm = $c$$XMMRegister;
    XMMRegister lhs_xmm = $a$$XMMRegister;
    __ vfmad(acc_xmm, lhs_xmm, $b$$Address, acc_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23671
23672 // --------------------------------- Vector Multiply Add --------------------------------------
23673
// Short-to-int multiply-add on SSE-only targets (UseAVX == 0).
// Two-operand pmaddwd: dst is the first input and receives the result.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    XMMRegister acc_xmm   = $dst$$XMMRegister;
    XMMRegister other_xmm = $src1$$XMMRegister;
    __ pmaddwd(acc_xmm, other_xmm);
  %}
  ins_pipe( pipe_slow );
%}
23683
// Short-to-int multiply-add when AVX is enabled (UseAVX > 0).
// Three-operand vpmaddwd at the node's vector length encoding.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    const int   enc        = vector_length_encoding(this);
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister lhs_xmm    = $src1$$XMMRegister;
    XMMRegister rhs_xmm    = $src2$$XMMRegister;
    __ vpmaddwd(result_xmm, lhs_xmm, rhs_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23694
23695 // --------------------------------- Vector Multiply Add Add ----------------------------------
23696
// Fused form of MulAddVS2VI followed by AddVI into dst, available when
// AVX512_VNNI is supported: the pair is emitted as a single evpdpwssd.
// The low cost (10) is what the instruct declares for this fused rule.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_cost(10);
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int   enc     = vector_length_encoding(this);
    XMMRegister acc_xmm = $dst$$XMMRegister;
    XMMRegister lhs_xmm = $src1$$XMMRegister;
    XMMRegister rhs_xmm = $src2$$XMMRegister;
    __ evpdpwssd(acc_xmm, lhs_xmm, rhs_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23709
23710 // --------------------------------- PopCount --------------------------------------
23711
// Unmasked vector population count (PopCountVI / PopCountVL) for element
// types accepted by is_vector_popcount_predicate(). The element type and the
// vector length encoding are taken from the source operand; k0 is passed as
// the opmask argument of the macro assembler routine.
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    // The ideal opcode is not needed here: the routine dispatches on bt alone.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23725
// Masked vector population count (PopCountVI / PopCountVL with an opmask).
// dst is first loaded with a full copy of src, then the popcount is issued
// under $mask with the boolean argument set to true (presumably the merge
// flag -- confirm against vector_popcount_integral_evex).
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    const int       enc     = vector_length_encoding(this, $src);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    __ evmovdquq(result_xmm, input_xmm, enc);
    __ vector_popcount_integral_evex(elem_bt, result_xmm, input_xmm, $mask$$KRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23739
// Vector population count (PopCountVI / PopCountVL) for element types that
// is_vector_popcount_predicate() rejects. The sequence is produced by the
// vector_popcount_integral macro assembler routine, which needs two vector
// temporaries and one general purpose temporary.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    // The ideal opcode is not needed here: the routine dispatches on bt alone.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23755
23756 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23757
// Vector trailing-zero count for the element type / vector size combinations
// accepted by is_clz_non_subword_predicate_evex(). Only one vector temporary
// is used; the remaining vector temp slots of the shared macro assembler
// routine are passed as xnoreg and the opmask slot as k0.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  // "\t!" introduces the TEMP list, as in the AVX variant's format
  // (previously the '!' was glued to $src and the tab followed it).
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23773
// Vector trailing-zero count for short elements when AVX512CD is supported
// and either AVX512VL is available or the vector is 64 bytes wide. Three
// vector temporaries are used; the third vector temp slot of the shared
// macro assembler routine is passed as xnoreg and the opmask slot as k0.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  // "\t!" introduces the TEMP list, as in the AVX variant's format
  // (previously the '!' was glued to $src and the tab followed it).
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23790
// Vector trailing-zero count for byte elements when AVX512VL+BW is
// supported. This variant supplies every temporary the shared macro
// assembler routine accepts: four vector temps, an opmask temp and a general
// purpose temp.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  // "\t!" introduces the TEMP list, as in the AVX variant's format
  // (previously the '!' was glued to $src and the tab followed it).
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23806
// Vector trailing-zero count for vectors narrower than 64 bytes when
// AVX512VL is not supported. The sequence comes from the
// vector_count_trailing_zeros_avx macro assembler routine, which takes three
// vector temporaries and one general purpose temporary.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    const int       enc     = vector_length_encoding(this, $src);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    XMMRegister tmp1_xmm   = $xtmp1$$XMMRegister;
    XMMRegister tmp2_xmm   = $xtmp2$$XMMRegister;
    XMMRegister tmp3_xmm   = $xtmp3$$XMMRegister;
    Register    gpr_tmp    = $rtmp$$Register;
    __ vector_count_trailing_zeros_avx(elem_bt, result_xmm, input_xmm, tmp1_xmm,
                                       tmp2_xmm, tmp3_xmm, gpr_tmp, enc);
  %}
  ins_pipe( pipe_slow );
%}
23820
23821
23822 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23823
// Three-input bitwise logic (MacroLogicV) with all inputs in registers.
// dst is the first input and the result; $func is the 8-bit immediate
// passed to vpternlogd.
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    const int   enc        = vector_length_encoding(this);
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister second_xmm = $src2$$XMMRegister;
    XMMRegister third_xmm  = $src3$$XMMRegister;
    __ vpternlogd(result_xmm, $func$$constant, second_xmm, third_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23834
// Three-input bitwise logic (MacroLogicV) with the third input loaded from
// memory. Only matched when the first vector input is wider than 8 bytes.
// dst is the first input and the result; $func is the 8-bit immediate
// passed to vpternlogd.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    const int   enc        = vector_length_encoding(this);
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister second_xmm = $src2$$XMMRegister;
    __ vpternlogd(result_xmm, $func$$constant, second_xmm, $src3$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
23846
23847 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate left/right by an 8-bit immediate count. The ideal opcode
// (RotateLeftV or RotateRightV) and the element type of the node are handed
// to vprotate_imm, which selects the actual instruction.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    const int       rotate_opc = this->ideal_Opcode();
    const int       enc        = vector_length_encoding(this);
    const BasicType elem_bt    = this->bottom_type()->is_vect()->element_basic_type();
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    __ vprotate_imm(rotate_opc, elem_bt, result_xmm, input_xmm, $shift$$constant, enc);
  %}
  ins_pipe( pipe_slow );
%}
23860
// Vector rotate left/right by a per-element count held in a vector
// register. The ideal opcode (RotateLeftV or RotateRightV) and the element
// type of the node are handed to vprotate_var, which selects the actual
// instruction.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    const int       rotate_opc = this->ideal_Opcode();
    const int       enc        = vector_length_encoding(this);
    const BasicType elem_bt    = this->bottom_type()->is_vect()->element_basic_type();
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister input_xmm  = $src$$XMMRegister;
    XMMRegister count_xmm  = $shift$$XMMRegister;
    __ vprotate_var(rotate_opc, elem_bt, result_xmm, input_xmm, count_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23873
23874 // ---------------------------------- Masked Operations ------------------------------------
// Masked vector load where the mask lives in a vector register (the mask
// input's type is not a pvectmask). Emitted through vmovmask with the
// element type and length encoding of the loaded vector.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    const BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    const int       enc     = vector_length_encoding(this);
    XMMRegister result_xmm = $dst$$XMMRegister;
    XMMRegister mask_xmm   = $mask$$XMMRegister;
    __ vmovmask(elem_bt, result_xmm, $mem$$Address, mask_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23886
23887
// Masked vector load where the mask lives in an opmask register (the mask
// input's type is a pvectmask). Emitted through evmovdqu with the boolean
// argument set to false.
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    const BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    const int       enc     = vector_length_encoding(this);
    KRegister   mask_k     = $mask$$KRegister;
    XMMRegister result_xmm = $dst$$XMMRegister;
    __ evmovdqu(elem_bt, mask_k, result_xmm, $mem$$Address, false, enc);
  %}
  ins_pipe( pipe_slow );
%}
23899
// Masked vector store where the mask lives in a vector register (the mask
// input's type is not a pvectmask). The store node itself has no vector
// type, so element type and length encoding are read from the node that
// produces $src.
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* value_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    const BasicType elem_bt    = value_node->bottom_type()->is_vect()->element_basic_type();
    const int       enc        = vector_length_encoding(value_node);
    XMMRegister value_xmm = $src$$XMMRegister;
    XMMRegister mask_xmm  = $mask$$XMMRegister;
    __ vmovmask(elem_bt, $mem$$Address, value_xmm, mask_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
23912
// Masked vector store where the mask lives in an opmask register (the mask
// input's type is a pvectmask). Element type and length encoding are read
// from the node that produces $src; evmovdqu is called with the boolean
// argument set to true.
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* value_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    const int       enc        = vector_length_encoding(value_node);
    const BasicType elem_bt    = value_node->bottom_type()->is_vect()->element_basic_type();
    KRegister   mask_k    = $mask$$KRegister;
    XMMRegister value_xmm = $src$$XMMRegister;
    __ evmovdqu(elem_bt, mask_k, $mem$$Address, value_xmm, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23925
// Alignment check for a vector memory address: tests $addr against the
// immediate $mask and stops the VM with a diagnostic when any masked bit is
// set. The address register is passed through unchanged; flags are killed.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label is_aligned;
    // ZF is set when (addr & mask) == 0, i.e. the address is aligned
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, is_aligned);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(is_aligned);
  %}
  ins_pipe(pipe_slow);
%}
23940
// Masked vector equality comparison producing an int.
// dst is preset to -1. The lanes selected by $mask are compared for
// equality into ktmp1; if kortestql of (~mask, ktmp1) sets the carry flag
// the -1 is kept. Otherwise dst becomes the trailing-zero count of the
// inverted comparison result.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label all_equal;
    const int       enc     = vector_length_encoding(this, $src1);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $src1);
    Register  result_gpr  = $dst$$Register;
    KRegister lanes_k     = $mask$$KRegister;
    KRegister equal_k     = $ktmp1$$KRegister;
    KRegister not_lanes_k = $ktmp2$$KRegister;

    __ knotql(not_lanes_k, lanes_k);
    __ mov64(result_gpr, -1L);
    __ evpcmp(lane_bt, equal_k, lanes_k, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, enc);
    __ kortestql(not_lanes_k, equal_k);
    __ jccb(Assembler::carrySet, all_equal);
    // Carry clear: compute tzcnt(~equal_k) into dst
    __ kmovql(result_gpr, equal_k);
    __ notq(result_gpr);
    __ tzcntq(result_gpr, result_gpr);
    __ bind(all_equal);
  %}
  ins_pipe( pipe_slow );
%}
23965
23966
// Generates an opmask from a length held in a register by calling the
// genmask macro assembler routine; one long temporary is needed and the
// flags are killed.
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    KRegister mask_k     = $dst$$KRegister;
    Register  length_gpr = $len$$Register;
    Register  tmp_gpr    = $temp$$Register;
    __ genmask(mask_k, length_gpr, tmp_gpr);
  %}
  ins_pipe( pipe_slow );
%}
23976
// Generates an opmask from a constant length. A non-positive length yields
// an all-zero mask (kxorql of dst with itself); otherwise
// right_n_bits(len) is materialized in a temporary and moved into dst.
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp);
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  ins_encode %{
    KRegister mask_k = $dst$$KRegister;
    if ($len$$constant <= 0) {
      __ kxorql(mask_k, mask_k, mask_k);
    } else {
      Register bits_gpr = $temp$$Register;
      __ mov64(bits_gpr, right_n_bits($len$$constant));
      __ kmovql(mask_k, bits_gpr);
    }
  %}
  ins_pipe( pipe_slow );
%}
23991
// VectorMaskToLong for a mask whose type is a pvectmask (opmask register).
// dst serves both as the result and as the temporary register argument of
// vector_mask_operation; the mask size passed is lane count times element
// size in bytes.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    const int       mask_opc   = this->ideal_Opcode();
    const BasicType lane_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int       lane_count = Matcher::vector_length(this, $mask);
    const int       mask_bytes = lane_count * type2aelembytes(lane_bt);
    const int       enc        = vector_length_encoding(this, $mask);
    Register result_gpr = $dst$$Register;
    __ vector_mask_operation(mask_opc, result_gpr, $mask$$KRegister,
                             result_gpr, lane_count, mask_bytes, enc);
  %}
  ins_pipe( pipe_slow );
%}
24008
// VectorMaskToLong for a mask held in a vector register (its type is not a
// pvectmask). dst serves both as the result and as the temporary register
// argument of vector_mask_operation; one vector temporary is needed.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    const int       mask_opc   = this->ideal_Opcode();
    const BasicType lane_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int       lane_count = Matcher::vector_length(this, $mask);
    const int       enc        = vector_length_encoding(this, $mask);
    Register    result_gpr = $dst$$Register;
    XMMRegister mask_xmm   = $mask$$XMMRegister;
    XMMRegister tmp_xmm    = $xtmp$$XMMRegister;
    __ vector_mask_operation(mask_opc, result_gpr, mask_xmm, tmp_xmm,
                             result_gpr, lane_count, lane_bt, enc);
  %}
  ins_pipe( pipe_slow );
%}
24024
// VectorMaskToLong applied directly to a VectorStoreMask whose input is a
// vector-register mask (not a pvectmask): the pair is matched as one rule
// that operates on the original mask. The $size operand only takes part in
// matching and is not used by the encoding.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    const int       mask_opc   = this->ideal_Opcode();
    const BasicType lane_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int       lane_count = Matcher::vector_length(this, $mask);
    const int       enc        = vector_length_encoding(this, $mask);
    Register    result_gpr = $dst$$Register;
    XMMRegister mask_xmm   = $mask$$XMMRegister;
    XMMRegister tmp_xmm    = $xtmp$$XMMRegister;
    __ vector_mask_operation(mask_opc, result_gpr, mask_xmm, tmp_xmm,
                             result_gpr, lane_count, lane_bt, enc);
  %}
  ins_pipe( pipe_slow );
%}
24040
// VectorMaskTrueCount for a mask whose type is a pvectmask (opmask
// register). Uses a separate long temporary; the mask size passed to
// vector_mask_operation is lane count times element size in bytes.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    const int       mask_opc   = this->ideal_Opcode();
    const BasicType lane_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int       lane_count = Matcher::vector_length(this, $mask);
    const int       mask_bytes = lane_count * type2aelembytes(lane_bt);
    const int       enc        = vector_length_encoding(this, $mask);
    Register result_gpr = $dst$$Register;
    Register tmp_gpr    = $tmp$$Register;
    __ vector_mask_operation(mask_opc, result_gpr, $mask$$KRegister,
                             tmp_gpr, lane_count, mask_bytes, enc);
  %}
  ins_pipe( pipe_slow );
%}
24057
// VectorMaskTrueCount for a mask held in a vector register (its type is not
// a pvectmask). Needs one long temporary and one vector temporary.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    const int       mask_opc   = this->ideal_Opcode();
    const BasicType lane_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int       lane_count = Matcher::vector_length(this, $mask);
    const int       enc        = vector_length_encoding(this, $mask);
    Register    result_gpr = $dst$$Register;
    Register    tmp_gpr    = $tmp$$Register;
    XMMRegister mask_xmm   = $mask$$XMMRegister;
    XMMRegister tmp_xmm    = $xtmp$$XMMRegister;
    __ vector_mask_operation(mask_opc, result_gpr, mask_xmm, tmp_xmm,
                             tmp_gpr, lane_count, lane_bt, enc);
  %}
  ins_pipe( pipe_slow );
%}
24073
// VectorMaskTrueCount applied directly to a VectorStoreMask whose input is
// a vector-register mask (not a pvectmask): the pair is matched as one rule
// that operates on the original mask. The $size operand only takes part in
// matching and is not used by the encoding.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    const int       mask_opc   = this->ideal_Opcode();
    const BasicType lane_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int       lane_count = Matcher::vector_length(this, $mask);
    const int       enc        = vector_length_encoding(this, $mask);
    Register    result_gpr = $dst$$Register;
    Register    tmp_gpr    = $tmp$$Register;
    XMMRegister mask_xmm   = $mask$$XMMRegister;
    XMMRegister tmp_xmm    = $xtmp$$XMMRegister;
    __ vector_mask_operation(mask_opc, result_gpr, mask_xmm, tmp_xmm,
                             tmp_gpr, lane_count, lane_bt, enc);
  %}
  ins_pipe( pipe_slow );
%}
24089
// Matches VectorMaskFirstTrue and VectorMaskLastTrue when the mask input's
// type is a pvectmask (kReg operand). The ideal opcode passed to the macro
// assembler tells the two operations apart.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $mask);
    const int lane_cnt = Matcher::vector_length(this, $mask);
    // Lane count multiplied by the per-element byte size.
    const int mask_bytes = lane_cnt * type2aelembytes(elem_bt);
    const int vec_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_cnt, mask_bytes, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24107
// Matches VectorMaskFirstTrue and VectorMaskLastTrue when the mask input's
// type is not a pvectmask; the mask operand is an ordinary vector register.
// The ideal opcode passed to the macro assembler tells the two apart.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this, $mask);
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, elem_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24124
// Matches VectorMaskFirstTrue and VectorMaskLastTrue applied directly to a
// VectorStoreMask whose own input is not a pvectmask. The size operand is
// part of the match rule only; the encoding does not read it.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this, $mask);
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, elem_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24141
24142 // --------------------------------- Compress/Expand Operations ---------------------------
// Matches CompressV and ExpandV when AVX512VL is not available and the vector
// is at most 32 bytes; the mask is an ordinary vector register. The ideal
// opcode selects between compress and expand inside the macro assembler.
// Needs an int TEMP, a long TEMP, two vector TEMPs, and kills the flags.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24158
// Matches CompressV and ExpandV when AVX512VL is available or the vector is
// exactly 64 bytes; the mask operand is a kReg. No temporaries are declared.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The ideal opcode tells the helper whether to compress or expand.
    __ vector_compress_expand(this->ideal_Opcode(), $dst$$XMMRegister, $src$$XMMRegister,
                              $mask$$KRegister, false, elem_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24172
// Matches CompressM with both source and destination masks in kReg operands.
// Needs two long TEMP registers and kills the flags. There is no predicate;
// the encoding asserts that the input's type is a pvectmask.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_pvectmask(), "CompressM input must have a pvectmask type");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
24184
24185 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24186
// Matches ReverseV when GFNI is not available. Needs two vector TEMPs and one
// int TEMP; dst is also marked TEMP.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24200
// Matches ReverseV when GFNI is available. Needs one vector TEMP; dst is also
// marked TEMP. A 64-bit constant is placed in the constant table and its
// address is handed to the macro assembler.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // NOTE(review): presumably the bit-matrix operand for the GFNI affine
    // instruction used by the helper - confirm against vector_reverse_bit_gfni.
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
24215
// Matches ReverseBytesV when AVX512BW is available or the vector is shorter
// than 64 bytes. No extra temporaries; dst is marked TEMP.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_byte(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
24228
// Matches ReverseBytesV for 64-byte vectors when AVX512BW is not available.
// Needs two vector TEMPs and one int TEMP; dst is also marked TEMP.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24242
24243 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24244
// Matches unmasked CountLeadingZerosV when is_clz_non_subword_predicate_evex
// accepts the input's element type and byte length. No temporaries are
// needed, so every temp slot of the helper is passed as a "no register" value.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, k0, noreg, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24258
// Masked form of CountLeadingZerosV, guarded by the same
// is_clz_non_subword_predicate_evex check; the mask operand is a kReg.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    // Copy src into dst before the masked count is emitted.
    // NOTE(review): presumably so lanes not selected by the mask keep the
    // source value - confirm against vector_count_leading_zeros_evex.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg,
                                       $mask$$KRegister, noreg, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24273
// Matches CountLeadingZerosV on T_SHORT elements when AVX512CD is available
// and either AVX512VL is available or the vector is 64 bytes. Needs two
// vector TEMPs; dst is also marked TEMP.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24289
// Matches CountLeadingZerosV on T_BYTE elements when supports_avx512vlbw()
// holds. Needs three vector TEMPs, a kReg TEMP and a pointer-register TEMP;
// dst is also marked TEMP.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24304
// Matches CountLeadingZerosV on T_INT elements when AVX512VL is not available
// and the input vector is shorter than 64 bytes. Needs three vector TEMPs and
// no general-purpose TEMP (noreg is passed in that slot).
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                                      noreg, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24319
// Matches CountLeadingZerosV on any element type other than T_INT when
// AVX512VL is not available and the input vector is shorter than 64 bytes.
// Needs three vector TEMPs plus a pointer-register TEMP.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                                      $rtmp$$Register, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24334
24335 // ---------------------------------- Vector Masked Operations ------------------------------------
24336
// Masked vector add, register form. Matches AddVB/AddVS/AddVI/AddVL/AddVF/AddVD
// whose first input is also the destination; the mask operand is a kReg.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst is an input in the match rule, so it is passed twice.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24354
// Masked vector add, memory form. Same opcodes as the register form, but the
// second input is a LoadVector folded into a memory operand.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst is an input in the match rule, so it is passed twice.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24372
// Masked XorV, register form. The first input is also the destination; the
// mask operand is a kReg.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24385
// Masked XorV, memory form. The second input is a LoadVector folded into a
// memory operand; the first input is also the destination.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24398
// Masked OrV, register form. The first input is also the destination; the
// mask operand is a kReg.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24411
// Masked OrV, memory form. The second input is a LoadVector folded into a
// memory operand; the first input is also the destination.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24424
// Masked AndV, register form. The first input is also the destination; the
// mask operand is a kReg.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24437
// Masked AndV, memory form. The second input is a LoadVector folded into a
// memory operand; the first input is also the destination.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24450
// Masked vector subtract, register form. Matches SubVB/SubVS/SubVI/SubVL/
// SubVF/SubVD whose first input is also the destination; mask is a kReg.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst is an input in the match rule, so it is passed twice.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24468
// Masked vector subtract, memory form. Same opcodes as the register form, but
// the second input is a LoadVector folded into a memory operand.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst is an input in the match rule, so it is passed twice.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24486
// Masked vector multiply, register form. Matches MulVS/MulVI/MulVL/MulVF/MulVD
// (there is no MulVB rule here); the first input is also the destination.
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24503
// Masked vector multiply, memory form. Same opcodes as the register form, but
// the second input is a LoadVector folded into a memory operand.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24520
// Masked SqrtVF/SqrtVD. The single vector input is also the destination, so
// dst fills all three vector slots of the evmasked_op call.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24534
// Masked DivVF/DivVD, register form. The first input is also the destination;
// the mask operand is a kReg.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24548
// Masked DivVF/DivVD, memory form. The second input is a LoadVector folded
// into a memory operand; the first input is also the destination.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24562
24563
// Masked RotateLeftV/RotateRightV with an 8-bit immediate rotate count. The
// ideal opcode passed to evmasked_op distinguishes left from right.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24577
// Masked RotateLeftV/RotateRightV with the rotate count in a vector register.
// The ideal opcode passed to evmasked_op distinguishes left from right.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24591
// Masked left shift (LShiftVS/LShiftVI/LShiftVL) whose count is an 8-bit
// immediate wrapped in LShiftCntV; the immediate is passed to evmasked_op.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24606
// Masked left shift (LShiftVS/LShiftVI/LShiftVL) with the count in a vector
// register, selected when the node is not a variable shift. The trailing
// false argument mirrors that predicate.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe(pipe_slow);
%}
24622
// Masked left shift (LShiftVS/LShiftVI/LShiftVL) selected when the node is a
// variable shift. The trailing true argument mirrors that predicate.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe(pipe_slow);
%}
24638
// Masked right shift (RShiftVS/RShiftVI/RShiftVL) whose count is an 8-bit
// immediate wrapped in RShiftCntV; the immediate is passed to evmasked_op.
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24653
// Masked right shift (RShiftVS/RShiftVI/RShiftVL) with the count in a vector
// register, selected when the node is not a variable shift. The trailing
// false argument mirrors that predicate.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe(pipe_slow);
%}
24669
// Masked right shift (RShiftVS/RShiftVI/RShiftVL) selected when the node is a
// variable shift. The trailing true argument mirrors that predicate.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe(pipe_slow);
%}
24685
// Masked unsigned right shift (URShiftVS/URShiftVI/URShiftVL) whose count is
// an 8-bit immediate wrapped in RShiftCntV; the immediate is passed to
// evmasked_op.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24700
// Masked unsigned right shift (URShiftVS/URShiftVI/URShiftVL) with the count
// in a vector register, selected when the node is not a variable shift. The
// trailing false argument mirrors that predicate.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe(pipe_slow);
%}
24716
// Masked unsigned right shift (URShiftVS/URShiftVI/URShiftVL) selected when
// the node is a variable shift. The trailing true argument mirrors that
// predicate.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe(pipe_slow);
%}
24732
// Masked MaxV, register form. The first input is also the destination; the
// mask operand is a kReg.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24745
// Masked vector maximum, memory form: the second input is a LoadVector
// folded into the instruction as an address operand.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ evmasked_op(ideal_opc, lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24758
// Masked vector minimum, register form: dst = MinV(dst, src2) under mask.
// The lane type and ideal opcode are forwarded to evmasked_op.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ evmasked_op(ideal_opc, lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24771
// Masked vector minimum, memory form: the second input is a LoadVector
// folded into the instruction as an address operand.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ evmasked_op(ideal_opc, lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24784
// Masked VectorRearrange with dst as first input and src2 as second input.
// Unlike the neighbouring masked arithmetic rules, this one passes false for
// the boolean that precedes the vector length encoding.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    // NOTE(review): false here presumably disables merge masking - confirm in evmasked_op.
    __ evmasked_op(ideal_opc, lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   false, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24797
// Masked absolute value for byte/short/int/long lanes. The operation is
// unary, so dst is passed to evmasked_op as destination and as both sources.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ evmasked_op(ideal_opc, lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24813
// Masked fused multiply-add for float/double vectors, all-register form.
// Inputs are matched as (dst, src2) and (src3, mask); requires UseFMA.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ evmasked_op(ideal_opc, lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24828
// Masked fused multiply-add for float/double vectors where the third input
// is a LoadVector folded into the instruction as an address; requires UseFMA.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ evmasked_op(ideal_opc, lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24843
// Masked vector compare that writes a predicate register. The element type
// and length are taken from src1; cond is translated by the
// booltest_pred_to_comparison_pred helpers before being handed to the
// type-specific compare instruction.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $src1);
    const int vec_enc = vector_length_encoding(this, $src1);

    // Pick the compare instruction from the lane type of the first source.
    switch (lane_bt) {
      case T_BYTE:
      case T_SHORT:
      case T_INT:
      case T_LONG: {
        // Integral lanes share the predicate translation and signedness flag.
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        if (lane_bt == T_BYTE) {
          __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        } else if (lane_bt == T_SHORT) {
          __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        } else if (lane_bt == T_INT) {
          __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        } else {
          __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        }
        break;
      }
      case T_FLOAT: {
        const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      case T_DOUBLE: {
        const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      default: assert(false, "%s", type2name(lane_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
24893
// MaskAll from an int register into a predicate register, for masks of at
// most 32 lanes. The work is delegated to vector_maskall_operation.
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
24904
// Mask NOT, matched as XorVMask of src with MaskAll(-1), for masks shorter
// than 8 lanes on AVX512DQ hardware. Uses the knot overload that takes a
// predicate temp and a general-purpose temp.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister,
            $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24916
// Mask NOT, matched as XorVMask of src with MaskAll(-1), for the lengths
// that need no temporaries: 8 lanes with AVX512DQ, exactly 16 lanes, or more
// than 16 lanes with AVX512BW.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24929
// VectorLongToMask producing a vector-register mask (result type is not a
// predicate mask) for at most 8 lanes. No vector temp is needed, so xnoreg
// is passed in the temp slot.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    // The encoding is derived from the lane count, not from the node itself.
    const int vec_enc = vector_length_encoding(lane_count);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              xnoreg, lane_count, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24943
24944
// VectorLongToMask producing a vector-register mask (result type is not a
// predicate mask) for more than 8 lanes. Needs an extra vector temp and
// kills the flags register; the encoder asserts at most 32 lanes.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    assert(lane_count <= 32, "invalid mask length");
    // The encoding is derived from the lane count, not from the node itself.
    const int vec_enc = vector_length_encoding(lane_count);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              $xtmp1$$XMMRegister, lane_count, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24959
// VectorLongToMask when the result type is a predicate mask: a single kmov
// from the general-purpose source into the predicate register.
instruct long_to_mask_evex(kReg dst, rRegL src)
%{
  predicate(n->bottom_type()->isa_pvectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24969
// And/Or/Xor of two predicate masks. Both inputs must have the same type
// (checked by assert). Without AVX512DQ, lengths below 16 are widened to 16
// before the operation is emitted.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* lhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* rhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(lhs_mask->bottom_type(), rhs_mask->bottom_type()), "Mask types must be equal");

    uint lane_count = Matcher::vector_length(this);
    if (lane_count < 16 && !VM_Version::supports_avx512dq()) {
      // Fall back to the 16-lane form when AVX512DQ is not available.
      lane_count = 16;
    }
    __ masked_op(this->ideal_Opcode(), lane_count,
                 $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24986
// Masked three-input logic (MacroLogicV), all-register form. func is the
// 8-bit immediate handed to evpternlog; dst is the first input and result.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister,
                  true, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24998
// Masked three-input logic (MacroLogicV) where the third input is a memory
// operand passed to evpternlog as an address.
// NOTE(review): unlike other _mem rules nearby, the match tree names src3
// directly rather than wrapping it in LoadVector - confirm this is intended.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address,
                  true, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25010
// CastVV on a predicate-register value: emits no code (size 0, cost 0).
instruct castMM(kReg dst) %{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
25021
// CastVV on a vec-class register value: emits no code (size 0, cost 0).
instruct castVV(vec dst) %{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
25032
// CastVV on a legVec-class register value: emits no code (size 0, cost 0).
instruct castVVLeg(legVec dst) %{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
25043
// IsInfiniteF: classify the float with vfpclassss into a predicate temp,
// then copy the result bit into the integer destination.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) %{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    // Class immediate 0x18 selects the two infinity classes (positive and negative).
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25055
// IsInfiniteD: classify the double with vfpclasssd into a predicate temp,
// then copy the result bit into the integer destination.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) %{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    // Class immediate 0x18 selects the two infinity classes (positive and negative).
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25067
// Signed saturating add/sub on subword lanes, register form. The false
// argument to vector_saturating_op corresponds to the signed predicate.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt,
                            $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                            false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25083
// Unsigned saturating add/sub on subword lanes, register form. The true
// argument to vector_saturating_op corresponds to the unsigned predicate.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt,
                            $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                            true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25099
// Signed saturating add/sub on non-subword lanes, EVEX flavour: chosen for
// 64-byte vectors or whenever AVX512VL is available. Uses two vector temps
// and two predicate temps.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), lane_bt,
                                        $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister,
                                        vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25119
// Signed saturating add/sub on non-subword lanes, AVX flavour: chosen for
// vectors of at most 32 bytes when AVX512VL is not available. Uses four
// vector temps.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), lane_bt,
                                       $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                       vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25138
// Unsigned saturating add on non-subword lanes, EVEX flavour: chosen for
// 64-byte vectors or whenever AVX512VL is available. Uses two vector temps
// and one predicate temp.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_evex(lane_bt,
                                              $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25155
// Unsigned saturating add on non-subword lanes, AVX flavour: chosen for
// vectors of at most 32 bytes when AVX512VL is not available. Uses three
// vector temps.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_avx(lane_bt,
                                             $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                                             vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25172
// Unsigned saturating subtract on non-subword lanes, EVEX flavour: chosen
// for 64-byte vectors or whenever AVX512VL is available. Needs only one
// predicate temp.
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_evex(lane_bt,
                                              $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25189
// Unsigned saturating subtract on non-subword lanes, AVX flavour: chosen for
// vectors of at most 32 bytes when AVX512VL is not available. Uses two
// vector temps.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_avx(lane_bt,
                                             $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                             vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25206
// Signed saturating add/sub on subword lanes where the second input is a
// LoadVector folded into the instruction as an address operand.
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt,
                            $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
                            false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25222
// Unsigned saturating add/sub on subword lanes where the second input is a
// LoadVector folded into the instruction as an address operand.
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt,
                            $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
                            true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25238
// Masked signed saturating add/sub on subword lanes, register form.
// dst is the first input and the result. The first boolean passed to
// evmasked_saturating_op tracks the unsigned predicate (false here).
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister,
                              false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25253
// Masked unsigned saturating add/sub on subword lanes, register form.
// dst is the first input and the result. The first boolean passed to
// evmasked_saturating_op tracks the unsigned predicate (true here).
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister,
                              true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25268
// Masked signed saturating add/sub on subword lanes where the second input
// is a LoadVector folded into the instruction as an address operand.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$Address,
                              false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25283
// Masked unsigned saturating add/sub on subword lanes where the second input
// is a LoadVector folded into the instruction as an address operand.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$Address,
                              true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25298
// SelectFromTwoVector: the index vector register is also the destination,
// with src1 and src2 as the two source vectors.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) %{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ select_from_two_vectors_evex(lane_bt, $index$$XMMRegister,
                                    $src1$$XMMRegister, $src2$$XMMRegister,
                                    vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25310
// ReinterpretS2HF: move the value from a general-purpose register into an
// XMM register with evmovw.
instruct reinterpretS2HF(regF dst, rRegI src) %{
  match(Set dst (ReinterpretS2HF src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    __ evmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
25320
// ReinterpretHF2S: move the value from an XMM register into a
// general-purpose register with evmovw, then narrow it as T_SHORT.
instruct reinterpretHF2S(rRegI dst, regF src) %{
  match(Set dst (ReinterpretHF2S src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    __ evmovw($dst$$Register, $src$$XMMRegister);
    // NOTE(review): presumably sign-extends the 16-bit payload to int - confirm.
    __ narrow_subword_type($dst$$Register, T_SHORT);
  %}
  ins_pipe(pipe_slow);
%}
25331
// Fused ReinterpretS2HF(ConvF2HF(src)): the conversion result stays in an
// XMM register, so a single vcvtps2ph covers both ideal nodes.
instruct convF2HFAndS2HF(regF dst, regF src) %{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    // NOTE(review): immediate 0x04 presumably selects MXCSR rounding - confirm.
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25341
// Fused ConvHF2F(ReinterpretHF2S(src)): the half-float input is already in
// an XMM register, so a single vcvtph2ps covers both ideal nodes.
instruct convHF2SAndHF2F(regF dst, regF src) %{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25351
// Scalar half-float square root (SqrtHF) via vsqrtsh.
instruct scalar_sqrt_HF_reg(regF dst, regF src) %{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25361
// Scalar half-float add/div/mul/sub. One rule covers all four ideal nodes;
// efp16sh picks the instruction from the ideal opcode.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    __ efp16sh(ideal_opc, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25375
// Scalar half-float min/max when AVX10.2 is available. No temporaries are
// declared; k0 is passed as the mask register argument.
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    __ sminmax_fp16_avx10_2(ideal_opc, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, k0);
  %}
  ins_pipe( pipe_slow );
%}
25389
// Scalar half-float min/max fallback for hardware without AVX10.2. Needs a
// predicate temp and two XMM temps; dst is declared TEMP_DEF.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    __ sminmax_fp16(ideal_opc, $dst$$XMMRegister,
                    $src1$$XMMRegister, $src2$$XMMRegister,
                    $ktmp$$KRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25405
// Scalar half-float fused multiply-add. Per the format string the result is
// dst = dst * src1 + src2; note that src2 is passed to vfmadd132sh ahead
// of src1.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister,
                   $src2$$XMMRegister,
                   $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25416
25417
// Packed half-float square root (SqrtVHF), register form, via evsqrtph.
instruct vector_sqrt_HF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25428
// Packed half-float square root (SqrtVHF), memory form: matches a
// VectorReinterpret of a LoadVector and folds the load into evsqrtph.
instruct vector_sqrt_HF_mem(vec dst, memory src) %{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25439
// Packed half-float add/div/mul/sub, register form. One rule covers all four
// ideal nodes; evfp16ph picks the instruction from the ideal opcode.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    __ evfp16ph(ideal_opc, $dst$$XMMRegister,
                $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25454
25455
// Packed half-float add/div/mul/sub, memory form: the second input is a
// VectorReinterpret of a LoadVector, folded in as an address operand.
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2) %{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    __ evfp16ph(ideal_opc, $dst$$XMMRegister,
                $src1$$XMMRegister, $src2$$Address, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25470
// FP16 vector fused multiply-add, register operands.
// dst is both an input and the result: per the format string,
// dst = dst * src1 + src2. Note that src2 is passed to evfmadd132ph
// before src1.
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;
    const XMMRegister addend = $src2$$XMMRegister;
    const XMMRegister factor = $src1$$XMMRegister;
    __ evfmadd132ph(accum, addend, factor, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
25481
// FP16 vector fused multiply-add with src1 taken from memory (a reinterpreted
// LoadVector). Per the format string, dst = dst * src1 + src2; src2 is
// passed to evfmadd132ph before the memory operand.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;
    const XMMRegister addend = $src2$$XMMRegister;
    __ evfmadd132ph(accum, addend, $src1$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
25492
// FP16 vector min/max with the second operand in memory; selected only when
// VM_Version::supports_avx10_2() is true. The ideal opcode (MinVHF or MaxVHF)
// is forwarded to vminmax_fp16_avx10_2; k0 is passed as the mask register
// argument.
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    __ vminmax_fp16_avx10_2(this->ideal_Opcode(), result, lhs, $src2$$Address,
                            k0, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
25507
// FP16 vector min/max, register operands; selected only when
// VM_Version::supports_avx10_2() is true. The ideal opcode (MinVHF or MaxVHF)
// is forwarded to vminmax_fp16_avx10_2; k0 is passed as the mask register
// argument.
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ vminmax_fp16_avx10_2(this->ideal_Opcode(), result, lhs, rhs,
                            k0, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
25522
// FP16 vector min/max fallback used when VM_Version::supports_avx10_2() is
// false. Needs one opmask temp and two vector temps; dst is declared TEMP_DEF.
// Note that src2 is handed to vminmax_fp16 before src1.
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister first = $src2$$XMMRegister;
    const XMMRegister second = $src1$$XMMRegister;
    const XMMRegister scratch1 = $xtmp1$$XMMRegister;
    const XMMRegister scratch2 = $xtmp2$$XMMRegister;
    __ vminmax_fp16(this->ideal_Opcode(), result, first, second, $ktmp$$KRegister,
                    scratch1, scratch2, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
25538
25539 //----------PEEPHOLE RULES-----------------------------------------------------
25540 // These must follow all instruction definitions as they use the names
25541 // defined in the instructions definitions.
25542 //
25543 // peeppredicate ( rule_predicate );
25544 // // the peephole rule is ignored unless this predicate holds
25545 //
25546 // peepmatch ( root_instr_name [preceding_instruction]* );
25547 //
25548 // peepprocedure ( procedure_name );
25549 // // provide a procedure name to perform the optimization, the procedure should
25550 // // reside in the architecture dependent peephole file, the method has the
25551 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
25552 // // with the arguments being the basic block, the current node index inside the
25553 // // block, the register allocator, the functions which, when invoked, return a new node
25554 // // defined in peepreplace, and the rules of the nodes appearing in the
25555 // // corresponding peepmatch; the function returns true if successful, else
25556 // // returns false
25557 //
25558 // peepconstraint %{
25559 // (instruction_number.operand_name relational_op instruction_number.operand_name
25560 // [, ...] );
25561 // // instruction numbers are zero-based using left to right order in peepmatch
25562 //
25563 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25564 // // provide an instruction_number.operand_name for each operand that appears
25565 // // in the replacement instruction's match rule
25566 //
25567 // ---------VM FLAGS---------------------------------------------------------
25568 //
25569 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25570 //
25571 // Each peephole rule is given an identifying number starting with zero and
25572 // increasing by one in the order seen by the parser. An individual peephole
25573 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25574 // on the command-line.
25575 //
25576 // ---------CURRENT LIMITATIONS----------------------------------------------
25577 //
25578 // Only transformations inside a basic block (do we need more for peephole)
25579 //
25580 // ---------EXAMPLE----------------------------------------------------------
25581 //
25582 // // pertinent parts of existing instructions in architecture description
25583 // instruct movI(rRegI dst, rRegI src)
25584 // %{
25585 // match(Set dst (CopyI src));
25586 // %}
25587 //
25588 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25589 // %{
25590 // match(Set dst (AddI dst src));
25591 // effect(KILL cr);
25592 // %}
25593 //
25594 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25595 // %{
25596 // match(Set dst (AddI dst src));
25597 // %}
25598 //
25599 // 1. Simple replacement
25600 // - Only match adjacent instructions in same basic block
25601 // - Only equality constraints
25602 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25603 // - Only one replacement instruction
25604 //
25605 // // Change (inc mov) to lea
25606 // peephole %{
25607 // // lea should only be emitted when beneficial
25608 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25609 // // increment preceded by register-register move
25610 // peepmatch ( incI_rReg movI );
25611 // // require that the destination register of the increment
25612 // // match the destination register of the move
25613 // peepconstraint ( 0.dst == 1.dst );
25614 // // construct a replacement instruction that sets
25615 // // the destination to ( move's source register + one )
25616 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25617 // %}
25618 //
25619 // 2. Procedural replacement
25620 // - More flexible finding relevant nodes
25621 // - More flexible constraints
25622 // - More flexible transformations
25623 // - May utilise architecture-dependent API more effectively
25624 // - Currently only one replacement instruction due to adlc parsing capabilities
25625 //
25626 // // Change (inc mov) to lea
25627 // peephole %{
25628 // // lea should only be emitted when beneficial
25629 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25630 // // the rule numbers of these nodes inside are passed into the function below
25631 // peepmatch ( incI_rReg movI );
25632 // // the method that takes the responsibility of transformation
25633 // peepprocedure ( inc_mov_to_lea );
25634 // // the replacement is a leaI_rReg_immI; a lambda that creates this node when
25635 // // invoked is passed into the function above
25636 // peepreplace ( leaI_rReg_immI() );
25637 // %}
25638
25639 // These instructions are not matched by the matcher but are used by the peephole rules
// 32-bit lea computing src1 + src2. predicate(false) keeps the matcher from
// ever selecting this rule; it is named as the replacement instruction of a
// peephole rule.
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    // src1 serves as the base register unless it is rbp or r13, in which case
    // the two operands swap roles.
    // NOTE(review): presumably because rbp/r13 as a base cannot be encoded
    // without a displacement -- confirm against the x86 addressing rules.
    const bool lhs_usable_as_base = lhs != rbp && lhs != r13;
    if (!lhs_usable_as_base) {
      assert(rhs != rbp && rhs != r13, "");
      __ leal(result, Address(rhs, lhs, Address::times_1));
    } else {
      __ leal(result, Address(lhs, rhs, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25658
// 32-bit lea computing src1 + immediate. predicate(false) keeps the matcher
// from ever selecting this rule; it is named as the replacement instruction
// of peephole rules.
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register base = $src1$$Register;
    __ leal(result, Address(base, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25669
// 32-bit lea computing src << shift, with the shift count used as the address
// scale factor. predicate(false) keeps the matcher from ever selecting this
// rule; it is named as the replacement instruction of a peephole rule.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register operand = $src$$Register;
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    // A times_2 scale is emitted as [src + src] when src may act as the base
    // (i.e. is neither rbp nor r13); otherwise the scaled-index form without
    // a base register is used.
    const bool as_base_plus_index = factor == Address::times_2 && operand != rbp && operand != r13;
    if (as_base_plus_index) {
      __ leal(result, Address(operand, operand, Address::times_1));
    } else {
      __ leal(result, Address(noreg, operand, factor));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25686
// 64-bit lea computing src1 + src2. predicate(false) keeps the matcher from
// ever selecting this rule; it is named as the replacement instruction of a
// peephole rule.
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    // src1 serves as the base register unless it is rbp or r13, in which case
    // the two operands swap roles.
    // NOTE(review): presumably because rbp/r13 as a base cannot be encoded
    // without a displacement -- confirm against the x86 addressing rules.
    const bool lhs_usable_as_base = lhs != rbp && lhs != r13;
    if (!lhs_usable_as_base) {
      assert(rhs != rbp && rhs != r13, "");
      __ leaq(result, Address(rhs, lhs, Address::times_1));
    } else {
      __ leaq(result, Address(lhs, rhs, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25705
// 64-bit lea computing src1 + 32-bit immediate. predicate(false) keeps the
// matcher from ever selecting this rule; it is named as the replacement
// instruction of peephole rules.
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register base = $src1$$Register;
    __ leaq(result, Address(base, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25716
// 64-bit lea computing src << shift, with the shift count used as the address
// scale factor. predicate(false) keeps the matcher from ever selecting this
// rule; it is named as the replacement instruction of a peephole rule.
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register operand = $src$$Register;
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    // A times_2 scale is emitted as [src + src] when src may act as the base
    // (i.e. is neither rbp nor r13); otherwise the scaled-index form without
    // a base register is used.
    const bool as_base_plus_index = factor == Address::times_2 && operand != rbp && operand != r13;
    if (as_base_plus_index) {
      __ leaq(result, Address(operand, operand, Address::times_1));
    } else {
      __ leaq(result, Address(noreg, operand, factor));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25733
25734 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25735 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25736 // processors with at least partial ALU support for lea
25737 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25738 // beneficial for processors with full ALU support
25739 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25740
// Peephole rule, int register-register add: enabled only when
// VM_Version::supports_fast_2op_lea() holds. addI_rReg nodes are handed to the
// lea_coalesce_reg procedure, with leaI_rReg_rReg_peep as the replacement
// instruction it may create.
25741 peephole
25742 %{
25743 peeppredicate(VM_Version::supports_fast_2op_lea());
25744 peepmatch (addI_rReg);
25745 peepprocedure (lea_coalesce_reg);
25746 peepreplace (leaI_rReg_rReg_peep());
25747 %}
25748
// Peephole rule, int register-immediate add: enabled only when
// VM_Version::supports_fast_2op_lea() holds. addI_rReg_imm nodes are handed to
// the lea_coalesce_imm procedure, with leaI_rReg_immI_peep as the replacement
// instruction it may create.
25749 peephole
25750 %{
25751 peeppredicate(VM_Version::supports_fast_2op_lea());
25752 peepmatch (addI_rReg_imm);
25753 peepprocedure (lea_coalesce_imm);
25754 peepreplace (leaI_rReg_immI_peep());
25755 %}
25756
// Peephole rule, int increment: enabled only when
// VM_Version::supports_fast_3op_lea() or VM_Version::is_intel_cascade_lake()
// holds. incI_rReg nodes are handed to the lea_coalesce_imm procedure, with
// leaI_rReg_immI_peep as the replacement instruction it may create.
25757 peephole
25758 %{
25759 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25760 VM_Version::is_intel_cascade_lake());
25761 peepmatch (incI_rReg);
25762 peepprocedure (lea_coalesce_imm);
25763 peepreplace (leaI_rReg_immI_peep());
25764 %}
25765
// Peephole rule, int decrement: enabled only when
// VM_Version::supports_fast_3op_lea() or VM_Version::is_intel_cascade_lake()
// holds. decI_rReg nodes are handed to the lea_coalesce_imm procedure, with
// leaI_rReg_immI_peep as the replacement instruction it may create.
25766 peephole
25767 %{
25768 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25769 VM_Version::is_intel_cascade_lake());
25770 peepmatch (decI_rReg);
25771 peepprocedure (lea_coalesce_imm);
25772 peepreplace (leaI_rReg_immI_peep());
25773 %}
25774
// Peephole rule, int left shift by an immI2 constant: enabled only when
// VM_Version::supports_fast_2op_lea() holds. salI_rReg_immI2 nodes are handed
// to the lea_coalesce_imm procedure, with leaI_rReg_immI2_peep as the
// replacement instruction it may create.
25775 peephole
25776 %{
25777 peeppredicate(VM_Version::supports_fast_2op_lea());
25778 peepmatch (salI_rReg_immI2);
25779 peepprocedure (lea_coalesce_imm);
25780 peepreplace (leaI_rReg_immI2_peep());
25781 %}
25782
// Peephole rule, long register-register add: enabled only when
// VM_Version::supports_fast_2op_lea() holds. addL_rReg nodes are handed to the
// lea_coalesce_reg procedure, with leaL_rReg_rReg_peep as the replacement
// instruction it may create.
25783 peephole
25784 %{
25785 peeppredicate(VM_Version::supports_fast_2op_lea());
25786 peepmatch (addL_rReg);
25787 peepprocedure (lea_coalesce_reg);
25788 peepreplace (leaL_rReg_rReg_peep());
25789 %}
25790
// Peephole rule, long register-immediate add: enabled only when
// VM_Version::supports_fast_2op_lea() holds. addL_rReg_imm nodes are handed to
// the lea_coalesce_imm procedure, with leaL_rReg_immL32_peep as the
// replacement instruction it may create.
25791 peephole
25792 %{
25793 peeppredicate(VM_Version::supports_fast_2op_lea());
25794 peepmatch (addL_rReg_imm);
25795 peepprocedure (lea_coalesce_imm);
25796 peepreplace (leaL_rReg_immL32_peep());
25797 %}
25798
// Peephole rule, long increment: enabled only when
// VM_Version::supports_fast_3op_lea() or VM_Version::is_intel_cascade_lake()
// holds. incL_rReg nodes are handed to the lea_coalesce_imm procedure, with
// leaL_rReg_immL32_peep as the replacement instruction it may create.
25799 peephole
25800 %{
25801 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25802 VM_Version::is_intel_cascade_lake());
25803 peepmatch (incL_rReg);
25804 peepprocedure (lea_coalesce_imm);
25805 peepreplace (leaL_rReg_immL32_peep());
25806 %}
25807
// Peephole rule, long decrement: enabled only when
// VM_Version::supports_fast_3op_lea() or VM_Version::is_intel_cascade_lake()
// holds. decL_rReg nodes are handed to the lea_coalesce_imm procedure, with
// leaL_rReg_immL32_peep as the replacement instruction it may create.
25808 peephole
25809 %{
25810 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25811 VM_Version::is_intel_cascade_lake());
25812 peepmatch (decL_rReg);
25813 peepprocedure (lea_coalesce_imm);
25814 peepreplace (leaL_rReg_immL32_peep());
25815 %}
25816
// Peephole rule, long left shift by an immI2 constant: enabled only when
// VM_Version::supports_fast_2op_lea() holds. salL_rReg_immI2 nodes are handed
// to the lea_coalesce_imm procedure, with leaL_rReg_immI2_peep as the
// replacement instruction it may create.
25817 peephole
25818 %{
25819 peeppredicate(VM_Version::supports_fast_2op_lea());
25820 peepmatch (salL_rReg_immI2);
25821 peepprocedure (lea_coalesce_imm);
25822 peepreplace (leaL_rReg_immI2_peep());
25823 %}
25824
// Peephole rule: runs the lea_remove_redundant procedure on
// leaPCompressedOopOffset nodes. No peeppredicate is given, and no
// peepreplace instruction is supplied to the procedure.
// NOTE(review): going by its name the procedure removes a redundant lea --
// confirm in the architecture-dependent peephole file.
25825 peephole
25826 %{
25827 peepmatch (leaPCompressedOopOffset);
25828 peepprocedure (lea_remove_redundant);
25829 %}
25830
// Peephole rule: runs the lea_remove_redundant procedure on leaP8Narrow
// nodes. No peeppredicate is given, and no peepreplace instruction is
// supplied to the procedure.
// NOTE(review): going by its name the procedure removes a redundant lea --
// confirm in the architecture-dependent peephole file.
25831 peephole
25832 %{
25833 peepmatch (leaP8Narrow);
25834 peepprocedure (lea_remove_redundant);
25835 %}
25836
// Peephole rule: runs the lea_remove_redundant procedure on leaP32Narrow
// nodes. No peeppredicate is given, and no peepreplace instruction is
// supplied to the procedure.
// NOTE(review): going by its name the procedure removes a redundant lea --
// confirm in the architecture-dependent peephole file.
25837 peephole
25838 %{
25839 peepmatch (leaP32Narrow);
25840 peepprocedure (lea_remove_redundant);
25841 %}
25842
25843 // These peephole rules match instructions which set flags and are followed by a testI/L_reg.
25844 // The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction
25845
25846 //int variant
// Peephole rule: runs the test_may_remove procedure on testI_reg nodes.
// No peeppredicate is given, and no peepreplace instruction is supplied to
// the procedure.
25847 peephole
25848 %{
25849 peepmatch (testI_reg);
25850 peepprocedure (test_may_remove);
25851 %}
25852
25853 //long variant
// Peephole rule: runs the test_may_remove procedure on testL_reg nodes.
// No peeppredicate is given, and no peepreplace instruction is supplied to
// the procedure.
25854 peephole
25855 %{
25856 peepmatch (testL_reg);
25857 peepprocedure (test_may_remove);
25858 %}
25859
25860
25861 //----------SMARTSPILL RULES---------------------------------------------------
25862 // These must follow all instruction definitions as they use the names
25863 // defined in the instructions definitions.