//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

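// As a concrete reading of the legend above (a sketch based on the first
// definition below): reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg())
// declares RAX as save-on-call both for compiled Java code and for the C
// calling convention, spilled and filled as an integer (Op_RegI), with
// hardware encoding 0, and backed by the VMReg handle from rax->as_VMReg().
// The companion RAX_H entry names the upper half of the 64-bit pair.
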
// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// RBX, RSI, and RDI were originally save-on-entry for Java code, then SOE
// was turned off in Java code because of the frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are save-on-entry again.

reg_def RAX  (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP  (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prologue/epilogue code.
reg_def RBP  (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

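// Note how chunk0 below applies this heuristic: the save-on-call scratch
// registers R10 and R11 are offered first, while RSP, which participates
// in every fixed calling sequence, comes last.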
alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, with the
// words labeled (a)-(p). Word a in each register holds a Float; words a-b
// hold a Double. The whole registers are used in SSE4.2 intrinsics, array
// copy stubs and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX when UseAVX is set).
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX when UseAVX is set).
//
// Linux ABI:   No XMM registers are preserved across function calls;
//              XMM0-XMM7 might hold parameters.
// Windows ABI: XMM6-XMM15 are preserved across function calls;
//              XMM0-XMM3 might hold parameters.

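// For example, the sixteen slots XMM0 through XMM0p defined below are the
// 32-bit words of the 512-bit zmm0 register: XMM0 alone holds a Float,
// XMM0 plus XMM0b hold a Double, and all sixteen together describe the
// full 512-bit vector.
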
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
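// K0 is intentionally absent from this list: in the EVEX encoding a mask
// field of 0 means "no masking", so k0 cannot act as a general predicate
// register, which is presumably why it is not made allocatable here.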
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

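// Unlike the enumerated classes above, the following classes are defined by
// a C++ body returning a precomputed register mask, so their contents can be
// decided at runtime (for instance, including the APX registers R16-R31 only
// when the CPU supports them). The _*_REG_mask objects are assumed to be
// built in the supporting C++ source of this file.
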
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
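// reg_class_dynamic chooses between two statically defined register classes:
// the first when the predicate evaluates to true, the second otherwise.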
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
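// Predicate helper for CastLL matching: both bounds of the node's long type
// must fit in a signed 32-bit immediate so they can be encoded directly in a
// compare; a bound of min_jlong/max_jlong means that side is unbounded and
// needs no check.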
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
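// r12 holds the compressed-oop heap base, so it is reserved whenever
// compressed oops are in use.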
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
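  // APX extended general-purpose registers; allocatable only when UseAPX is on.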
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
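  // Instructions such as integer div/mod use rax:rdx implicitly, so their
  // other operands must be allocated outside those registers.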
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
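  // Variable shift counts are taken in cl, so the shifted operand must
  // avoid rcx.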
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
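  // rbp and r13 always require an explicit displacement when used as a base
  // register, so some addressing patterns exclude them.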
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
static bool generate_vzeroupper(Compile* C) {
  // Emit vzeroupper when the compiled code uses wide vectors or must clear the upper AVX state.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}
1629
1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is 3 bytes
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
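  // call rel32: 1-byte opcode (0xE8) plus a 4-byte displacement.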
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
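  // A 10-byte movq of the inline-cache word into rax precedes the 5-byte call.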
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
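  // movq r10, imm64 is 10 bytes; the indirect call through r10 (REX.B + FF /2) is 3 bytes.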
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
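// Materialize a three-way compare result in dst: -1 if less than or
// unordered (NaN), 0 if equal, 1 if greater. Assumes the flags were set
// by a preceding ucomiss/ucomisd.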
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 Label done;
1703 __ movl(dst, -1);
1704 __ jcc(Assembler::parity, done);
1705 __ jcc(Assembler::below, done);
1706 __ setcc(Assembler::notEqual, dst);
1707 __ bind(done);
1708 }
1709
1710 // Math.min() # Math.max()
1711 // --------------------------
1712 // ucomis[s/d] #
1713 // ja -> b # a
1714 // jp -> NaN # NaN
1715 // jb -> a # b
1716 // je #
1717 // |-jz -> a | b # a & b
1718 // | -> a #
1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1720 XMMRegister a, XMMRegister b,
1721 XMMRegister xmmt, Register rt,
1722 bool min, bool single) {
1723
1724 Label nan, zero, below, above, done;
1725
1726 if (single)
1727 __ ucomiss(a, b);
1728 else
1729 __ ucomisd(a, b);
1730
1731 if (dst->encoding() != (min ? b : a)->encoding())
1732 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1733 else
1734 __ jccb(Assembler::above, done);
1735
1736 __ jccb(Assembler::parity, nan); // PF=1
1737 __ jccb(Assembler::below, below); // CF=1
1738
1739 // equal
1740 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1741 if (single) {
1742 __ ucomiss(a, xmmt);
1743 __ jccb(Assembler::equal, zero);
1744
1745 __ movflt(dst, a);
1746 __ jmp(done);
1747 }
1748 else {
1749 __ ucomisd(a, xmmt);
1750 __ jccb(Assembler::equal, zero);
1751
1752 __ movdbl(dst, a);
1753 __ jmp(done);
1754 }
1755
1756 __ bind(zero);
1757 if (min)
1758 __ vpor(dst, a, b, Assembler::AVX_128bit);
1759 else
1760 __ vpand(dst, a, b, Assembler::AVX_128bit);
1761
1762 __ jmp(done);
1763
1764 __ bind(above);
1765 if (single)
1766 __ movflt(dst, min ? b : a);
1767 else
1768 __ movdbl(dst, min ? b : a);
1769
1770 __ jmp(done);
1771
1772 __ bind(nan);
1773 if (single) {
1774 __ movl(rt, 0x7fc00000); // Float.NaN
1775 __ movdl(dst, rt);
1776 }
1777 else {
1778 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1779 __ movdq(dst, rt);
1780 }
1781 __ jmp(done);
1782
1783 __ bind(below);
1784 if (single)
1785 __ movflt(dst, min ? a : b);
1786 else
1787 __ movdbl(dst, min ? a : b);
1788
1789 __ bind(done);
1790 }
1791
1792 //=============================================================================
1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1794
1795 int ConstantTable::calculate_table_base_offset() const {
1796 return 0; // absolute addressing, no offset
1797 }
1798
1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1801 ShouldNotReachHere();
1802 }
1803
1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1805 // Empty encoding
1806 }
1807
1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1809 return 0;
1810 }
1811
1812 #ifndef PRODUCT
1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1814 st->print("# MachConstantBaseNode (empty encoding)");
1815 }
1816 #endif
1817
1818
1819 //=============================================================================
1820 #ifndef PRODUCT
1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1822 Compile* C = ra_->C;
1823
1824 int framesize = C->output()->frame_size_in_bytes();
1825 int bangsize = C->output()->bang_size_in_bytes();
1826 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1827 // Remove wordSize for return addr which is already pushed.
1828 framesize -= wordSize;
1829
1830 if (C->output()->need_stack_bang(bangsize)) {
1831 framesize -= wordSize;
1832 st->print("# stack bang (%d bytes)", bangsize);
1833 st->print("\n\t");
1834 st->print("pushq rbp\t# Save rbp");
1835 if (PreserveFramePointer) {
1836 st->print("\n\t");
1837 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1838 }
1839 if (framesize) {
1840 st->print("\n\t");
1841 st->print("subq rsp, #%d\t# Create frame",framesize);
1842 }
1843 } else {
1844 st->print("subq rsp, #%d\t# Create frame",framesize);
1845 st->print("\n\t");
1846 framesize -= wordSize;
1847 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1848 if (PreserveFramePointer) {
1849 st->print("\n\t");
1850 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1851 if (framesize > 0) {
1852 st->print("\n\t");
1853 st->print("addq rbp, #%d", framesize);
1854 }
1855 }
1856 }
1857
1858 if (VerifyStackAtCalls) {
1859 st->print("\n\t");
1860 framesize -= wordSize;
1861 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1862 #ifdef ASSERT
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1881 Compile* C = ra_->C;
1882
1883 int framesize = C->output()->frame_size_in_bytes();
1884 int bangsize = C->output()->bang_size_in_bytes();
1885
1886 if (C->clinit_barrier_on_entry()) {
1887 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1888 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1889
1890 Label L_skip_barrier;
1891 Register klass = rscratch1;
1892
1893 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1894 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1895
1896 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1897
1898 __ bind(L_skip_barrier);
1899 }
1900
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
1902
1903 C->output()->set_frame_complete(__ offset());
1904
1905 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
1908 ConstantTable& constant_table = C->output()->constant_table();
1909 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1910 }
1911 }
1912
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
1919 int MachPrologNode::reloc() const
1920 {
1921 return 0; // a large enough number
1922 }
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1936 // Remove word for return adr already pushed
1937 // and RBP
1938 framesize -= 2*wordSize;
1939
1940 if (framesize) {
1941 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1942 st->print("\t");
1943 }
1944
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1956 {
1957 Compile* C = ra_->C;
1958
1959 if (generate_vzeroupper(C)) {
1960 // Clear upper bits of YMM registers when current compiled code uses
1961 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1962 __ vzeroupper();
1963 }
1964
1965 int framesize = C->output()->frame_size_in_bytes();
1966 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1967 // Remove word for return adr already pushed
1968 // and RBP
1969 framesize -= 2*wordSize;
1970
1971 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1972
1973 if (framesize) {
1974 __ addq(rsp, framesize);
1975 }
1976
1977 __ popq(rbp);
1978
1979 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1980 __ reserved_stack_check();
1981 }
1982
1983 if (do_polling() && C->is_method_compilation()) {
1984 Label dummy_label;
1985 Label* code_stub = &dummy_label;
1986 if (!C->output()->in_scratch_emit_size()) {
1987 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1988 C->output()->add_stub(stub);
1989 code_stub = &stub->entry();
1990 }
1991 __ relocate(relocInfo::poll_return_type);
1992 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1993 }
1994 }
1995
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
2002 int MachEpilogNode::reloc() const
2003 {
2004 return 2; // a large enough number
2005 }
2006
2007 const Pipeline* MachEpilogNode::pipeline() const
2008 {
2009 return MachNode::pipeline_class();
2010 }
2011
2012 //=============================================================================
2013
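// Coarse operand classes used to dispatch spill copies.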
2014 enum RC {
2015 rc_bad,
2016 rc_int,
2017 rc_kreg,
2018 rc_float,
2019 rc_stack
2020 };
2021
2022 static enum RC rc_class(OptoReg::Name reg)
2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2025
2026 if (OptoReg::is_stack(reg)) return rc_stack;
2027
2028 VMReg r = OptoReg::as_VMReg(reg);
2029
2030 if (r->is_Register()) return rc_int;
2031
2032 if (r->is_KRegister()) return rc_kreg;
2033
2034 assert(r->is_XMMRegister(), "must be");
2035 return rc_float;
2036 }
2037
2038 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2040 int src_hi, int dst_hi, uint ireg, outputStream* st);
2041
2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2043 int stack_offset, int reg, uint ireg, outputStream* st);
2044
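// Copy a vector spill slot to another stack slot. x86 has no memory-to-memory
// vector move: VecS goes through rax, VecD/VecX use pushq/popq pairs, and
// VecY/VecZ go through xmm0; rax/xmm0 are parked in scratch space below rsp.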
2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2046 int dst_offset, uint ireg, outputStream* st) {
2047 if (masm) {
2048 switch (ireg) {
2049 case Op_VecS:
2050 __ movq(Address(rsp, -8), rax);
2051 __ movl(rax, Address(rsp, src_offset));
2052 __ movl(Address(rsp, dst_offset), rax);
2053 __ movq(rax, Address(rsp, -8));
2054 break;
2055 case Op_VecD:
2056 __ pushq(Address(rsp, src_offset));
2057 __ popq (Address(rsp, dst_offset));
2058 break;
2059 case Op_VecX:
2060 __ pushq(Address(rsp, src_offset));
2061 __ popq (Address(rsp, dst_offset));
2062 __ pushq(Address(rsp, src_offset+8));
2063 __ popq (Address(rsp, dst_offset+8));
2064 break;
2065 case Op_VecY:
2066 __ vmovdqu(Address(rsp, -32), xmm0);
2067 __ vmovdqu(xmm0, Address(rsp, src_offset));
2068 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2069 __ vmovdqu(xmm0, Address(rsp, -32));
2070 break;
2071 case Op_VecZ:
2072 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2073 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2074 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2075 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2076 break;
2077 default:
2078 ShouldNotReachHere();
2079 }
2080 #ifndef PRODUCT
2081 } else {
2082 switch (ireg) {
2083 case Op_VecS:
2084 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2085 "movl rax, [rsp + #%d]\n\t"
2086 "movl [rsp + #%d], rax\n\t"
2087 "movq rax, [rsp - #8]",
2088 src_offset, dst_offset);
2089 break;
2090 case Op_VecD:
2091 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2092 "popq [rsp + #%d]",
2093 src_offset, dst_offset);
2094 break;
2095 case Op_VecX:
2096 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2097 "popq [rsp + #%d]\n\t"
2098 "pushq [rsp + #%d]\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset, src_offset+8, dst_offset+8);
2101 break;
2102 case Op_VecY:
2103 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2104 "vmovdqu xmm0, [rsp + #%d]\n\t"
2105 "vmovdqu [rsp + #%d], xmm0\n\t"
2106 "vmovdqu xmm0, [rsp - #32]",
2107 src_offset, dst_offset);
2108 break;
2109 case Op_VecZ:
      st->print("evmovdquq [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "evmovdquq xmm0, [rsp + #%d]\n\t"
                "evmovdquq [rsp + #%d], xmm0\n\t"
                "evmovdquq xmm0, [rsp - #64]",
2114 src_offset, dst_offset);
2115 break;
2116 default:
2117 ShouldNotReachHere();
2118 }
2119 #endif
2120 }
2121 }
2122
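// Emit (or, when masm is null, format) a spill copy. Source and destination
// are classified as gpr, xmm, kreg or stack slot and the matching move is
// selected. An even first register whose pair immediately follows denotes a
// 64-bit value; otherwise a 32-bit move is emitted.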
2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2124 PhaseRegAlloc* ra_,
2125 bool do_size,
2126 outputStream* st) const {
2127 assert(masm != nullptr || st != nullptr, "sanity");
2128 // Get registers to move
2129 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2130 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2131 OptoReg::Name dst_second = ra_->get_reg_second(this);
2132 OptoReg::Name dst_first = ra_->get_reg_first(this);
2133
2134 enum RC src_second_rc = rc_class(src_second);
2135 enum RC src_first_rc = rc_class(src_first);
2136 enum RC dst_second_rc = rc_class(dst_second);
2137 enum RC dst_first_rc = rc_class(dst_first);
2138
2139 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2140 "must move at least 1 register" );
2141
2142 if (src_first == dst_first && src_second == dst_second) {
2143 // Self copy, no move
2144 return 0;
2145 }
2146 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2147 uint ireg = ideal_reg();
2148 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2149 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2151 // mem -> mem
2152 int src_offset = ra_->reg2offset(src_first);
2153 int dst_offset = ra_->reg2offset(dst_first);
2154 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2163 } else {
2164 ShouldNotReachHere();
2165 }
2166 return 0;
2167 }
2168 if (src_first_rc == rc_stack) {
2169 // mem ->
2170 if (dst_first_rc == rc_stack) {
2171 // mem -> mem
2172 assert(src_second != dst_first, "overlap");
2173 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2174 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2175 // 64-bit
2176 int src_offset = ra_->reg2offset(src_first);
2177 int dst_offset = ra_->reg2offset(dst_first);
2178 if (masm) {
2179 __ pushq(Address(rsp, src_offset));
2180 __ popq (Address(rsp, dst_offset));
2181 #ifndef PRODUCT
2182 } else {
2183 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2184 "popq [rsp + #%d]",
2185 src_offset, dst_offset);
2186 #endif
2187 }
2188 } else {
2189 // 32-bit
2190 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2191 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2192 // No pushl/popl, so:
2193 int src_offset = ra_->reg2offset(src_first);
2194 int dst_offset = ra_->reg2offset(dst_first);
2195 if (masm) {
2196 __ movq(Address(rsp, -8), rax);
2197 __ movl(rax, Address(rsp, src_offset));
2198 __ movl(Address(rsp, dst_offset), rax);
2199 __ movq(rax, Address(rsp, -8));
2200 #ifndef PRODUCT
2201 } else {
2202 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2203 "movl rax, [rsp + #%d]\n\t"
2204 "movl [rsp + #%d], rax\n\t"
2205 "movq rax, [rsp - #8]",
2206 src_offset, dst_offset);
2207 #endif
2208 }
2209 }
2210 return 0;
2211 } else if (dst_first_rc == rc_int) {
2212 // mem -> gpr
2213 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2214 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2215 // 64-bit
2216 int offset = ra_->reg2offset(src_first);
2217 if (masm) {
2218 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2219 #ifndef PRODUCT
2220 } else {
2221 st->print("movq %s, [rsp + #%d]\t# spill",
2222 Matcher::regName[dst_first],
2223 offset);
2224 #endif
2225 }
2226 } else {
2227 // 32-bit
2228 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2229 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2230 int offset = ra_->reg2offset(src_first);
2231 if (masm) {
2232 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2233 #ifndef PRODUCT
2234 } else {
2235 st->print("movl %s, [rsp + #%d]\t# spill",
2236 Matcher::regName[dst_first],
2237 offset);
2238 #endif
2239 }
2240 }
2241 return 0;
2242 } else if (dst_first_rc == rc_float) {
      // mem -> xmm
2244 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2245 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2246 // 64-bit
2247 int offset = ra_->reg2offset(src_first);
2248 if (masm) {
2249 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2250 #ifndef PRODUCT
2251 } else {
2252 st->print("%s %s, [rsp + #%d]\t# spill",
2253 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2254 Matcher::regName[dst_first],
2255 offset);
2256 #endif
2257 }
2258 } else {
2259 // 32-bit
2260 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2261 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2262 int offset = ra_->reg2offset(src_first);
2263 if (masm) {
2264 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2265 #ifndef PRODUCT
2266 } else {
2267 st->print("movss %s, [rsp + #%d]\t# spill",
2268 Matcher::regName[dst_first],
2269 offset);
2270 #endif
2271 }
2272 }
2273 return 0;
2274 } else if (dst_first_rc == rc_kreg) {
2275 // mem -> kreg
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(src_first);
2280 if (masm) {
2281 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("kmovq %s, [rsp + #%d]\t# spill",
2285 Matcher::regName[dst_first],
2286 offset);
2287 #endif
2288 }
2289 }
2290 return 0;
2291 }
2292 } else if (src_first_rc == rc_int) {
2293 // gpr ->
2294 if (dst_first_rc == rc_stack) {
2295 // gpr -> mem
2296 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2297 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2298 // 64-bit
2299 int offset = ra_->reg2offset(dst_first);
2300 if (masm) {
2301 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2302 #ifndef PRODUCT
2303 } else {
2304 st->print("movq [rsp + #%d], %s\t# spill",
2305 offset,
2306 Matcher::regName[src_first]);
2307 #endif
2308 }
2309 } else {
2310 // 32-bit
2311 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2312 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2313 int offset = ra_->reg2offset(dst_first);
2314 if (masm) {
2315 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2316 #ifndef PRODUCT
2317 } else {
2318 st->print("movl [rsp + #%d], %s\t# spill",
2319 offset,
2320 Matcher::regName[src_first]);
2321 #endif
2322 }
2323 }
2324 return 0;
2325 } else if (dst_first_rc == rc_int) {
2326 // gpr -> gpr
2327 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2328 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2329 // 64-bit
2330 if (masm) {
2331 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2332 as_Register(Matcher::_regEncode[src_first]));
2333 #ifndef PRODUCT
2334 } else {
2335 st->print("movq %s, %s\t# spill",
2336 Matcher::regName[dst_first],
2337 Matcher::regName[src_first]);
2338 #endif
2339 }
2340 return 0;
2341 } else {
2342 // 32-bit
2343 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2344 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2345 if (masm) {
2346 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2347 as_Register(Matcher::_regEncode[src_first]));
2348 #ifndef PRODUCT
2349 } else {
2350 st->print("movl %s, %s\t# spill",
2351 Matcher::regName[dst_first],
2352 Matcher::regName[src_first]);
2353 #endif
2354 }
2355 return 0;
2356 }
2357 } else if (dst_first_rc == rc_float) {
2358 // gpr -> xmm
2359 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2360 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2361 // 64-bit
2362 if (masm) {
2363 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movdq %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 } else {
2372 // 32-bit
2373 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2374 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2375 if (masm) {
2376 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2377 #ifndef PRODUCT
2378 } else {
2379 st->print("movdl %s, %s\t# spill",
2380 Matcher::regName[dst_first],
2381 Matcher::regName[src_first]);
2382 #endif
2383 }
2384 }
2385 return 0;
2386 } else if (dst_first_rc == rc_kreg) {
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 if (masm) {
2391 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2392 #ifndef PRODUCT
2393 } else {
2394 st->print("kmovq %s, %s\t# spill",
2395 Matcher::regName[dst_first],
2396 Matcher::regName[src_first]);
2397 #endif
2398 }
2399 }
2400 Unimplemented();
2401 return 0;
2402 }
2403 } else if (src_first_rc == rc_float) {
2404 // xmm ->
2405 if (dst_first_rc == rc_stack) {
2406 // xmm -> mem
2407 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2408 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2409 // 64-bit
2410 int offset = ra_->reg2offset(dst_first);
2411 if (masm) {
2412 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2413 #ifndef PRODUCT
2414 } else {
2415 st->print("movsd [rsp + #%d], %s\t# spill",
2416 offset,
2417 Matcher::regName[src_first]);
2418 #endif
2419 }
2420 } else {
2421 // 32-bit
2422 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2423 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2424 int offset = ra_->reg2offset(dst_first);
2425 if (masm) {
2426 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2427 #ifndef PRODUCT
2428 } else {
2429 st->print("movss [rsp + #%d], %s\t# spill",
2430 offset,
2431 Matcher::regName[src_first]);
2432 #endif
2433 }
2434 }
2435 return 0;
2436 } else if (dst_first_rc == rc_int) {
2437 // xmm -> gpr
2438 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2439 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2440 // 64-bit
2441 if (masm) {
2442 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movdq %s, %s\t# spill",
2446 Matcher::regName[dst_first],
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 } else {
2451 // 32-bit
2452 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2453 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2454 if (masm) {
2455 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2456 #ifndef PRODUCT
2457 } else {
2458 st->print("movdl %s, %s\t# spill",
2459 Matcher::regName[dst_first],
2460 Matcher::regName[src_first]);
2461 #endif
2462 }
2463 }
2464 return 0;
2465 } else if (dst_first_rc == rc_float) {
2466 // xmm -> xmm
2467 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2468 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2469 // 64-bit
2470 if (masm) {
2471 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("%s %s, %s\t# spill",
2475 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2476 Matcher::regName[dst_first],
2477 Matcher::regName[src_first]);
2478 #endif
2479 }
2480 } else {
2481 // 32-bit
2482 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2483 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2484 if (masm) {
2485 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2486 #ifndef PRODUCT
2487 } else {
2488 st->print("%s %s, %s\t# spill",
2489 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2490 Matcher::regName[dst_first],
2491 Matcher::regName[src_first]);
2492 #endif
2493 }
2494 }
2495 return 0;
2496 } else if (dst_first_rc == rc_kreg) {
2497 assert(false, "Illegal spilling");
2498 return 0;
2499 }
2500 } else if (src_first_rc == rc_kreg) {
2501 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2503 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2504 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2505 // 64-bit
2506 int offset = ra_->reg2offset(dst_first);
2507 if (masm) {
2508 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2509 #ifndef PRODUCT
2510 } else {
2511 st->print("kmovq [rsp + #%d] , %s\t# spill",
2512 offset,
2513 Matcher::regName[src_first]);
2514 #endif
2515 }
2516 }
2517 return 0;
2518 } else if (dst_first_rc == rc_int) {
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 if (masm) {
2523 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2524 #ifndef PRODUCT
2525 } else {
2526 st->print("kmovq %s, %s\t# spill",
2527 Matcher::regName[dst_first],
2528 Matcher::regName[src_first]);
2529 #endif
2530 }
2531 }
2532 Unimplemented();
2533 return 0;
2534 } else if (dst_first_rc == rc_kreg) {
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 return 0;
2549 } else if (dst_first_rc == rc_float) {
2550 assert(false, "Illegal spill");
2551 return 0;
2552 }
2553 }
2554
  assert(false, "unhandled spill combination");
2556 Unimplemented();
2557 return 0;
2558 }
2559
2560 #ifndef PRODUCT
2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2562 implementation(nullptr, ra_, false, st);
2563 }
2564 #endif
2565
2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2567 implementation(masm, ra_, false, nullptr);
2568 }
2569
2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2571 return MachNode::size(ra_);
2572 }
2573
2574 //=============================================================================
2575 #ifndef PRODUCT
2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2577 {
2578 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2579 int reg = ra_->get_reg_first(this);
2580 st->print("leaq %s, [rsp + #%d]\t# box lock",
2581 Matcher::regName[reg], offset);
2582 }
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
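  // lea reg, [rsp + offset] encodes as prefix, opcode, ModRM, SIB, then a
  // disp8 or disp32, giving 5 or 8 bytes with REX and 6 or 9 with the
  // 2-byte REX2 prefix needed for extended GPRs.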
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
    st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2619 {
2620 __ ic_check(InteriorEntryAlignment);
2621 }
2622
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
2632 bool Matcher::supports_vector_calling_convention(void) {
2633 return EnableVectorSupport;
2634 }
2635
2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2637 assert(EnableVectorSupport, "sanity");
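  // Vectors are returned in XMM0; 'hi' names the last 32-bit slot of the
  // value (XMM0b for 64 bits, XMM0d for VecX, XMM0h for VecY, XMM0p for VecZ).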
2638 int lo = XMM0_num;
2639 int hi = XMM0b_num;
2640 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2641 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2642 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2643 return OptoRegPair(hi, lo);
2644 }
2645
2646 // Is this branch offset short enough that a short branch can be used?
2647 //
2648 // NOTE: If the platform does not provide any short branch variants, then
2649 // this method should return false for offset 0.
2650 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
2654 offset -= br_size;
2655
2656 // the short version of jmpConUCF2 contains multiple branches,
2657 // making the reach slightly less
2658 if (rule == jmpConUCF2_rule)
2659 return (-126 <= offset && offset <= 125);
2660 return (-128 <= offset && offset <= 127);
2661 }
2662
2663 // Return whether or not this register is ever used as an argument.
2664 // This function is used on startup to build the trampoline stubs in
2665 // generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
2668 bool Matcher::can_be_java_arg(int reg)
2669 {
2670 return
2671 reg == RDI_num || reg == RDI_H_num ||
2672 reg == RSI_num || reg == RSI_H_num ||
2673 reg == RDX_num || reg == RDX_H_num ||
2674 reg == RCX_num || reg == RCX_H_num ||
2675 reg == R8_num || reg == R8_H_num ||
2676 reg == R9_num || reg == R9_H_num ||
2677 reg == R12_num || reg == R12_H_num ||
2678 reg == XMM0_num || reg == XMM0b_num ||
2679 reg == XMM1_num || reg == XMM1b_num ||
2680 reg == XMM2_num || reg == XMM2b_num ||
2681 reg == XMM3_num || reg == XMM3b_num ||
2682 reg == XMM4_num || reg == XMM4b_num ||
2683 reg == XMM5_num || reg == XMM5b_num ||
2684 reg == XMM6_num || reg == XMM6b_num ||
2685 reg == XMM7_num || reg == XMM7b_num;
2686 }
2687
2688 bool Matcher::is_spillable_arg(int reg)
2689 {
2690 return can_be_java_arg(reg);
2691 }
2692
2693 uint Matcher::int_pressure_limit()
2694 {
2695 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2696 }
2697
2698 uint Matcher::float_pressure_limit()
2699 {
  // After experimenting with different values, the following default threshold
2701 // works best for LCM's register pressure scheduling on x64.
2702 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2703 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2704 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2705 }
2706
2707 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
2711 return false;
2712 }
2713
2714 // Register for DIVI projection of divmodI
2715 const RegMask& Matcher::divI_proj_mask() {
2716 return INT_RAX_REG_mask();
2717 }
2718
2719 // Register for MODI projection of divmodI
2720 const RegMask& Matcher::modI_proj_mask() {
2721 return INT_RDX_REG_mask();
2722 }
2723
2724 // Register for DIVL projection of divmodL
2725 const RegMask& Matcher::divL_proj_mask() {
2726 return LONG_RAX_REG_mask();
2727 }
2728
2729 // Register for MODL projection of divmodL
2730 const RegMask& Matcher::modL_proj_mask() {
2731 return LONG_RDX_REG_mask();
2732 }
2733
2734 %}
2735
2736 source_hpp %{
2737 // Header information of the source block.
2738 // Method declarations/definitions which are used outside
2739 // the ad-scope can conveniently be defined here.
2740 //
2741 // To keep related declarations/definitions/uses close together,
2742 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2743
2744 #include "runtime/vm_version.hpp"
2745
2746 class NativeJump;
2747
2748 class CallStubImpl {
2749
2750 //--------------------------------------------------------------
2751 //---< Used for optimization in Compile::shorten_branches >---
2752 //--------------------------------------------------------------
2753
2754 public:
2755 // Size of call trampoline stub.
2756 static uint size_call_trampoline() {
2757 return 0; // no call trampolines on this platform
2758 }
2759
2760 // number of relocations needed by a call trampoline stub
2761 static uint reloc_call_trampoline() {
2762 return 0; // no call trampolines on this platform
2763 }
2764 };
2765
2766 class HandlerImpl {
2767
2768 public:
2769
2770 static int emit_exception_handler(C2_MacroAssembler *masm);
2771 static int emit_deopt_handler(C2_MacroAssembler* masm);
2772
2773 static uint size_exception_handler() {
2774 // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
2777 // Note that this value is also credited (in output.cpp) to
2778 // the size of the code section.
2779 return NativeJump::instruction_size;
2780 }
2781
2782 static uint size_deopt_handler() {
2783 // three 5 byte instructions plus one move for unreachable address.
2784 return 15+3;
2785 }
2786 };
2787
2788 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2789 switch(bytes) {
2790 case 4: // fall-through
2791 case 8: // fall-through
2792 case 16: return Assembler::AVX_128bit;
2793 case 32: return Assembler::AVX_256bit;
2794 case 64: return Assembler::AVX_512bit;
2795
2796 default: {
2797 ShouldNotReachHere();
2798 return Assembler::AVX_NoVec;
2799 }
2800 }
2801 }
2802
2803 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2804 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2805 }
2806
2807 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2808 uint def_idx = use->operand_index(opnd);
2809 Node* def = use->in(def_idx);
2810 return vector_length_encoding(def);
2811 }
2812
2813 static inline bool is_vector_popcount_predicate(BasicType bt) {
2814 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2815 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2816 }
2817
2818 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2819 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2820 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2821 }
2822
2823 class Node::PD {
2824 public:
2825 enum NodeFlags {
2826 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2827 Flag_sets_carry_flag = Node::_last_flag << 2,
2828 Flag_sets_parity_flag = Node::_last_flag << 3,
2829 Flag_sets_zero_flag = Node::_last_flag << 4,
2830 Flag_sets_overflow_flag = Node::_last_flag << 5,
2831 Flag_sets_sign_flag = Node::_last_flag << 6,
2832 Flag_clears_carry_flag = Node::_last_flag << 7,
2833 Flag_clears_parity_flag = Node::_last_flag << 8,
2834 Flag_clears_zero_flag = Node::_last_flag << 9,
2835 Flag_clears_overflow_flag = Node::_last_flag << 10,
2836 Flag_clears_sign_flag = Node::_last_flag << 11,
2837 _last_flag = Flag_clears_sign_flag
2838 };
2839 };
2840
2841 %} // end source_hpp
2842
2843 source %{
2844
2845 #include "opto/addnode.hpp"
2846 #include "c2_intelJccErratum_x86.hpp"
2847
2848 void PhaseOutput::pd_perform_mach_node_analysis() {
2849 if (VM_Version::has_intel_jcc_erratum()) {
2850 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2851 _buf_sizes._code += extra_padding;
2852 }
2853 }
2854
2855 int MachNode::pd_alignment_required() const {
2856 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2857 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2858 return IntelJccErratum::largest_jcc_size() + 1;
2859 } else {
2860 return 1;
2861 }
2862 }
2863
2864 int MachNode::compute_padding(int current_offset) const {
2865 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2866 Compile* C = Compile::current();
2867 PhaseOutput* output = C->output();
2868 Block* block = output->block();
2869 int index = output->index();
2870 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2871 } else {
2872 return 0;
2873 }
2874 }
2875
2876 // Emit exception handler code.
2877 // Stuff framesize into a register and call a VM stub routine.
2878 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
2879
2880 // Note that the code buffer's insts_mark is always relative to insts.
2881 // That's why we must use the macroassembler to generate a handler.
2882 address base = __ start_a_stub(size_exception_handler());
2883 if (base == nullptr) {
2884 ciEnv::current()->record_failure("CodeCache is full");
2885 return 0; // CodeBuffer::expand failed
2886 }
2887 int offset = __ offset();
2888 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2889 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2890 __ end_a_stub();
2891 return offset;
2892 }
2893
2894 // Emit deopt handler code.
2895 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2896
2897 // Note that the code buffer's insts_mark is always relative to insts.
2898 // That's why we must use the macroassembler to generate a handler.
2899 address base = __ start_a_stub(size_deopt_handler());
2900 if (base == nullptr) {
2901 ciEnv::current()->record_failure("CodeCache is full");
2902 return 0; // CodeBuffer::expand failed
2903 }
2904 int offset = __ offset();
2905
2906 address the_pc = (address) __ pc();
2907 Label next;
2908 // push a "the_pc" on the stack without destroying any registers
2909 // as they all may be live.
2910
2911 // push address of "next"
2912 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
2913 __ bind(next);
2914 // adjust it so it matches "the_pc"
2915 __ subptr(Address(rsp, 0), __ offset() - offset);
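  // The call above pushed the address of "next"; subtracting the bytes emitted
  // since "the_pc" (i.e. the size of the call instruction) rewrites the stack
  // slot to hold "the_pc" itself.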
2916
2917 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2918 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2919 __ end_a_stub();
2920 return offset;
2921 }
2922
2923 static Assembler::Width widthForType(BasicType bt) {
2924 if (bt == T_BYTE) {
2925 return Assembler::B;
2926 } else if (bt == T_SHORT) {
2927 return Assembler::W;
2928 } else if (bt == T_INT) {
2929 return Assembler::D;
2930 } else {
2931 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2932 return Assembler::Q;
2933 }
2934 }
2935
2936 //=============================================================================
2937
2938 // Float masks come from different places depending on platform.
2939 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2940 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2941 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2942 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2943 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2944 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2945 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2946 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2947 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2948 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2949 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2950 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2951 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2952 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2953 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2954 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2955 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2956 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2957 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2958
2959 //=============================================================================
2960 bool Matcher::match_rule_supported(int opcode) {
2961 if (!has_match_rule(opcode)) {
2962 return false; // no match rule present
2963 }
2964 switch (opcode) {
2965 case Op_AbsVL:
2966 case Op_StoreVectorScatter:
2967 if (UseAVX < 3) {
2968 return false;
2969 }
2970 break;
2971 case Op_PopCountI:
2972 case Op_PopCountL:
2973 if (!UsePopCountInstruction) {
2974 return false;
2975 }
2976 break;
2977 case Op_PopCountVI:
2978 if (UseAVX < 2) {
2979 return false;
2980 }
2981 break;
2982 case Op_CompressV:
2983 case Op_ExpandV:
2984 case Op_PopCountVL:
2985 if (UseAVX < 2) {
2986 return false;
2987 }
2988 break;
2989 case Op_MulVI:
2990 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
2991 return false;
2992 }
2993 break;
2994 case Op_MulVL:
2995 if (UseSSE < 4) { // only with SSE4_1 or AVX
2996 return false;
2997 }
2998 break;
2999 case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
3001 return false;
3002 }
3003 break;
3004 case Op_AbsVB:
3005 case Op_AbsVS:
3006 case Op_AbsVI:
3007 case Op_AddReductionVI:
3008 case Op_AndReductionV:
3009 case Op_OrReductionV:
3010 case Op_XorReductionV:
3011 if (UseSSE < 3) { // requires at least SSSE3
3012 return false;
3013 }
3014 break;
3015 case Op_MaxHF:
3016 case Op_MinHF:
3017 if (!VM_Version::supports_avx512vlbw()) {
3018 return false;
3019 } // fallthrough
3020 case Op_AddHF:
3021 case Op_DivHF:
3022 case Op_FmaHF:
3023 case Op_MulHF:
3024 case Op_ReinterpretS2HF:
3025 case Op_ReinterpretHF2S:
3026 case Op_SubHF:
3027 case Op_SqrtHF:
3028 if (!VM_Version::supports_avx512_fp16()) {
3029 return false;
3030 }
3031 break;
3032 case Op_VectorLoadShuffle:
3033 case Op_VectorRearrange:
3034 case Op_MulReductionVI:
3035 if (UseSSE < 4) { // requires at least SSE4
3036 return false;
3037 }
3038 break;
3039 case Op_IsInfiniteF:
3040 case Op_IsInfiniteD:
3041 if (!VM_Version::supports_avx512dq()) {
3042 return false;
3043 }
3044 break;
3045 case Op_SqrtVD:
3046 case Op_SqrtVF:
3047 case Op_VectorMaskCmp:
3048 case Op_VectorCastB2X:
3049 case Op_VectorCastS2X:
3050 case Op_VectorCastI2X:
3051 case Op_VectorCastL2X:
3052 case Op_VectorCastF2X:
3053 case Op_VectorCastD2X:
3054 case Op_VectorUCastB2X:
3055 case Op_VectorUCastS2X:
3056 case Op_VectorUCastI2X:
3057 case Op_VectorMaskCast:
3058 if (UseAVX < 1) { // enabled for AVX only
3059 return false;
3060 }
3061 break;
3062 case Op_PopulateIndex:
3063 if (UseAVX < 2) {
3064 return false;
3065 }
3066 break;
3067 case Op_RoundVF:
3068 if (UseAVX < 2) { // enabled for AVX2 only
3069 return false;
3070 }
3071 break;
3072 case Op_RoundVD:
3073 if (UseAVX < 3) {
3074 return false; // enabled for AVX3 only
3075 }
3076 break;
3077 case Op_CompareAndSwapL:
3078 case Op_CompareAndSwapP:
3079 break;
3080 case Op_StrIndexOf:
3081 if (!UseSSE42Intrinsics) {
3082 return false;
3083 }
3084 break;
3085 case Op_StrIndexOfChar:
3086 if (!UseSSE42Intrinsics) {
3087 return false;
3088 }
3089 break;
3090 case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
3092 return false;
3093 }
3094 break;
3095 case Op_MulVB:
3096 case Op_LShiftVB:
3097 case Op_RShiftVB:
3098 case Op_URShiftVB:
3099 case Op_VectorInsert:
3100 case Op_VectorLoadMask:
3101 case Op_VectorStoreMask:
3102 case Op_VectorBlend:
3103 if (UseSSE < 4) {
3104 return false;
3105 }
3106 break;
3107 case Op_MaxD:
3108 case Op_MaxF:
3109 case Op_MinD:
3110 case Op_MinF:
3111 if (UseAVX < 1) { // enabled for AVX only
3112 return false;
3113 }
3114 break;
3115 case Op_CacheWB:
3116 case Op_CacheWBPreSync:
3117 case Op_CacheWBPostSync:
3118 if (!VM_Version::supports_data_cache_line_flush()) {
3119 return false;
3120 }
3121 break;
3122 case Op_ExtractB:
3123 case Op_ExtractL:
3124 case Op_ExtractI:
3125 case Op_RoundDoubleMode:
3126 if (UseSSE < 4) {
3127 return false;
3128 }
3129 break;
3130 case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
3132 return false; // 128bit vroundpd is not available
3133 }
3134 break;
3135 case Op_LoadVectorGather:
3136 case Op_LoadVectorGatherMasked:
3137 if (UseAVX < 2) {
3138 return false;
3139 }
3140 break;
3141 case Op_FmaF:
3142 case Op_FmaD:
3143 case Op_FmaVD:
3144 case Op_FmaVF:
3145 if (!UseFMA) {
3146 return false;
3147 }
3148 break;
3149 case Op_MacroLogicV:
3150 if (UseAVX < 3 || !UseVectorMacroLogic) {
3151 return false;
3152 }
3153 break;
3154
3155 case Op_VectorCmpMasked:
3156 case Op_VectorMaskGen:
3157 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3158 return false;
3159 }
3160 break;
3161 case Op_VectorMaskFirstTrue:
3162 case Op_VectorMaskLastTrue:
3163 case Op_VectorMaskTrueCount:
3164 case Op_VectorMaskToLong:
3165 if (UseAVX < 1) {
3166 return false;
3167 }
3168 break;
3169 case Op_RoundF:
3170 case Op_RoundD:
3171 break;
3172 case Op_CopySignD:
3173 case Op_CopySignF:
3174 if (UseAVX < 3) {
3175 return false;
3176 }
3177 if (!VM_Version::supports_avx512vl()) {
3178 return false;
3179 }
3180 break;
3181 case Op_CompressBits:
3182 case Op_ExpandBits:
3183 if (!VM_Version::supports_bmi2()) {
3184 return false;
3185 }
3186 break;
3187 case Op_CompressM:
3188 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3189 return false;
3190 }
3191 break;
3192 case Op_ConvF2HF:
3193 case Op_ConvHF2F:
3194 if (!VM_Version::supports_float16()) {
3195 return false;
3196 }
3197 break;
3198 case Op_VectorCastF2HF:
3199 case Op_VectorCastHF2F:
3200 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3201 return false;
3202 }
3203 break;
3204 }
3205 return true; // Match rules are supported by default.
3206 }
3207
3208 //------------------------------------------------------------------------
3209
3210 static inline bool is_pop_count_instr_target(BasicType bt) {
3211 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3212 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3213 }
3214
3215 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3216 return match_rule_supported_vector(opcode, vlen, bt);
3217 }
3218
3219 // Identify extra cases that we might want to provide match rules for vector nodes and
3220 // other intrinsics guarded with vector length (vlen) and element type (bt).
3221 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3222 if (!match_rule_supported(opcode)) {
3223 return false;
3224 }
3225 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3226 // * SSE2 supports 128bit vectors for all types;
3227 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3228 // * AVX2 supports 256bit vectors for all types;
3229 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3230 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3231 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3232 // And MaxVectorSize is taken into account as well.
3233 if (!vector_size_supported(bt, vlen)) {
3234 return false;
3235 }
3236 // Special cases which require vector length follow:
3237 // * implementation limitations
3238 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3239 // * 128bit vroundpd instruction is present only in AVX1
3240 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
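  // For example, a 256-bit vector of ints has vlen == 8: 8 * 4 * 8 = 256 bits.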
3241 switch (opcode) {
3242 case Op_MaxVHF:
3243 case Op_MinVHF:
3244 if (!VM_Version::supports_avx512bw()) {
3245 return false;
      } // fallthrough
3247 case Op_AddVHF:
3248 case Op_DivVHF:
3249 case Op_FmaVHF:
3250 case Op_MulVHF:
3251 case Op_SubVHF:
3252 case Op_SqrtVHF:
3253 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3254 return false;
3255 }
3256 if (!VM_Version::supports_avx512_fp16()) {
3257 return false;
3258 }
3259 break;
3260 case Op_AbsVF:
3261 case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
3263 return false; // 512bit vandps and vxorps are not available
3264 }
3265 break;
3266 case Op_AbsVD:
3267 case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
3269 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3270 }
3271 break;
3272 case Op_RotateRightV:
3273 case Op_RotateLeftV:
3274 if (bt != T_INT && bt != T_LONG) {
3275 return false;
3276 } // fallthrough
3277 case Op_MacroLogicV:
3278 if (!VM_Version::supports_evex() ||
3279 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3280 return false;
3281 }
3282 break;
3283 case Op_ClearArray:
3284 case Op_VectorMaskGen:
3285 case Op_VectorCmpMasked:
3286 if (!VM_Version::supports_avx512bw()) {
3287 return false;
3288 }
3289 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3290 return false;
3291 }
3292 break;
3293 case Op_LoadVectorMasked:
3294 case Op_StoreVectorMasked:
3295 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3296 return false;
3297 }
3298 break;
3299 case Op_UMinV:
3300 case Op_UMaxV:
3301 if (UseAVX == 0) {
3302 return false;
3303 }
3304 break;
3305 case Op_MaxV:
3306 case Op_MinV:
3307 if (UseSSE < 4 && is_integral_type(bt)) {
3308 return false;
3309 }
3310 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3311 // Float/Double intrinsics are enabled for AVX family currently.
3312 if (UseAVX == 0) {
3313 return false;
3314 }
3315 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3316 return false;
3317 }
3318 }
3319 break;
3320 case Op_CallLeafVector:
3321 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3322 return false;
3323 }
3324 break;
3325 case Op_AddReductionVI:
3326 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3327 return false;
3328 }
3329 // fallthrough
3330 case Op_AndReductionV:
3331 case Op_OrReductionV:
3332 case Op_XorReductionV:
3333 if (is_subword_type(bt) && (UseSSE < 4)) {
3334 return false;
3335 }
3336 break;
3337 case Op_MinReductionV:
3338 case Op_MaxReductionV:
3339 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3340 return false;
3341 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3342 return false;
3343 }
3344 // Float/Double intrinsics enabled for AVX family.
3345 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3346 return false;
3347 }
3348 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3349 return false;
3350 }
3351 break;
3352 case Op_VectorTest:
3353 if (UseSSE < 4) {
3354 return false; // Implementation limitation
3355 } else if (size_in_bits < 32) {
3356 return false; // Implementation limitation
3357 }
3358 break;
3359 case Op_VectorLoadShuffle:
3360 case Op_VectorRearrange:
      if (vlen == 2) {
3362 return false; // Implementation limitation due to how shuffle is loaded
3363 } else if (size_in_bits == 256 && UseAVX < 2) {
3364 return false; // Implementation limitation
3365 }
3366 break;
3367 case Op_VectorLoadMask:
3368 case Op_VectorMaskCast:
3369 if (size_in_bits == 256 && UseAVX < 2) {
3370 return false; // Implementation limitation
3371 }
3372 // fallthrough
3373 case Op_VectorStoreMask:
3374 if (vlen == 2) {
3375 return false; // Implementation limitation
3376 }
3377 break;
3378 case Op_PopulateIndex:
3379 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3380 return false;
3381 }
3382 break;
3383 case Op_VectorCastB2X:
3384 case Op_VectorCastS2X:
3385 case Op_VectorCastI2X:
3386 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3387 return false;
3388 }
3389 break;
3390 case Op_VectorCastL2X:
3391 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3392 return false;
3393 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3394 return false;
3395 }
3396 break;
3397 case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversions to sub-word types
      // happen after an intermediate conversion to integer, and the special
      // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3401 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3402 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3403 return false;
3404 }
3405 }
3406 // fallthrough
3407 case Op_VectorCastD2X:
3408 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3409 return false;
3410 }
3411 break;
3412 case Op_VectorCastF2HF:
3413 case Op_VectorCastHF2F:
3414 if (!VM_Version::supports_f16c() &&
3415 ((!VM_Version::supports_evex() ||
3416 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3417 return false;
3418 }
3419 break;
3420 case Op_RoundVD:
3421 if (!VM_Version::supports_avx512dq()) {
3422 return false;
3423 }
3424 break;
3425 case Op_MulReductionVI:
3426 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3427 return false;
3428 }
3429 break;
3430 case Op_LoadVectorGatherMasked:
3431 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3432 return false;
3433 }
3434 if (is_subword_type(bt) &&
3435 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3436 (size_in_bits < 64) ||
3437 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3438 return false;
3439 }
3440 break;
3441 case Op_StoreVectorScatterMasked:
3442 case Op_StoreVectorScatter:
3443 if (is_subword_type(bt)) {
3444 return false;
3445 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3446 return false;
3447 }
3448 // fallthrough
3449 case Op_LoadVectorGather:
3450 if (!is_subword_type(bt) && size_in_bits == 64) {
3451 return false;
3452 }
3453 if (is_subword_type(bt) && size_in_bits < 64) {
3454 return false;
3455 }
3456 break;
3457 case Op_SaturatingAddV:
3458 case Op_SaturatingSubV:
3459 if (UseAVX < 1) {
3460 return false; // Implementation limitation
3461 }
3462 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3463 return false;
3464 }
3465 break;
3466 case Op_SelectFromTwoVector:
3467 if (size_in_bits < 128) {
3468 return false;
3469 }
3470 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3471 return false;
3472 }
3473 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3474 return false;
3475 }
3476 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3477 return false;
3478 }
3479 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3480 return false;
3481 }
3482 break;
3483 case Op_MaskAll:
3484 if (!VM_Version::supports_evex()) {
3485 return false;
3486 }
3487 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3488 return false;
3489 }
3490 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3491 return false;
3492 }
3493 break;
3494 case Op_VectorMaskCmp:
3495 if (vlen < 2 || size_in_bits < 32) {
3496 return false;
3497 }
3498 break;
3499 case Op_CompressM:
3500 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3501 return false;
3502 }
3503 break;
3504 case Op_CompressV:
3505 case Op_ExpandV:
3506 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3507 return false;
3508 }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
3512 case Op_VectorLongToMask:
3513 if (UseAVX < 1) {
3514 return false;
3515 }
3516 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3517 return false;
3518 }
3519 break;
3520 case Op_SignumVD:
3521 case Op_SignumVF:
3522 if (UseAVX < 1) {
3523 return false;
3524 }
3525 break;
3526 case Op_PopCountVI:
3527 case Op_PopCountVL: {
3528 if (!is_pop_count_instr_target(bt) &&
3529 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3530 return false;
3531 }
3532 }
3533 break;
3534 case Op_ReverseV:
3535 case Op_ReverseBytesV:
3536 if (UseAVX < 2) {
3537 return false;
3538 }
3539 break;
3540 case Op_CountTrailingZerosV:
3541 case Op_CountLeadingZerosV:
3542 if (UseAVX < 2) {
3543 return false;
3544 }
3545 break;
3546 }
  return true; // Match rules are supported by default.
3548 }
3549
3550 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge being
  // the differentiator. This routine does a strict check on the existence of
  // masked operation patterns by returning a default false value for all opcodes
  // apart from the ones whose masked instruction patterns are defined in this file.
3557 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3558 return false;
3559 }
3560
3561 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3562 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3563 return false;
3564 }
3565 switch(opcode) {
3566 // Unary masked operations
3567 case Op_AbsVB:
3568 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      // fallthrough
3572 case Op_AbsVI:
3573 case Op_AbsVL:
3574 return true;
3575
3576 // Ternary masked operations
3577 case Op_FmaVF:
3578 case Op_FmaVD:
3579 return true;
3580
3581 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3583 return false;
3584 }
3585 return true;
3586
3587 // Binary masked operations
3588 case Op_AddVB:
3589 case Op_AddVS:
3590 case Op_SubVB:
3591 case Op_SubVS:
3592 case Op_MulVS:
3593 case Op_LShiftVS:
3594 case Op_RShiftVS:
3595 case Op_URShiftVS:
3596 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3597 if (!VM_Version::supports_avx512bw()) {
3598 return false; // Implementation limitation
3599 }
3600 return true;
3601
3602 case Op_MulVL:
3603 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3604 if (!VM_Version::supports_avx512dq()) {
3605 return false; // Implementation limitation
3606 }
3607 return true;
3608
3609 case Op_AndV:
3610 case Op_OrV:
3611 case Op_XorV:
3612 case Op_RotateRightV:
3613 case Op_RotateLeftV:
3614 if (bt != T_INT && bt != T_LONG) {
3615 return false; // Implementation limitation
3616 }
3617 return true;
3618
3619 case Op_VectorLoadMask:
3620 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3621 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3622 return false;
3623 }
3624 return true;
3625
3626 case Op_AddVI:
3627 case Op_AddVL:
3628 case Op_AddVF:
3629 case Op_AddVD:
3630 case Op_SubVI:
3631 case Op_SubVL:
3632 case Op_SubVF:
3633 case Op_SubVD:
3634 case Op_MulVI:
3635 case Op_MulVF:
3636 case Op_MulVD:
3637 case Op_DivVF:
3638 case Op_DivVD:
3639 case Op_SqrtVF:
3640 case Op_SqrtVD:
3641 case Op_LShiftVI:
3642 case Op_LShiftVL:
3643 case Op_RShiftVI:
3644 case Op_RShiftVL:
3645 case Op_URShiftVI:
3646 case Op_URShiftVL:
3647 case Op_LoadVectorMasked:
3648 case Op_StoreVectorMasked:
3649 case Op_LoadVectorGatherMasked:
3650 case Op_StoreVectorScatterMasked:
3651 return true;
3652
3653 case Op_UMinV:
3654 case Op_UMaxV:
3655 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3656 return false;
3657 } // fallthrough
3658 case Op_MaxV:
3659 case Op_MinV:
3660 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3661 return false; // Implementation limitation
3662 }
3663 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3664 return false; // Implementation limitation
3665 }
3666 return true;
3667 case Op_SaturatingAddV:
3668 case Op_SaturatingSubV:
3669 if (!is_subword_type(bt)) {
3670 return false;
3671 }
3672 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3673 return false; // Implementation limitation
3674 }
3675 return true;
3676
3677 case Op_VectorMaskCmp:
3678 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3679 return false; // Implementation limitation
3680 }
3681 return true;
3682
3683 case Op_VectorRearrange:
3684 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3685 return false; // Implementation limitation
3686 }
3687 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3688 return false; // Implementation limitation
3689 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3690 return false; // Implementation limitation
3691 }
3692 return true;
3693
3694 // Binary Logical operations
3695 case Op_AndVMask:
3696 case Op_OrVMask:
3697 case Op_XorVMask:
3698 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3699 return false; // Implementation limitation
3700 }
3701 return true;
3702
3703 case Op_PopCountVI:
3704 case Op_PopCountVL:
3705 if (!is_pop_count_instr_target(bt)) {
3706 return false;
3707 }
3708 return true;
3709
3710 case Op_MaskAll:
3711 return true;
3712
3713 case Op_CountLeadingZerosV:
3714 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3715 return true;
      } // fallthrough
3717 default:
3718 return false;
3719 }
3720 }
3721
3722 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3723 return false;
3724 }
3725
3726 // Return true if Vector::rearrange needs preparation of the shuffle argument
3727 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3728 switch (elem_bt) {
3729 case T_BYTE: return false;
3730 case T_SHORT: return !VM_Version::supports_avx512bw();
3731 case T_INT: return !VM_Version::supports_avx();
3732 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3733 default:
3734 ShouldNotReachHere();
3735 return false;
3736 }
3737 }
3738
3739 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3740 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3741 bool legacy = (generic_opnd->opcode() == LEGVEC);
3742 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3743 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3744 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3745 return new legVecZOper();
3746 }
3747 if (legacy) {
3748 switch (ideal_reg) {
3749 case Op_VecS: return new legVecSOper();
3750 case Op_VecD: return new legVecDOper();
3751 case Op_VecX: return new legVecXOper();
3752 case Op_VecY: return new legVecYOper();
3753 case Op_VecZ: return new legVecZOper();
3754 }
3755 } else {
3756 switch (ideal_reg) {
3757 case Op_VecS: return new vecSOper();
3758 case Op_VecD: return new vecDOper();
3759 case Op_VecX: return new vecXOper();
3760 case Op_VecY: return new vecYOper();
3761 case Op_VecZ: return new vecZOper();
3762 }
3763 }
3764 ShouldNotReachHere();
3765 return nullptr;
3766 }
3767
3768 bool Matcher::is_reg2reg_move(MachNode* m) {
3769 switch (m->rule()) {
3770 case MoveVec2Leg_rule:
3771 case MoveLeg2Vec_rule:
3772 case MoveF2VL_rule:
3773 case MoveF2LEG_rule:
3774 case MoveVL2F_rule:
3775 case MoveLEG2F_rule:
3776 case MoveD2VL_rule:
3777 case MoveD2LEG_rule:
3778 case MoveVL2D_rule:
3779 case MoveLEG2D_rule:
3780 return true;
3781 default:
3782 return false;
3783 }
3784 }
3785
3786 bool Matcher::is_generic_vector(MachOper* opnd) {
3787 switch (opnd->opcode()) {
3788 case VEC:
3789 case LEGVEC:
3790 return true;
3791 default:
3792 return false;
3793 }
3794 }
3795
3796 //------------------------------------------------------------------------
3797
3798 const RegMask* Matcher::predicate_reg_mask(void) {
3799 return &_VECTMASK_REG_mask;
3800 }
3801
3802 // Max vector size in bytes. 0 if not supported.
3803 int Matcher::vector_width_in_bytes(BasicType bt) {
3804 assert(is_java_primitive(bt), "only primitive type vectors");
3805 // SSE2 supports 128bit vectors for all types.
3806 // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
3808 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
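  // e.g. UseAVX == 2 gives (1 << 2) * 8 = 32 bytes; UseAVX == 3 gives 64 bytes.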
3809 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3810 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3811 size = (UseAVX > 2) ? 64 : 32;
3812 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3813 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3814 // Use flag to limit vector size.
3815 size = MIN2(size,(int)MaxVectorSize);
3816 // Minimum 2 values in vector (or 4 for bytes).
3817 switch (bt) {
3818 case T_DOUBLE:
3819 case T_LONG:
3820 if (size < 16) return 0;
3821 break;
3822 case T_FLOAT:
3823 case T_INT:
3824 if (size < 8) return 0;
3825 break;
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
      if (size < 4) return 0;
      break;
3838 default:
3839 ShouldNotReachHere();
3840 }
3841 return size;
3842 }
3843
3844 // Limits on vector size (number of elements) loaded into vector.
3845 int Matcher::max_vector_size(const BasicType bt) {
3846 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3847 }
3848 int Matcher::min_vector_size(const BasicType bt) {
3849 int max_size = max_vector_size(bt);
  // The minimum size that can be loaded into a vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Allow single-element double vectors, to support calls into SVML.
3853 if (bt == T_DOUBLE) {
3854 size = 1;
3855 }
3856 return MIN2(size,max_size);
3857 }
3858
3859 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3860 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3861 // by default on Cascade Lake
3862 if (VM_Version::is_default_intel_cascade_lake()) {
3863 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3864 }
3865 return Matcher::max_vector_size(bt);
3866 }
3867
3868 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3869 return -1;
3870 }
3871
3872 // Vector ideal reg corresponding to specified size in bytes
3873 uint Matcher::vector_ideal_reg(int size) {
3874 assert(MaxVectorSize >= size, "");
3875 switch(size) {
3876 case 4: return Op_VecS;
3877 case 8: return Op_VecD;
3878 case 16: return Op_VecX;
3879 case 32: return Op_VecY;
3880 case 64: return Op_VecZ;
3881 }
3882 ShouldNotReachHere();
3883 return 0;
3884 }
3885
3886 // Check for shift by small constant as well
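// For example, an index shift (LShiftX idx #3) under an AddP can be subsumed
// into the scaled addressing mode [base + idx*8 + disp].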
3887 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3888 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3889 shift->in(2)->get_int() <= 3 &&
3890 // Are there other uses besides address expressions?
3891 !matcher->is_visited(shift)) {
3892 address_visited.set(shift->_idx); // Flag as address_visited
3893 mstack.push(shift->in(2), Matcher::Visit);
3894 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
3898 if (conv->Opcode() == Op_ConvI2L &&
3899 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3900 // Are there other uses besides address expressions?
3901 !matcher->is_visited(conv)) {
3902 address_visited.set(conv->_idx); // Flag as address_visited
3903 mstack.push(conv->in(1), Matcher::Pre_Visit);
3904 } else {
3905 mstack.push(conv, Matcher::Pre_Visit);
3906 }
3907 return true;
3908 }
3909 return false;
3910 }
3911
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the pair can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
3918 //
3919 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3920 // This is a temporary solution until we make DAGs expressible in ADL.
3921 template<typename ConType>
3922 class FusedPatternMatcher {
3923 Node* _op1_node;
3924 Node* _mop_node;
3925 int _con_op;
3926
3927 static int match_next(Node* n, int next_op, int next_op_idx) {
3928 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3929 return -1;
3930 }
3931
3932 if (next_op_idx == -1) { // n is commutative, try rotations
3933 if (n->in(1)->Opcode() == next_op) {
3934 return 1;
3935 } else if (n->in(2)->Opcode() == next_op) {
3936 return 2;
3937 }
3938 } else {
3939 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3940 if (n->in(next_op_idx)->Opcode() == next_op) {
3941 return next_op_idx;
3942 }
3943 }
3944 return -1;
3945 }
3946
3947 public:
3948 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3949 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3950
3951 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
3952 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
3953 typename ConType::NativeType con_value) {
3954 if (_op1_node->Opcode() != op1) {
3955 return false;
3956 }
3957 if (_mop_node->outcnt() > 2) {
3958 return false;
3959 }
3960 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
3961 if (op1_op2_idx == -1) {
3962 return false;
3963 }
3964 // Memory operation must be the other edge
3965 int op1_mop_idx = (op1_op2_idx & 1) + 1;
3966
3967 // Check that the mop node is really what we want
3968 if (_op1_node->in(op1_mop_idx) == _mop_node) {
3969 Node* op2_node = _op1_node->in(op1_op2_idx);
3970 if (op2_node->outcnt() > 1) {
3971 return false;
3972 }
3973 assert(op2_node->Opcode() == op2, "Should be");
3974 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
3975 if (op2_con_idx == -1) {
3976 return false;
3977 }
3978 // Memory operation must be the other edge
3979 int op2_mop_idx = (op2_con_idx & 1) + 1;
3980 // Check that the memory operation is the same node
3981 if (op2_node->in(op2_mop_idx) == _mop_node) {
3982 // Now check the constant
3983 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
3984 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
3985 return true;
3986 }
3987 }
3988 }
3989 return false;
3990 }
3991 };
3992
3993 static bool is_bmi_pattern(Node* n, Node* m) {
3994 assert(UseBMI1Instructions, "sanity");
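  // For reference, the idioms matched below correspond to the BMI1 instructions:
  //   (AndI (SubI 0 x) x)   -> blsi   (x & -x)
  //   (AndI (AddI x -1) x)  -> blsr   (x & (x - 1))
  //   (XorI (AddI x -1) x)  -> blsmsk (x ^ (x - 1))
  // and likewise for the long (L) variants.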
3995 if (n != nullptr && m != nullptr) {
3996 if (m->Opcode() == Op_LoadI) {
3997 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
3998 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
3999 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4000 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4001 } else if (m->Opcode() == Op_LoadL) {
4002 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4003 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4004 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4005 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4006 }
4007 }
4008 return false;
4009 }
4010
4011 // Should the matcher clone input 'm' of node 'n'?
4012 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4013 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4014 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4015 mstack.push(m, Visit);
4016 return true;
4017 }
4018 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4019 mstack.push(m, Visit); // m = ShiftCntV
4020 return true;
4021 }
4022 if (is_encode_and_store_pattern(n, m)) {
4023 mstack.push(m, Visit);
4024 return true;
4025 }
4026 return false;
4027 }
4028
4029 // Should the Matcher clone shifts on addressing modes, expecting them
4030 // to be subsumed into complex addressing expressions or compute them
4031 // into registers?
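// For example, the sub-graph (AddP (AddP base (LShiftX idx #2)) #16) can be
// subsumed into the single addressing expression [base + idx*4 + 16].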
4032 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4033 Node *off = m->in(AddPNode::Offset);
4034 if (off->is_Con()) {
4035 address_visited.test_set(m->_idx); // Flag as address_visited
4036 Node *adr = m->in(AddPNode::Address);
4037
4038 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4039 // AtomicAdd is not an addressing expression.
4040 // Cheap to find it by looking for screwy base.
4041 if (adr->is_AddP() &&
4042 !adr->in(AddPNode::Base)->is_top() &&
4043 !adr->in(AddPNode::Offset)->is_Con() &&
4044 off->get_long() == (int) (off->get_long()) && // immL32
4045 // Are there other uses besides address expressions?
4046 !is_visited(adr)) {
4047 address_visited.set(adr->_idx); // Flag as address_visited
4048 Node *shift = adr->in(AddPNode::Offset);
4049 if (!clone_shift(shift, this, mstack, address_visited)) {
4050 mstack.push(shift, Pre_Visit);
4051 }
4052 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4053 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4054 } else {
4055 mstack.push(adr, Pre_Visit);
4056 }
4057
4058 // Clone X+offset as it also folds into most addressing expressions
4059 mstack.push(off, Visit);
4060 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4061 return true;
4062 } else if (clone_shift(off, this, mstack, address_visited)) {
4063 address_visited.test_set(m->_idx); // Flag as address_visited
4064 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4065 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4066 return true;
4067 }
4068 return false;
4069 }
4070
4071 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4072 switch (bt) {
4073 case BoolTest::eq:
4074 return Assembler::eq;
4075 case BoolTest::ne:
4076 return Assembler::neq;
4077 case BoolTest::le:
4078 case BoolTest::ule:
4079 return Assembler::le;
4080 case BoolTest::ge:
4081 case BoolTest::uge:
4082 return Assembler::nlt;
4083 case BoolTest::lt:
4084 case BoolTest::ult:
4085 return Assembler::lt;
4086 case BoolTest::gt:
4087 case BoolTest::ugt:
4088 return Assembler::nle;
    default: ShouldNotReachHere(); return Assembler::_false;
4090 }
4091 }
4092
4093 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4094 switch (bt) {
4095 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4096 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4097 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4098 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4099 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4100 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4101 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4102 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4103 }
4104 }
4105
4106 // Helper methods for MachSpillCopyNode::implementation().
4107 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4108 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4109 assert(ireg == Op_VecS || // 32bit vector
4110 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4111 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4112 "no non-adjacent vector moves" );
4113 if (masm) {
4114 switch (ireg) {
4115 case Op_VecS: // copy whole register
4116 case Op_VecD:
4117 case Op_VecX:
4118 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4119 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4120 } else {
4121 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4122 }
4123 break;
4124 case Op_VecY:
4125 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4126 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4127 } else {
4128 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4129 }
4130 break;
4131 case Op_VecZ:
4132 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4133 break;
4134 default:
4135 ShouldNotReachHere();
4136 }
4137 #ifndef PRODUCT
4138 } else {
4139 switch (ireg) {
4140 case Op_VecS:
4141 case Op_VecD:
4142 case Op_VecX:
4143 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4144 break;
4145 case Op_VecY:
4146 case Op_VecZ:
4147 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4148 break;
4149 default:
4150 ShouldNotReachHere();
4151 }
4152 #endif
4153 }
4154 }
4155
4156 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4157 int stack_offset, int reg, uint ireg, outputStream* st) {
4158 if (masm) {
4159 if (is_load) {
4160 switch (ireg) {
4161 case Op_VecS:
4162 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4163 break;
4164 case Op_VecD:
4165 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4166 break;
4167 case Op_VecX:
4168 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4169 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4170 } else {
4171 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4172 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4173 }
4174 break;
4175 case Op_VecY:
4176 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4177 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4178 } else {
4179 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4180 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4181 }
4182 break;
4183 case Op_VecZ:
4184 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4185 break;
4186 default:
4187 ShouldNotReachHere();
4188 }
4189 } else { // store
4190 switch (ireg) {
4191 case Op_VecS:
4192 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4193 break;
4194 case Op_VecD:
4195 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4196 break;
4197 case Op_VecX:
4198 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4199 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4202 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4203 }
4204 break;
4205 case Op_VecY:
4206 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4207 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4210 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4211 }
4212 break;
4213 case Op_VecZ:
4214 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4215 break;
4216 default:
4217 ShouldNotReachHere();
4218 }
4219 }
4220 #ifndef PRODUCT
4221 } else {
4222 if (is_load) {
4223 switch (ireg) {
4224 case Op_VecS:
4225 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4226 break;
4227 case Op_VecD:
4228 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4229 break;
4230 case Op_VecX:
4231 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4232 break;
4233 case Op_VecY:
4234 case Op_VecZ:
4235 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4236 break;
4237 default:
4238 ShouldNotReachHere();
4239 }
4240 } else { // store
4241 switch (ireg) {
4242 case Op_VecS:
4243 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4244 break;
4245 case Op_VecD:
4246 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4247 break;
4248 case Op_VecX:
4249 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4250 break;
4251 case Op_VecY:
4252 case Op_VecZ:
4253 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4254 break;
4255 default:
4256 ShouldNotReachHere();
4257 }
4258 }
4259 #endif
4260 }
4261 }
4262
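// Replicate the scalar constant 'con' into a little-endian byte array of
// 'len' elements, used when materializing vector constants; e.g.
// vreplicate_imm(T_SHORT, 1, 4) yields the bytes 01 00 01 00 01 00 01 00.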
4263 template <class T>
4264 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4265 int size = type2aelembytes(bt) * len;
4266 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4267 for (int i = 0; i < len; i++) {
4268 int offset = i * type2aelembytes(bt);
4269 switch (bt) {
4270 case T_BYTE: val->at(i) = con; break;
4271 case T_SHORT: {
4272 jshort c = con;
4273 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4274 break;
4275 }
4276 case T_INT: {
4277 jint c = con;
4278 memcpy(val->adr_at(offset), &c, sizeof(jint));
4279 break;
4280 }
4281 case T_LONG: {
4282 jlong c = con;
4283 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4284 break;
4285 }
4286 case T_FLOAT: {
4287 jfloat c = con;
4288 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4289 break;
4290 }
4291 case T_DOUBLE: {
4292 jdouble c = con;
4293 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4294 break;
4295 }
4296 default: assert(false, "%s", type2name(bt));
4297 }
4298 }
4299 return val;
4300 }
4301
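// Return a 64-bit pattern with the sign (high) bit of every element of the
// given type set, e.g. 0x80 in each byte lane for T_BYTE.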
4302 static inline jlong high_bit_set(BasicType bt) {
4303 switch (bt) {
4304 case T_BYTE: return 0x8080808080808080;
4305 case T_SHORT: return 0x8000800080008000;
4306 case T_INT: return 0x8000000080000000;
4307 case T_LONG: return 0x8000000000000000;
4308 default:
4309 ShouldNotReachHere();
4310 return 0;
4311 }
4312 }
4313
4314 #ifndef PRODUCT
4315 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4316 st->print("nop \t# %d bytes pad for loops and calls", _count);
4317 }
4318 #endif
4319
4320 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4321 __ nop(_count);
4322 }
4323
4324 uint MachNopNode::size(PhaseRegAlloc*) const {
4325 return _count;
4326 }
4327
4328 #ifndef PRODUCT
4329 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4330 st->print("# breakpoint");
4331 }
4332 #endif
4333
4334 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4335 __ int3();
4336 }
4337
4338 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4339 return MachNode::size(ra_);
4340 }
4341
4342 %}
4343
4344 //----------ENCODING BLOCK-----------------------------------------------------
4345 // This block specifies the encoding classes used by the compiler to
4346 // output byte streams. Encoding classes are parameterized macros
4347 // used by Machine Instruction Nodes in order to generate the bit
4348 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. There are currently four
// supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
4351 // COND_INTER. REG_INTER causes an operand to generate a function
4352 // which returns its register number when queried. CONST_INTER causes
4353 // an operand to generate a function which returns the value of the
4354 // constant when queried. MEMORY_INTER causes an operand to generate
4355 // four functions which return the Base Register, the Index Register,
4356 // the Scale Value, and the Offset Value of the operand when queried.
4357 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e. the encoding bits for the instruction)
4359 // associated with each basic boolean condition for a conditional
4360 // instruction.
4361 //
4362 // Instructions specify two basic values for encoding. Again, a
4363 // function is available to check if the constant displacement is an
4364 // oop. They use the ins_encode keyword to specify their encoding
4365 // classes (which must be a sequence of enc_class names, and their
4366 // parameters, specified in the encoding block), and they use the
4367 // opcode keyword to specify, in order, their primary, secondary, and
4368 // tertiary opcode. Only the opcode sections which a particular
4369 // instruction needs for encoding need to be specified.
4370 encode %{
4371 enc_class cdql_enc(no_rax_rdx_RegI div)
4372 %{
4373 // Full implementation of Java idiv and irem; checks for
4374 // special case as described in JVM spec., p.243 & p.271.
4375 //
4376 // normal case special case
4377 //
4378 // input : rax: dividend min_int
4379 // reg: divisor -1
4380 //
4381 // output: rax: quotient (= rax idiv reg) min_int
4382 // rdx: remainder (= rax irem reg) 0
4383 //
  // Code sequence:
4385 //
4386 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4387 // 5: 75 07/08 jne e <normal>
4388 // 7: 33 d2 xor %edx,%edx
4389 // [div >= 8 -> offset + 1]
4390 // [REX_B]
4391 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4392 // c: 74 03/04 je 11 <done>
4393 // 000000000000000e <normal>:
4394 // e: 99 cltd
4395 // [div >= 8 -> offset + 1]
4396 // [REX_B]
4397 // f: f7 f9 idiv $div
4398 // 0000000000000011 <done>:
4399 Label normal;
4400 Label done;
4401
4402 // cmp $0x80000000,%eax
4403 __ cmpl(as_Register(RAX_enc), 0x80000000);
4404
4405 // jne e <normal>
4406 __ jccb(Assembler::notEqual, normal);
4407
4408 // xor %edx,%edx
4409 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4410
4411 // cmp $0xffffffffffffffff,$div
4412 __ cmpl($div$$Register, -1);
4413
4414 // je 11 <done>
4415 __ jccb(Assembler::equal, done);
4416
4417 // <normal>
4418 // cltd
4419 __ bind(normal);
4420 __ cdql();
4421
4422 // idivl
4423 // <done>
4424 __ idivl($div$$Register);
4425 __ bind(done);
4426 %}
4427
4428 enc_class cdqq_enc(no_rax_rdx_RegL div)
4429 %{
4430 // Full implementation of Java ldiv and lrem; checks for
4431 // special case as described in JVM spec., p.243 & p.271.
4432 //
4433 // normal case special case
4434 //
4435 // input : rax: dividend min_long
4436 // reg: divisor -1
4437 //
4438 // output: rax: quotient (= rax idiv reg) min_long
4439 // rdx: remainder (= rax irem reg) 0
4440 //
4441 // Code sequence:
4442 //
4443 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4444 // 7: 00 00 80
4445 // a: 48 39 d0 cmp %rdx,%rax
4446 // d: 75 08 jne 17 <normal>
4447 // f: 33 d2 xor %edx,%edx
4448 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4449 // 15: 74 05 je 1c <done>
4450 // 0000000000000017 <normal>:
4451 // 17: 48 99 cqto
4452 // 19: 48 f7 f9 idiv $div
4453 // 000000000000001c <done>:
4454 Label normal;
4455 Label done;
4456
4457 // mov $0x8000000000000000,%rdx
4458 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4459
4460 // cmp %rdx,%rax
4461 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4462
4463 // jne 17 <normal>
4464 __ jccb(Assembler::notEqual, normal);
4465
4466 // xor %edx,%edx
4467 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4468
4469 // cmp $0xffffffffffffffff,$div
4470 __ cmpq($div$$Register, -1);
4471
4472 // je 1c <done>
4473 __ jccb(Assembler::equal, done);
4474
4475 // <normal>
4476 // cqto
4477 __ bind(normal);
4478 __ cdqq();
4479
4480 // idivq
4481 // <done>
4482 __ idivq($div$$Register);
4483 __ bind(done);
4484 %}
4485
4486 enc_class clear_avx %{
4487 DEBUG_ONLY(int off0 = __ offset());
4488 if (generate_vzeroupper(Compile::current())) {
4490 // Clear upper bits of YMM registers when current compiled code uses
4491 // wide vectors to avoid AVX <-> SSE transition penalty during call.
4492 __ vzeroupper();
4493 }
4494 DEBUG_ONLY(int off1 = __ offset());
4495 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4496 %}
4497
4498 enc_class Java_To_Runtime(method meth) %{
4499 __ lea(r10, RuntimeAddress((address)$meth$$method));
4500 __ call(r10);
4501 __ post_call_nop();
4502 %}
4503
4504 enc_class Java_Static_Call(method meth)
4505 %{
4506 // JAVA STATIC CALL
4507 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4508 // determine who we intended to call.
4509 if (!_method) {
4510 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4511 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4512 // The NOP here is purely to ensure that eliding a call to
4513 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4514 __ addr_nop_5();
4515 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4516 } else {
4517 int method_index = resolved_method_index(masm);
4518 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4519 : static_call_Relocation::spec(method_index);
4520 address mark = __ pc();
4521 int call_offset = __ offset();
4522 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4523 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4524 // Calls of the same statically bound method can share
4525 // a stub to the interpreter.
4526 __ code()->shared_stub_to_interp_for(_method, call_offset);
4527 } else {
4528 // Emit stubs for static call.
4529 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4530 __ clear_inst_mark();
4531 if (stub == nullptr) {
4532 ciEnv::current()->record_failure("CodeCache is full");
4533 return;
4534 }
4535 }
4536 }
4537 __ post_call_nop();
4538 %}
4539
4540 enc_class Java_Dynamic_Call(method meth) %{
4541 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4542 __ post_call_nop();
4543 %}
4544
4545 enc_class call_epilog %{
4546 if (VerifyStackAtCalls) {
4547 // Check that stack depth is unchanged: find majik cookie on stack
4548 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4549 Label L;
4550 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4551 __ jccb(Assembler::equal, L);
4552 // Die if stack mismatch
4553 __ int3();
4554 __ bind(L);
4555 }
4556 %}
4557
4558 %}
4559
4560 //----------FRAME--------------------------------------------------------------
4561 // Definition of frame structure and management information.
4562 //
4563 // S T A C K L A Y O U T Allocators stack-slot number
4564 // | (to get allocators register number
4565 // G Owned by | | v add OptoReg::stack0())
4566 // r CALLER | |
4567 // o | +--------+ pad to even-align allocators stack-slot
4568 // w V | pad0 | numbers; owned by CALLER
4569 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4570 // h ^ | in | 5
4571 // | | args | 4 Holes in incoming args owned by SELF
4572 // | | | | 3
4573 // | | +--------+
4574 // V | | old out| Empty on Intel, window on Sparc
4575 // | old |preserve| Must be even aligned.
4576 // | SP-+--------+----> Matcher::_old_SP, even aligned
4577 // | | in | 3 area for Intel ret address
4578 // Owned by |preserve| Empty on Sparc.
4579 // SELF +--------+
4580 // | | pad2 | 2 pad to align old SP
4581 // | +--------+ 1
4582 // | | locks | 0
4583 // | +--------+----> OptoReg::stack0(), even aligned
4584 // | | pad1 | 11 pad to align new SP
4585 // | +--------+
4586 // | | | 10
4587 // | | spills | 9 spills
4588 // V | | 8 (pad0 slot for callee)
4589 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4590 // ^ | out | 7
4591 // | | args | 6 Holes in outgoing args owned by CALLEE
4592 // Owned by +--------+
4593 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4594 // | new |preserve| Must be even-aligned.
4595 // | SP-+--------+----> Matcher::_new_SP, even aligned
4596 // | | |
4597 //
4598 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4599 // known from SELF's arguments and the Java calling convention.
4600 // Region 6-7 is determined per call site.
4601 // Note 2: If the calling convention leaves holes in the incoming argument
4602 // area, those holes are owned by SELF. Holes in the outgoing area
4603 // are owned by the CALLEE. Holes should not be necessary in the
4604 // incoming area, as the Java calling convention is completely under
4605 // the control of the AD file. Doubles can be sorted and packed to
4606 // avoid holes. Holes in the outgoing arguments may be necessary for
4607 // varargs C calling conventions.
4608 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4609 // even aligned with pad0 as needed.
4610 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4611 // region 6-11 is even aligned; it may be padded out more so that
4612 // the region from SP to FP meets the minimum stack alignment.
4613 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4614 // alignment. Region 11, pad1, may be dynamically extended so that
4615 // SP meets the minimum alignment.
4616
4617 frame
4618 %{
4619 // This register defines part of the calling convention
4620 // between compiled code and the interpreter.
4621 inline_cache_reg(RAX); // Inline Cache Register
4622
4623 // Optional: name the operand used by cisc-spilling to access
4624 // [stack_pointer + offset]
4625 cisc_spilling_operand_name(indOffset32);
4626
4627 // Number of stack slots consumed by locking an object
4628 sync_stack_slots(2);
4629
4630 // Compiled code's Frame Pointer
4631 frame_pointer(RSP);
4632
4633 // Interpreter stores its frame pointer in a register which is
4634 // stored to the stack by I2CAdaptors.
4635 // I2CAdaptors convert from interpreted Java to compiled Java.
4636 interpreter_frame_pointer(RBP);
4637
4638 // Stack alignment requirement
4639 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4640
4641 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4642 // for calls to C. Supports the var-args backing area for register parms.
4643 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4644
4645 // The after-PROLOG location of the return address. Location of
4646 // return address specifies a type (REG or STACK) and a number
4647 // representing the register number (i.e. - use a register name) or
4648 // stack slot.
4649 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4650 // Otherwise, it is above the locks and verification slot and alignment word
4651 return_addr(STACK - 2 +
4652 align_up((Compile::current()->in_preserve_stack_slots() +
4653 Compile::current()->fixed_slots()),
4654 stack_alignment_in_slots()));
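// Worked example with hypothetical values: if in_preserve_stack_slots() +
// fixed_slots() == 2 and stack_alignment_in_slots() == 2, then
// align_up(2, 2) == 2 and the return address is at STACK - 2 + 2 == slot 0,
// the "slot 0" case noted above. With, say, three additional fixed slots,
// align_up(5, 2) == 6 puts it at slot 4: the "-2" always leaves room for
// the two 32-bit slots the 8-byte return address itself occupies.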
4655
4656 // Location of compiled Java return values. Same as C for now.
4657 return_value
4658 %{
4659 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4660 "only return normal values");
4661
4662 static const int lo[Op_RegL + 1] = {
4663 0,
4664 0,
4665 RAX_num, // Op_RegN
4666 RAX_num, // Op_RegI
4667 RAX_num, // Op_RegP
4668 XMM0_num, // Op_RegF
4669 XMM0_num, // Op_RegD
4670 RAX_num // Op_RegL
4671 };
4672 static const int hi[Op_RegL + 1] = {
4673 0,
4674 0,
4675 OptoReg::Bad, // Op_RegN
4676 OptoReg::Bad, // Op_RegI
4677 RAX_H_num, // Op_RegP
4678 OptoReg::Bad, // Op_RegF
4679 XMM0b_num, // Op_RegD
4680 RAX_H_num // Op_RegL
4681 };
4682 // Excluded flags and vector registers.
4683 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4684 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4685 %}
4686 %}
4687
4688 //----------ATTRIBUTES---------------------------------------------------------
4689 //----------Operand Attributes-------------------------------------------------
4690 op_attrib op_cost(0); // Required cost attribute
4691
4692 //----------Instruction Attributes---------------------------------------------
4693 ins_attrib ins_cost(100); // Required cost attribute
4694 ins_attrib ins_size(8); // Required size attribute (in bits)
4695 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4696 // a non-matching short branch variant
4697 // of some long branch?
4698 ins_attrib ins_alignment(1); // Required alignment attribute (must
4699 // be a power of 2) specifies the
4700 // alignment that some part of the
4701 // instruction (not necessarily the
4702 // start) requires. If > 1, a
4703 // compute_padding() function must be
4704 // provided for the instruction
4705
4706 // Whether this node is expanded during code emission into a sequence of
4707 // instructions and the first instruction can perform an implicit null check.
4708 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4709
4710 //----------OPERANDS-----------------------------------------------------------
4711 // Operand definitions must precede instruction definitions for correct parsing
4712 // in the ADLC because operands constitute user defined types which are used in
4713 // instruction definitions.
4714
4715 //----------Simple Operands----------------------------------------------------
4716 // Immediate Operands
4717 // Integer Immediate
4718 operand immI()
4719 %{
4720 match(ConI);
4721
4722 op_cost(10);
4723 format %{ %}
4724 interface(CONST_INTER);
4725 %}
4726
4727 // Constant for test vs zero
4728 operand immI_0()
4729 %{
4730 predicate(n->get_int() == 0);
4731 match(ConI);
4732
4733 op_cost(0);
4734 format %{ %}
4735 interface(CONST_INTER);
4736 %}
4737
4738 // Constant for increment
4739 operand immI_1()
4740 %{
4741 predicate(n->get_int() == 1);
4742 match(ConI);
4743
4744 op_cost(0);
4745 format %{ %}
4746 interface(CONST_INTER);
4747 %}
4748
4749 // Constant for decrement
4750 operand immI_M1()
4751 %{
4752 predicate(n->get_int() == -1);
4753 match(ConI);
4754
4755 op_cost(0);
4756 format %{ %}
4757 interface(CONST_INTER);
4758 %}
4759
4760 operand immI_2()
4761 %{
4762 predicate(n->get_int() == 2);
4763 match(ConI);
4764
4765 op_cost(0);
4766 format %{ %}
4767 interface(CONST_INTER);
4768 %}
4769
4770 operand immI_4()
4771 %{
4772 predicate(n->get_int() == 4);
4773 match(ConI);
4774
4775 op_cost(0);
4776 format %{ %}
4777 interface(CONST_INTER);
4778 %}
4779
4780 operand immI_8()
4781 %{
4782 predicate(n->get_int() == 8);
4783 match(ConI);
4784
4785 op_cost(0);
4786 format %{ %}
4787 interface(CONST_INTER);
4788 %}
4789
4790 // Valid scale values for addressing modes
4791 operand immI2()
4792 %{
4793 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4794 match(ConI);
4795
4796 format %{ %}
4797 interface(CONST_INTER);
4798 %}
4799
4800 operand immU7()
4801 %{
4802 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4803 match(ConI);
4804
4805 op_cost(5);
4806 format %{ %}
4807 interface(CONST_INTER);
4808 %}
4809
4810 operand immI8()
4811 %{
4812 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4813 match(ConI);
4814
4815 op_cost(5);
4816 format %{ %}
4817 interface(CONST_INTER);
4818 %}
4819
4820 operand immU8()
4821 %{
4822 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4823 match(ConI);
4824
4825 op_cost(5);
4826 format %{ %}
4827 interface(CONST_INTER);
4828 %}
4829
4830 operand immI16()
4831 %{
4832 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4833 match(ConI);
4834
4835 op_cost(10);
4836 format %{ %}
4837 interface(CONST_INTER);
4838 %}
4839
4840 // Int Immediate non-negative
4841 operand immU31()
4842 %{
4843 predicate(n->get_int() >= 0);
4844 match(ConI);
4845
4846 op_cost(0);
4847 format %{ %}
4848 interface(CONST_INTER);
4849 %}
4850
4851 // Pointer Immediate
4852 operand immP()
4853 %{
4854 match(ConP);
4855
4856 op_cost(10);
4857 format %{ %}
4858 interface(CONST_INTER);
4859 %}
4860
4861 // Null Pointer Immediate
4862 operand immP0()
4863 %{
4864 predicate(n->get_ptr() == 0);
4865 match(ConP);
4866
4867 op_cost(5);
4868 format %{ %}
4869 interface(CONST_INTER);
4870 %}
4871
4872 // Narrow Pointer Immediate
4873 operand immN() %{
4874 match(ConN);
4875
4876 op_cost(10);
4877 format %{ %}
4878 interface(CONST_INTER);
4879 %}
4880
4881 operand immNKlass() %{
4882 match(ConNKlass);
4883
4884 op_cost(10);
4885 format %{ %}
4886 interface(CONST_INTER);
4887 %}
4888
4889 // Narrow Null Pointer Immediate
4890 operand immN0() %{
4891 predicate(n->get_narrowcon() == 0);
4892 match(ConN);
4893
4894 op_cost(5);
4895 format %{ %}
4896 interface(CONST_INTER);
4897 %}
4898
4899 operand immP31()
4900 %{
4901 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4902 && (n->get_ptr() >> 31) == 0);
4903 match(ConP);
4904
4905 op_cost(5);
4906 format %{ %}
4907 interface(CONST_INTER);
4908 %}
4909
4910
4911 // Long Immediate
4912 operand immL()
4913 %{
4914 match(ConL);
4915
4916 op_cost(20);
4917 format %{ %}
4918 interface(CONST_INTER);
4919 %}
4920
4921 // Long Immediate 8-bit
4922 operand immL8()
4923 %{
4924 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4925 match(ConL);
4926
4927 op_cost(5);
4928 format %{ %}
4929 interface(CONST_INTER);
4930 %}
4931
4932 // Long Immediate 32-bit unsigned
4933 operand immUL32()
4934 %{
4935 predicate(n->get_long() == (unsigned int) (n->get_long()));
4936 match(ConL);
4937
4938 op_cost(10);
4939 format %{ %}
4940 interface(CONST_INTER);
4941 %}
4942
4943 // Long Immediate 32-bit signed
4944 operand immL32()
4945 %{
4946 predicate(n->get_long() == (int) (n->get_long()));
4947 match(ConL);
4948
4949 op_cost(15);
4950 format %{ %}
4951 interface(CONST_INTER);
4952 %}
4953
4954 operand immL_Pow2()
4955 %{
4956 predicate(is_power_of_2((julong)n->get_long()));
4957 match(ConL);
4958
4959 op_cost(15);
4960 format %{ %}
4961 interface(CONST_INTER);
4962 %}
4963
4964 operand immL_NotPow2()
4965 %{
4966 predicate(is_power_of_2((julong)~n->get_long()));
4967 match(ConL);
4968
4969 op_cost(15);
4970 format %{ %}
4971 interface(CONST_INTER);
4972 %}
4973
4974 // Long Immediate zero
4975 operand immL0()
4976 %{
4977 predicate(n->get_long() == 0L);
4978 match(ConL);
4979
4980 op_cost(10);
4981 format %{ %}
4982 interface(CONST_INTER);
4983 %}
4984
4985 // Constant for increment
4986 operand immL1()
4987 %{
4988 predicate(n->get_long() == 1);
4989 match(ConL);
4990
4991 format %{ %}
4992 interface(CONST_INTER);
4993 %}
4994
4995 // Constant for decrement
4996 operand immL_M1()
4997 %{
4998 predicate(n->get_long() == -1);
4999 match(ConL);
5000
5001 format %{ %}
5002 interface(CONST_INTER);
5003 %}
5004
5005 // Long Immediate: low 32-bit mask
5006 operand immL_32bits()
5007 %{
5008 predicate(n->get_long() == 0xFFFFFFFFL);
5009 match(ConL);
5010 op_cost(20);
5011
5012 format %{ %}
5013 interface(CONST_INTER);
5014 %}
5015
5016 // Int Immediate: 2^n-1, positive
5017 operand immI_Pow2M1()
5018 %{
5019 predicate((n->get_int() > 0)
5020 && is_power_of_2((juint)n->get_int() + 1));
5021 match(ConI);
5022
5023 op_cost(20);
5024 format %{ %}
5025 interface(CONST_INTER);
5026 %}
5027
5028 // Float Immediate zero
5029 operand immF0()
5030 %{
5031 predicate(jint_cast(n->getf()) == 0);
5032 match(ConF);
5033
5034 op_cost(5);
5035 format %{ %}
5036 interface(CONST_INTER);
5037 %}
5038
5039 // Float Immediate
5040 operand immF()
5041 %{
5042 match(ConF);
5043
5044 op_cost(15);
5045 format %{ %}
5046 interface(CONST_INTER);
5047 %}
5048
5049 // Half Float Immediate
5050 operand immH()
5051 %{
5052 match(ConH);
5053
5054 op_cost(15);
5055 format %{ %}
5056 interface(CONST_INTER);
5057 %}
5058
5059 // Double Immediate zero
5060 operand immD0()
5061 %{
5062 predicate(jlong_cast(n->getd()) == 0);
5063 match(ConD);
5064
5065 op_cost(5);
5066 format %{ %}
5067 interface(CONST_INTER);
5068 %}
5069
5070 // Double Immediate
5071 operand immD()
5072 %{
5073 match(ConD);
5074
5075 op_cost(15);
5076 format %{ %}
5077 interface(CONST_INTER);
5078 %}
5079
5080 // Immediates for special shifts (sign extend)
5081
5082 // Constants for 16- and 24-bit shift amounts
5083 operand immI_16()
5084 %{
5085 predicate(n->get_int() == 16);
5086 match(ConI);
5087
5088 format %{ %}
5089 interface(CONST_INTER);
5090 %}
5091
5092 operand immI_24()
5093 %{
5094 predicate(n->get_int() == 24);
5095 match(ConI);
5096
5097 format %{ %}
5098 interface(CONST_INTER);
5099 %}
5100
5101 // Constant for byte-wide masking
5102 operand immI_255()
5103 %{
5104 predicate(n->get_int() == 255);
5105 match(ConI);
5106
5107 format %{ %}
5108 interface(CONST_INTER);
5109 %}
5110
5111 // Constant for short-wide masking
5112 operand immI_65535()
5113 %{
5114 predicate(n->get_int() == 65535);
5115 match(ConI);
5116
5117 format %{ %}
5118 interface(CONST_INTER);
5119 %}
5120
5121 // Constant for byte-wide masking
5122 operand immL_255()
5123 %{
5124 predicate(n->get_long() == 255);
5125 match(ConL);
5126
5127 format %{ %}
5128 interface(CONST_INTER);
5129 %}
5130
5131 // Constant for short-wide masking
5132 operand immL_65535()
5133 %{
5134 predicate(n->get_long() == 65535);
5135 match(ConL);
5136
5137 format %{ %}
5138 interface(CONST_INTER);
5139 %}
5140
5141 operand kReg()
5142 %{
5143 constraint(ALLOC_IN_RC(vectmask_reg));
5144 match(RegVectMask);
5145 format %{%}
5146 interface(REG_INTER);
5147 %}
5148
5149 // Register Operands
5150 // Integer Register
5151 operand rRegI()
5152 %{
5153 constraint(ALLOC_IN_RC(int_reg));
5154 match(RegI);
5155
5156 match(rax_RegI);
5157 match(rbx_RegI);
5158 match(rcx_RegI);
5159 match(rdx_RegI);
5160 match(rdi_RegI);
5161
5162 format %{ %}
5163 interface(REG_INTER);
5164 %}
5165
5166 // Special Registers
5167 operand rax_RegI()
5168 %{
5169 constraint(ALLOC_IN_RC(int_rax_reg));
5170 match(RegI);
5171 match(rRegI);
5172
5173 format %{ "RAX" %}
5174 interface(REG_INTER);
5175 %}
5176
5177 // Special Registers
5178 operand rbx_RegI()
5179 %{
5180 constraint(ALLOC_IN_RC(int_rbx_reg));
5181 match(RegI);
5182 match(rRegI);
5183
5184 format %{ "RBX" %}
5185 interface(REG_INTER);
5186 %}
5187
5188 operand rcx_RegI()
5189 %{
5190 constraint(ALLOC_IN_RC(int_rcx_reg));
5191 match(RegI);
5192 match(rRegI);
5193
5194 format %{ "RCX" %}
5195 interface(REG_INTER);
5196 %}
5197
5198 operand rdx_RegI()
5199 %{
5200 constraint(ALLOC_IN_RC(int_rdx_reg));
5201 match(RegI);
5202 match(rRegI);
5203
5204 format %{ "RDX" %}
5205 interface(REG_INTER);
5206 %}
5207
5208 operand rdi_RegI()
5209 %{
5210 constraint(ALLOC_IN_RC(int_rdi_reg));
5211 match(RegI);
5212 match(rRegI);
5213
5214 format %{ "RDI" %}
5215 interface(REG_INTER);
5216 %}
5217
5218 operand no_rax_rdx_RegI()
5219 %{
5220 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5221 match(RegI);
5222 match(rbx_RegI);
5223 match(rcx_RegI);
5224 match(rdi_RegI);
5225
5226 format %{ %}
5227 interface(REG_INTER);
5228 %}
5229
5230 operand no_rbp_r13_RegI()
5231 %{
5232 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5233 match(RegI);
5234 match(rRegI);
5235 match(rax_RegI);
5236 match(rbx_RegI);
5237 match(rcx_RegI);
5238 match(rdx_RegI);
5239 match(rdi_RegI);
5240
5241 format %{ %}
5242 interface(REG_INTER);
5243 %}
5244
5245 // Pointer Register
5246 operand any_RegP()
5247 %{
5248 constraint(ALLOC_IN_RC(any_reg));
5249 match(RegP);
5250 match(rax_RegP);
5251 match(rbx_RegP);
5252 match(rdi_RegP);
5253 match(rsi_RegP);
5254 match(rbp_RegP);
5255 match(r15_RegP);
5256 match(rRegP);
5257
5258 format %{ %}
5259 interface(REG_INTER);
5260 %}
5261
5262 operand rRegP()
5263 %{
5264 constraint(ALLOC_IN_RC(ptr_reg));
5265 match(RegP);
5266 match(rax_RegP);
5267 match(rbx_RegP);
5268 match(rdi_RegP);
5269 match(rsi_RegP);
5270 match(rbp_RegP); // See Q&A below about
5271 match(r15_RegP); // r15_RegP and rbp_RegP.
5272
5273 format %{ %}
5274 interface(REG_INTER);
5275 %}
5276
5277 operand rRegN() %{
5278 constraint(ALLOC_IN_RC(int_reg));
5279 match(RegN);
5280
5281 format %{ %}
5282 interface(REG_INTER);
5283 %}
5284
5285 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5286 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5287 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5288 // The output of an instruction is controlled by the allocator, which respects
5289 // register class masks, not match rules. Unless an instruction mentions
5290 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5291 // by the allocator as an input.
5292 // The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
5293 // RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5294 // result, RBP is not included in the output of the instruction either.
5295
5296 // This operand is not allowed to use RBP even if
5297 // RBP is not used to hold the frame pointer.
5298 operand no_rbp_RegP()
5299 %{
5300 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5301 match(RegP);
5302 match(rbx_RegP);
5303 match(rsi_RegP);
5304 match(rdi_RegP);
5305
5306 format %{ %}
5307 interface(REG_INTER);
5308 %}
5309
5310 // Special Registers
5311 // Return a pointer value
5312 operand rax_RegP()
5313 %{
5314 constraint(ALLOC_IN_RC(ptr_rax_reg));
5315 match(RegP);
5316 match(rRegP);
5317
5318 format %{ %}
5319 interface(REG_INTER);
5320 %}
5321
5322 // Special Registers
5323 // Return a compressed pointer value
5324 operand rax_RegN()
5325 %{
5326 constraint(ALLOC_IN_RC(int_rax_reg));
5327 match(RegN);
5328 match(rRegN);
5329
5330 format %{ %}
5331 interface(REG_INTER);
5332 %}
5333
5334 // Used in AtomicAdd
5335 operand rbx_RegP()
5336 %{
5337 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5338 match(RegP);
5339 match(rRegP);
5340
5341 format %{ %}
5342 interface(REG_INTER);
5343 %}
5344
5345 operand rsi_RegP()
5346 %{
5347 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5348 match(RegP);
5349 match(rRegP);
5350
5351 format %{ %}
5352 interface(REG_INTER);
5353 %}
5354
5355 operand rbp_RegP()
5356 %{
5357 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5358 match(RegP);
5359 match(rRegP);
5360
5361 format %{ %}
5362 interface(REG_INTER);
5363 %}
5364
5365 // Used in rep stosq
5366 operand rdi_RegP()
5367 %{
5368 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5369 match(RegP);
5370 match(rRegP);
5371
5372 format %{ %}
5373 interface(REG_INTER);
5374 %}
5375
5376 operand r15_RegP()
5377 %{
5378 constraint(ALLOC_IN_RC(ptr_r15_reg));
5379 match(RegP);
5380 match(rRegP);
5381
5382 format %{ %}
5383 interface(REG_INTER);
5384 %}
5385
5386 operand rRegL()
5387 %{
5388 constraint(ALLOC_IN_RC(long_reg));
5389 match(RegL);
5390 match(rax_RegL);
5391 match(rdx_RegL);
5392
5393 format %{ %}
5394 interface(REG_INTER);
5395 %}
5396
5397 // Special Registers
5398 operand no_rax_rdx_RegL()
5399 %{
5400 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5401 match(RegL);
5402 match(rRegL);
5403
5404 format %{ %}
5405 interface(REG_INTER);
5406 %}
5407
5408 operand rax_RegL()
5409 %{
5410 constraint(ALLOC_IN_RC(long_rax_reg));
5411 match(RegL);
5412 match(rRegL);
5413
5414 format %{ "RAX" %}
5415 interface(REG_INTER);
5416 %}
5417
5418 operand rcx_RegL()
5419 %{
5420 constraint(ALLOC_IN_RC(long_rcx_reg));
5421 match(RegL);
5422 match(rRegL);
5423
5424 format %{ %}
5425 interface(REG_INTER);
5426 %}
5427
5428 operand rdx_RegL()
5429 %{
5430 constraint(ALLOC_IN_RC(long_rdx_reg));
5431 match(RegL);
5432 match(rRegL);
5433
5434 format %{ %}
5435 interface(REG_INTER);
5436 %}
5437
5438 operand r11_RegL()
5439 %{
5440 constraint(ALLOC_IN_RC(long_r11_reg));
5441 match(RegL);
5442 match(rRegL);
5443
5444 format %{ %}
5445 interface(REG_INTER);
5446 %}
5447
5448 operand no_rbp_r13_RegL()
5449 %{
5450 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5451 match(RegL);
5452 match(rRegL);
5453 match(rax_RegL);
5454 match(rcx_RegL);
5455 match(rdx_RegL);
5456
5457 format %{ %}
5458 interface(REG_INTER);
5459 %}
5460
5461 // Flags register, used as output of compare instructions
5462 operand rFlagsReg()
5463 %{
5464 constraint(ALLOC_IN_RC(int_flags));
5465 match(RegFlags);
5466
5467 format %{ "RFLAGS" %}
5468 interface(REG_INTER);
5469 %}
5470
5471 // Flags register, used as output of FLOATING POINT compare instructions
5472 operand rFlagsRegU()
5473 %{
5474 constraint(ALLOC_IN_RC(int_flags));
5475 match(RegFlags);
5476
5477 format %{ "RFLAGS_U" %}
5478 interface(REG_INTER);
5479 %}
5480
5481 operand rFlagsRegUCF() %{
5482 constraint(ALLOC_IN_RC(int_flags));
5483 match(RegFlags);
5484 predicate(false);
5485
5486 format %{ "RFLAGS_U_CF" %}
5487 interface(REG_INTER);
5488 %}
5489
5490 // Float register operands
5491 operand regF() %{
5492 constraint(ALLOC_IN_RC(float_reg));
5493 match(RegF);
5494
5495 format %{ %}
5496 interface(REG_INTER);
5497 %}
5498
5499 // Float register operands
5500 operand legRegF() %{
5501 constraint(ALLOC_IN_RC(float_reg_legacy));
5502 match(RegF);
5503
5504 format %{ %}
5505 interface(REG_INTER);
5506 %}
5507
5508 // Float register operands
5509 operand vlRegF() %{
5510 constraint(ALLOC_IN_RC(float_reg_vl));
5511 match(RegF);
5512
5513 format %{ %}
5514 interface(REG_INTER);
5515 %}
5516
5517 // Double register operands
5518 operand regD() %{
5519 constraint(ALLOC_IN_RC(double_reg));
5520 match(RegD);
5521
5522 format %{ %}
5523 interface(REG_INTER);
5524 %}
5525
5526 // Double register operands
5527 operand legRegD() %{
5528 constraint(ALLOC_IN_RC(double_reg_legacy));
5529 match(RegD);
5530
5531 format %{ %}
5532 interface(REG_INTER);
5533 %}
5534
5535 // Double register operands
5536 operand vlRegD() %{
5537 constraint(ALLOC_IN_RC(double_reg_vl));
5538 match(RegD);
5539
5540 format %{ %}
5541 interface(REG_INTER);
5542 %}
5543
5544 //----------Memory Operands----------------------------------------------------
5545 // Direct Memory Operand
5546 // operand direct(immP addr)
5547 // %{
5548 // match(addr);
5549
5550 // format %{ "[$addr]" %}
5551 // interface(MEMORY_INTER) %{
5552 // base(0xFFFFFFFF);
5553 // index(0x4);
5554 // scale(0x0);
5555 // disp($addr);
5556 // %}
5557 // %}
5558
5559 // Indirect Memory Operand
5560 operand indirect(any_RegP reg)
5561 %{
5562 constraint(ALLOC_IN_RC(ptr_reg));
5563 match(reg);
5564
5565 format %{ "[$reg]" %}
5566 interface(MEMORY_INTER) %{
5567 base($reg);
5568 index(0x4);
5569 scale(0x0);
5570 disp(0x0);
5571 %}
5572 %}
5573
5574 // Indirect Memory Plus Short Offset Operand
5575 operand indOffset8(any_RegP reg, immL8 off)
5576 %{
5577 constraint(ALLOC_IN_RC(ptr_reg));
5578 match(AddP reg off);
5579
5580 format %{ "[$reg + $off (8-bit)]" %}
5581 interface(MEMORY_INTER) %{
5582 base($reg);
5583 index(0x4);
5584 scale(0x0);
5585 disp($off);
5586 %}
5587 %}
5588
5589 // Indirect Memory Plus Long Offset Operand
5590 operand indOffset32(any_RegP reg, immL32 off)
5591 %{
5592 constraint(ALLOC_IN_RC(ptr_reg));
5593 match(AddP reg off);
5594
5595 format %{ "[$reg + $off (32-bit)]" %}
5596 interface(MEMORY_INTER) %{
5597 base($reg);
5598 index(0x4);
5599 scale(0x0);
5600 disp($off);
5601 %}
5602 %}
5603
5604 // Indirect Memory Plus Index Register Plus Offset Operand
5605 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5606 %{
5607 constraint(ALLOC_IN_RC(ptr_reg));
5608 match(AddP (AddP reg lreg) off);
5609
5610 op_cost(10);
5611 format %{"[$reg + $off + $lreg]" %}
5612 interface(MEMORY_INTER) %{
5613 base($reg);
5614 index($lreg);
5615 scale(0x0);
5616 disp($off);
5617 %}
5618 %}
5619
5620 // Indirect Memory Plus Index Register Operand
5621 operand indIndex(any_RegP reg, rRegL lreg)
5622 %{
5623 constraint(ALLOC_IN_RC(ptr_reg));
5624 match(AddP reg lreg);
5625
5626 op_cost(10);
5627 format %{"[$reg + $lreg]" %}
5628 interface(MEMORY_INTER) %{
5629 base($reg);
5630 index($lreg);
5631 scale(0x0);
5632 disp(0x0);
5633 %}
5634 %}
5635
5636 // Indirect Memory Times Scale Plus Index Register
5637 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5638 %{
5639 constraint(ALLOC_IN_RC(ptr_reg));
5640 match(AddP reg (LShiftL lreg scale));
5641
5642 op_cost(10);
5643 format %{"[$reg + $lreg << $scale]" %}
5644 interface(MEMORY_INTER) %{
5645 base($reg);
5646 index($lreg);
5647 scale($scale);
5648 disp(0x0);
5649 %}
5650 %}
5651
5652 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5653 %{
5654 constraint(ALLOC_IN_RC(ptr_reg));
5655 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5656 match(AddP reg (LShiftL (ConvI2L idx) scale));
5657
5658 op_cost(10);
5659 format %{"[$reg + pos $idx << $scale]" %}
5660 interface(MEMORY_INTER) %{
5661 base($reg);
5662 index($idx);
5663 scale($scale);
5664 disp(0x0);
5665 %}
5666 %}
5667
5668 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5669 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5670 %{
5671 constraint(ALLOC_IN_RC(ptr_reg));
5672 match(AddP (AddP reg (LShiftL lreg scale)) off);
5673
5674 op_cost(10);
5675 format %{"[$reg + $off + $lreg << $scale]" %}
5676 interface(MEMORY_INTER) %{
5677 base($reg);
5678 index($lreg);
5679 scale($scale);
5680 disp($off);
5681 %}
5682 %}
5683
5684 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5685 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5686 %{
5687 constraint(ALLOC_IN_RC(ptr_reg));
5688 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5689 match(AddP (AddP reg (ConvI2L idx)) off);
5690
5691 op_cost(10);
5692 format %{"[$reg + $off + $idx]" %}
5693 interface(MEMORY_INTER) %{
5694 base($reg);
5695 index($idx);
5696 scale(0x0);
5697 disp($off);
5698 %}
5699 %}
5700
5701 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5702 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5703 %{
5704 constraint(ALLOC_IN_RC(ptr_reg));
5705 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5706 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5707
5708 op_cost(10);
5709 format %{"[$reg + $off + $idx << $scale]" %}
5710 interface(MEMORY_INTER) %{
5711 base($reg);
5712 index($idx);
5713 scale($scale);
5714 disp($off);
5715 %}
5716 %}
5717
5718 // Indirect Narrow Oop Plus Offset Operand
5719 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5720 // register, so we can't free r12 even with CompressedOops::base() == nullptr.
5721 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5722 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5723 constraint(ALLOC_IN_RC(ptr_reg));
5724 match(AddP (DecodeN reg) off);
5725
5726 op_cost(10);
5727 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5728 interface(MEMORY_INTER) %{
5729 base(0xc); // R12
5730 index($reg);
5731 scale(0x3);
5732 disp($off);
5733 %}
5734 %}
5735
5736 // Indirect Memory Operand
5737 operand indirectNarrow(rRegN reg)
5738 %{
5739 predicate(CompressedOops::shift() == 0);
5740 constraint(ALLOC_IN_RC(ptr_reg));
5741 match(DecodeN reg);
5742
5743 format %{ "[$reg]" %}
5744 interface(MEMORY_INTER) %{
5745 base($reg);
5746 index(0x4);
5747 scale(0x0);
5748 disp(0x0);
5749 %}
5750 %}
5751
5752 // Indirect Memory Plus Short Offset Operand
5753 operand indOffset8Narrow(rRegN reg, immL8 off)
5754 %{
5755 predicate(CompressedOops::shift() == 0);
5756 constraint(ALLOC_IN_RC(ptr_reg));
5757 match(AddP (DecodeN reg) off);
5758
5759 format %{ "[$reg + $off (8-bit)]" %}
5760 interface(MEMORY_INTER) %{
5761 base($reg);
5762 index(0x4);
5763 scale(0x0);
5764 disp($off);
5765 %}
5766 %}
5767
5768 // Indirect Memory Plus Long Offset Operand
5769 operand indOffset32Narrow(rRegN reg, immL32 off)
5770 %{
5771 predicate(CompressedOops::shift() == 0);
5772 constraint(ALLOC_IN_RC(ptr_reg));
5773 match(AddP (DecodeN reg) off);
5774
5775 format %{ "[$reg + $off (32-bit)]" %}
5776 interface(MEMORY_INTER) %{
5777 base($reg);
5778 index(0x4);
5779 scale(0x0);
5780 disp($off);
5781 %}
5782 %}
5783
5784 // Indirect Memory Plus Index Register Plus Offset Operand
5785 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5786 %{
5787 predicate(CompressedOops::shift() == 0);
5788 constraint(ALLOC_IN_RC(ptr_reg));
5789 match(AddP (AddP (DecodeN reg) lreg) off);
5790
5791 op_cost(10);
5792 format %{"[$reg + $off + $lreg]" %}
5793 interface(MEMORY_INTER) %{
5794 base($reg);
5795 index($lreg);
5796 scale(0x0);
5797 disp($off);
5798 %}
5799 %}
5800
5801 // Indirect Memory Plus Index Register Operand
5802 operand indIndexNarrow(rRegN reg, rRegL lreg)
5803 %{
5804 predicate(CompressedOops::shift() == 0);
5805 constraint(ALLOC_IN_RC(ptr_reg));
5806 match(AddP (DecodeN reg) lreg);
5807
5808 op_cost(10);
5809 format %{"[$reg + $lreg]" %}
5810 interface(MEMORY_INTER) %{
5811 base($reg);
5812 index($lreg);
5813 scale(0x0);
5814 disp(0x0);
5815 %}
5816 %}
5817
5818 // Indirect Memory Times Scale Plus Index Register
5819 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5820 %{
5821 predicate(CompressedOops::shift() == 0);
5822 constraint(ALLOC_IN_RC(ptr_reg));
5823 match(AddP (DecodeN reg) (LShiftL lreg scale));
5824
5825 op_cost(10);
5826 format %{"[$reg + $lreg << $scale]" %}
5827 interface(MEMORY_INTER) %{
5828 base($reg);
5829 index($lreg);
5830 scale($scale);
5831 disp(0x0);
5832 %}
5833 %}
5834
5835 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5836 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5837 %{
5838 predicate(CompressedOops::shift() == 0);
5839 constraint(ALLOC_IN_RC(ptr_reg));
5840 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5841
5842 op_cost(10);
5843 format %{"[$reg + $off + $lreg << $scale]" %}
5844 interface(MEMORY_INTER) %{
5845 base($reg);
5846 index($lreg);
5847 scale($scale);
5848 disp($off);
5849 %}
5850 %}
5851
5852 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5853 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5854 %{
5855 constraint(ALLOC_IN_RC(ptr_reg));
5856 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5857 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5858
5859 op_cost(10);
5860 format %{"[$reg + $off + $idx]" %}
5861 interface(MEMORY_INTER) %{
5862 base($reg);
5863 index($idx);
5864 scale(0x0);
5865 disp($off);
5866 %}
5867 %}
5868
5869 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5870 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5871 %{
5872 constraint(ALLOC_IN_RC(ptr_reg));
5873 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5874 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5875
5876 op_cost(10);
5877 format %{"[$reg + $off + $idx << $scale]" %}
5878 interface(MEMORY_INTER) %{
5879 base($reg);
5880 index($idx);
5881 scale($scale);
5882 disp($off);
5883 %}
5884 %}
5885
5886 //----------Special Memory Operands--------------------------------------------
5887 // Stack Slot Operand - This operand is used for loading and storing temporary
5888 // values on the stack where a match requires a value to
5889 // flow through memory.
5890 operand stackSlotP(sRegP reg)
5891 %{
5892 constraint(ALLOC_IN_RC(stack_slots));
5893 // No match rule because this operand is only generated in matching
5894
5895 format %{ "[$reg]" %}
5896 interface(MEMORY_INTER) %{
5897 base(0x4); // RSP
5898 index(0x4); // No Index
5899 scale(0x0); // No Scale
5900 disp($reg); // Stack Offset
5901 %}
5902 %}
5903
5904 operand stackSlotI(sRegI reg)
5905 %{
5906 constraint(ALLOC_IN_RC(stack_slots));
5907 // No match rule because this operand is only generated in matching
5908
5909 format %{ "[$reg]" %}
5910 interface(MEMORY_INTER) %{
5911 base(0x4); // RSP
5912 index(0x4); // No Index
5913 scale(0x0); // No Scale
5914 disp($reg); // Stack Offset
5915 %}
5916 %}
5917
5918 operand stackSlotF(sRegF reg)
5919 %{
5920 constraint(ALLOC_IN_RC(stack_slots));
5921 // No match rule because this operand is only generated in matching
5922
5923 format %{ "[$reg]" %}
5924 interface(MEMORY_INTER) %{
5925 base(0x4); // RSP
5926 index(0x4); // No Index
5927 scale(0x0); // No Scale
5928 disp($reg); // Stack Offset
5929 %}
5930 %}
5931
5932 operand stackSlotD(sRegD reg)
5933 %{
5934 constraint(ALLOC_IN_RC(stack_slots));
5935 // No match rule because this operand is only generated in matching
5936
5937 format %{ "[$reg]" %}
5938 interface(MEMORY_INTER) %{
5939 base(0x4); // RSP
5940 index(0x4); // No Index
5941 scale(0x0); // No Scale
5942 disp($reg); // Stack Offset
5943 %}
5944 %}
5945 operand stackSlotL(sRegL reg)
5946 %{
5947 constraint(ALLOC_IN_RC(stack_slots));
5948 // No match rule because this operand is only generated in matching
5949
5950 format %{ "[$reg]" %}
5951 interface(MEMORY_INTER) %{
5952 base(0x4); // RSP
5953 index(0x4); // No Index
5954 scale(0x0); // No Scale
5955 disp($reg); // Stack Offset
5956 %}
5957 %}
5958
5959 //----------Conditional Branch Operands----------------------------------------
5960 // Comparison Op - This is the operation of the comparison, and is limited to
5961 // the following set of codes:
5962 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5963 //
5964 // Other attributes of the comparison, such as unsignedness, are specified
5965 // by the comparison instruction that sets a condition code flags register.
5966 // That result is represented by a flags operand whose subtype is appropriate
5967 // to the unsignedness (etc.) of the comparison.
5968 //
5969 // Later, the instruction which matches both the Comparison Op (a Bool) and
5970 // the flags (produced by the Cmp) specifies the coding of the comparison op
5971 // by matching a specific subtype of Bool operand below, such as cmpOpU.
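// A sketch of that pattern (in the shape of the jump rules later in this
// file): the branch matches the Bool plus the flags and extracts the
// condition bits through the $$cmpcode accessor of the cmpOp operand:
//
//   instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
//   %{
//     match(If cop cr);
//     effect(USE labl);
//     ins_encode %{
//       Label* L = $labl$$label;
//       __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);
//     %}
//     ins_pipe(pipe_jcc);
//   %}
//
// The hex values in the COND_INTER entries below are the x86 condition-code
// nibbles consumed by the jcc/setcc/cmovcc encodings (0x4 -> je, 0x5 -> jne, ...).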
5972
5973 // Comparison Code
5974 operand cmpOp()
5975 %{
5976 match(Bool);
5977
5978 format %{ "" %}
5979 interface(COND_INTER) %{
5980 equal(0x4, "e");
5981 not_equal(0x5, "ne");
5982 less(0xC, "l");
5983 greater_equal(0xD, "ge");
5984 less_equal(0xE, "le");
5985 greater(0xF, "g");
5986 overflow(0x0, "o");
5987 no_overflow(0x1, "no");
5988 %}
5989 %}
5990
5991 // Comparison Code, unsigned compare. Used by FP also, with
5992 // C2 (unordered) turned into GT or LT already. The other bits
5993 // C0 and C3 are turned into Carry & Zero flags.
5994 operand cmpOpU()
5995 %{
5996 match(Bool);
5997
5998 format %{ "" %}
5999 interface(COND_INTER) %{
6000 equal(0x4, "e");
6001 not_equal(0x5, "ne");
6002 less(0x2, "b");
6003 greater_equal(0x3, "ae");
6004 less_equal(0x6, "be");
6005 greater(0x7, "a");
6006 overflow(0x0, "o");
6007 no_overflow(0x1, "no");
6008 %}
6009 %}
6010
6011
6012 // Floating comparisons that don't require any fixup for the unordered case.
6013 // If both inputs of the comparison are the same, ZF is always set, so we
6014 // don't need to use cmpOpUCF2 for eq/ne.
6015 operand cmpOpUCF() %{
6016 match(Bool);
6017 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
6018 n->as_Bool()->_test._test == BoolTest::ge ||
6019 n->as_Bool()->_test._test == BoolTest::le ||
6020 n->as_Bool()->_test._test == BoolTest::gt ||
6021 n->in(1)->in(1) == n->in(1)->in(2));
6022 format %{ "" %}
6023 interface(COND_INTER) %{
6024 equal(0xb, "np");
6025 not_equal(0xa, "p");
6026 less(0x2, "b");
6027 greater_equal(0x3, "ae");
6028 less_equal(0x6, "be");
6029 greater(0x7, "a");
6030 overflow(0x0, "o");
6031 no_overflow(0x1, "no");
6032 %}
6033 %}
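// Why "np"/"p" stand in for eq/ne here: ucomiss/ucomisd set ZF = PF = CF = 1
// on an unordered (NaN) comparison. When both inputs are the same register,
// the only "not equal" outcome is the NaN case, which is flagged by PF, so
// "jnp" is exactly equal and "jp" is exactly not-equal.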
6034
6035
6036 // Floating comparisons that can be fixed up with extra conditional jumps
6037 operand cmpOpUCF2() %{
6038 match(Bool);
6039 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6040 n->as_Bool()->_test._test == BoolTest::eq) &&
6041 n->in(1)->in(1) != n->in(1)->in(2));
6042 format %{ "" %}
6043 interface(COND_INTER) %{
6044 equal(0x4, "e");
6045 not_equal(0x5, "ne");
6046 less(0x2, "b");
6047 greater_equal(0x3, "ae");
6048 less_equal(0x6, "be");
6049 greater(0x7, "a");
6050 overflow(0x0, "o");
6051 no_overflow(0x1, "no");
6052 %}
6053 %}
6054
6055 // Operands for bound floating-point register arguments
6056 operand rxmm0() %{
6057 constraint(ALLOC_IN_RC(xmm0_reg));
6058 match(VecX);
6059 format%{%}
6060 interface(REG_INTER);
6061 %}
6062
6063 // Vectors
6064
6065 // Dummy generic vector class. Should be used for all vector operands.
6066 // Replaced with vec[SDXYZ] during post-selection pass.
6067 operand vec() %{
6068 constraint(ALLOC_IN_RC(dynamic));
6069 match(VecX);
6070 match(VecY);
6071 match(VecZ);
6072 match(VecS);
6073 match(VecD);
6074
6075 format %{ %}
6076 interface(REG_INTER);
6077 %}
6078
6079 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6080 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6081 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6082 // runtime code generation via reg_class_dynamic.
6083 operand legVec() %{
6084 constraint(ALLOC_IN_RC(dynamic));
6085 match(VecX);
6086 match(VecY);
6087 match(VecZ);
6088 match(VecS);
6089 match(VecD);
6090
6091 format %{ %}
6092 interface(REG_INTER);
6093 %}
6094
6095 // Replaces vec during post-selection cleanup. See above.
6096 operand vecS() %{
6097 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6098 match(VecS);
6099
6100 format %{ %}
6101 interface(REG_INTER);
6102 %}
6103
6104 // Replaces legVec during post-selection cleanup. See above.
6105 operand legVecS() %{
6106 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6107 match(VecS);
6108
6109 format %{ %}
6110 interface(REG_INTER);
6111 %}
6112
6113 // Replaces vec during post-selection cleanup. See above.
6114 operand vecD() %{
6115 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6116 match(VecD);
6117
6118 format %{ %}
6119 interface(REG_INTER);
6120 %}
6121
6122 // Replaces legVec during post-selection cleanup. See above.
6123 operand legVecD() %{
6124 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6125 match(VecD);
6126
6127 format %{ %}
6128 interface(REG_INTER);
6129 %}
6130
6131 // Replaces vec during post-selection cleanup. See above.
6132 operand vecX() %{
6133 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6134 match(VecX);
6135
6136 format %{ %}
6137 interface(REG_INTER);
6138 %}
6139
6140 // Replaces legVec during post-selection cleanup. See above.
6141 operand legVecX() %{
6142 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6143 match(VecX);
6144
6145 format %{ %}
6146 interface(REG_INTER);
6147 %}
6148
6149 // Replaces vec during post-selection cleanup. See above.
6150 operand vecY() %{
6151 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6152 match(VecY);
6153
6154 format %{ %}
6155 interface(REG_INTER);
6156 %}
6157
6158 // Replaces legVec during post-selection cleanup. See above.
6159 operand legVecY() %{
6160 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6161 match(VecY);
6162
6163 format %{ %}
6164 interface(REG_INTER);
6165 %}
6166
6167 // Replaces vec during post-selection cleanup. See above.
6168 operand vecZ() %{
6169 constraint(ALLOC_IN_RC(vectorz_reg));
6170 match(VecZ);
6171
6172 format %{ %}
6173 interface(REG_INTER);
6174 %}
6175
6176 // Replaces legVec during post-selection cleanup. See above.
6177 operand legVecZ() %{
6178 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6179 match(VecZ);
6180
6181 format %{ %}
6182 interface(REG_INTER);
6183 %}
6184
6185 //----------OPERAND CLASSES----------------------------------------------------
6186 // Operand Classes are groups of operands that are used to simplify
6187 // instruction definitions by not requiring the AD writer to specify separate
6188 // instructions for every form of operand when the instruction accepts
6189 // multiple operand types with the same basic encoding and format. The classic
6190 // case of this is memory operands.
6191
6192 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6193 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6194 indCompressedOopOffset,
6195 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6196 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6197 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
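// With this class a single rule (a sketch of the add rules defined later):
//
//   instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst (LoadI src)));
//     effect(KILL cr);
//     ins_encode %{ __ addl($dst$$Register, $src$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}
//
// covers every addressing form listed in the opclass above, rather than
// requiring one instruct definition per memory operand type.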
6198
6199 //----------PIPELINE-----------------------------------------------------------
6200 // Rules which define the behavior of the target architectures pipeline.
6201 pipeline %{
6202
6203 //----------ATTRIBUTES---------------------------------------------------------
6204 attributes %{
6205 variable_size_instructions; // Instructions are of variable size
6206 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6207 instruction_unit_size = 1; // An instruction is 1 byte long
6208 instruction_fetch_unit_size = 16; // The processor fetches one line
6209 instruction_fetch_units = 1; // of 16 bytes
6210 %}
6211
6212 //----------RESOURCES----------------------------------------------------------
6213 // Resources are the functional units available to the machine
6214
6215 // Generic P2/P3 pipeline
6216 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6217 // 3 instructions decoded per cycle.
6218 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6219 // 3 ALU op, only ALU0 handles mul instructions.
6220 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6221 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6222 BR, FPU,
6223 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6224
6225 //----------PIPELINE DESCRIPTION-----------------------------------------------
6226 // Pipeline Description specifies the stages in the machine's pipeline
6227
6228 // Generic P2/P3 pipeline
6229 pipe_desc(S0, S1, S2, S3, S4, S5);
6230
6231 //----------PIPELINE CLASSES---------------------------------------------------
6232 // Pipeline Classes describe the stages in which input and output are
6233 // referenced by the hardware pipeline.
6234
6235 // Naming convention: ialu or fpu
6236 // Then: _reg
6237 // Then: _reg if there is a 2nd register
6238 // Then: _long if it's a pair of instructions implementing a long
6239 // Then: _fat if it requires the big decoder
6240 // Or: _mem if it requires the big decoder and a memory unit.
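// For example, "ialu_reg_reg" below is an integer ALU op on two registers
// usable by any decoder, while "ialu_reg_mem" also touches memory and so
// needs the big decoder (D0) plus a memory unit.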
6241
6242 // Integer ALU reg operation
6243 pipe_class ialu_reg(rRegI dst)
6244 %{
6245 single_instruction;
6246 dst : S4(write);
6247 dst : S3(read);
6248 DECODE : S0; // any decoder
6249 ALU : S3; // any alu
6250 %}
6251
6252 // Long ALU reg operation
6253 pipe_class ialu_reg_long(rRegL dst)
6254 %{
6255 instruction_count(2);
6256 dst : S4(write);
6257 dst : S3(read);
6258 DECODE : S0(2); // any 2 decoders
6259 ALU : S3(2); // both alus
6260 %}
6261
6262 // Integer ALU reg operation using big decoder
6263 pipe_class ialu_reg_fat(rRegI dst)
6264 %{
6265 single_instruction;
6266 dst : S4(write);
6267 dst : S3(read);
6268 D0 : S0; // big decoder only
6269 ALU : S3; // any alu
6270 %}
6271
6272 // Integer ALU reg-reg operation
6273 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6274 %{
6275 single_instruction;
6276 dst : S4(write);
6277 src : S3(read);
6278 DECODE : S0; // any decoder
6279 ALU : S3; // any alu
6280 %}
6281
6282 // Integer ALU reg-reg operation
6283 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6284 %{
6285 single_instruction;
6286 dst : S4(write);
6287 src : S3(read);
6288 D0 : S0; // big decoder only
6289 ALU : S3; // any alu
6290 %}
6291
6292 // Integer ALU reg-mem operation
6293 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6294 %{
6295 single_instruction;
6296 dst : S5(write);
6297 mem : S3(read);
6298 D0 : S0; // big decoder only
6299 ALU : S4; // any alu
6300 MEM : S3; // any mem
6301 %}
6302
6303 // Integer mem operation (prefetch)
6304 pipe_class ialu_mem(memory mem)
6305 %{
6306 single_instruction;
6307 mem : S3(read);
6308 D0 : S0; // big decoder only
6309 MEM : S3; // any mem
6310 %}
6311
6312 // Integer Store to Memory
6313 pipe_class ialu_mem_reg(memory mem, rRegI src)
6314 %{
6315 single_instruction;
6316 mem : S3(read);
6317 src : S5(read);
6318 D0 : S0; // big decoder only
6319 ALU : S4; // any alu
6320 MEM : S3;
6321 %}
6322
6323 // // Long Store to Memory
6324 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6325 // %{
6326 // instruction_count(2);
6327 // mem : S3(read);
6328 // src : S5(read);
6329 // D0 : S0(2); // big decoder only; twice
6330 // ALU : S4(2); // any 2 alus
6331 // MEM : S3(2); // Both mems
6332 // %}
6333
6334 // Integer Store to Memory
6335 pipe_class ialu_mem_imm(memory mem)
6336 %{
6337 single_instruction;
6338 mem : S3(read);
6339 D0 : S0; // big decoder only
6340 ALU : S4; // any alu
6341 MEM : S3;
6342 %}
6343
6344 // Integer ALU0 reg-reg operation
6345 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6346 %{
6347 single_instruction;
6348 dst : S4(write);
6349 src : S3(read);
6350 D0 : S0; // Big decoder only
6351 ALU0 : S3; // only alu0
6352 %}
6353
6354 // Integer ALU0 reg-mem operation
6355 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6356 %{
6357 single_instruction;
6358 dst : S5(write);
6359 mem : S3(read);
6360 D0 : S0; // big decoder only
6361 ALU0 : S4; // ALU0 only
6362 MEM : S3; // any mem
6363 %}
6364
6365 // Integer ALU reg-reg operation
6366 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6367 %{
6368 single_instruction;
6369 cr : S4(write);
6370 src1 : S3(read);
6371 src2 : S3(read);
6372 DECODE : S0; // any decoder
6373 ALU : S3; // any alu
6374 %}
6375
6376 // Integer ALU reg-imm operation
6377 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6378 %{
6379 single_instruction;
6380 cr : S4(write);
6381 src1 : S3(read);
6382 DECODE : S0; // any decoder
6383 ALU : S3; // any alu
6384 %}
6385
6386 // Integer ALU reg-mem operation
6387 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6388 %{
6389 single_instruction;
6390 cr : S4(write);
6391 src1 : S3(read);
6392 src2 : S3(read);
6393 D0 : S0; // big decoder only
6394 ALU : S4; // any alu
6395 MEM : S3;
6396 %}
6397
6398 // Conditional move reg-reg
6399 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6400 %{
6401 instruction_count(4);
6402 y : S4(read);
6403 q : S3(read);
6404 p : S3(read);
6405 DECODE : S0(4); // any decoder
6406 %}
6407
6408 // Conditional move reg-reg
6409 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6410 %{
6411 single_instruction;
6412 dst : S4(write);
6413 src : S3(read);
6414 cr : S3(read);
6415 DECODE : S0; // any decoder
6416 %}
6417
6418 // Conditional move reg-mem
6419 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6420 %{
6421 single_instruction;
6422 dst : S4(write);
6423 src : S3(read);
6424 cr : S3(read);
6425 DECODE : S0; // any decoder
6426 MEM : S3;
6427 %}
6428
6429 // Conditional move reg-reg long
6430 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6431 %{
6432 single_instruction;
6433 dst : S4(write);
6434 src : S3(read);
6435 cr : S3(read);
6436 DECODE : S0(2); // any 2 decoders
6437 %}
6438
6439 // Float reg-reg operation
6440 pipe_class fpu_reg(regD dst)
6441 %{
6442 instruction_count(2);
6443 dst : S3(read);
6444 DECODE : S0(2); // any 2 decoders
6445 FPU : S3;
6446 %}
6447
6448 // Float reg-reg operation
6449 pipe_class fpu_reg_reg(regD dst, regD src)
6450 %{
6451 instruction_count(2);
6452 dst : S4(write);
6453 src : S3(read);
6454 DECODE : S0(2); // any 2 decoders
6455 FPU : S3;
6456 %}
6457
6458 // Float reg-reg operation
6459 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6460 %{
6461 instruction_count(3);
6462 dst : S4(write);
6463 src1 : S3(read);
6464 src2 : S3(read);
6465 DECODE : S0(3); // any 3 decoders
6466 FPU : S3(2);
6467 %}
6468
6469 // Float reg-reg operation
6470 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6471 %{
6472 instruction_count(4);
6473 dst : S4(write);
6474 src1 : S3(read);
6475 src2 : S3(read);
6476 src3 : S3(read);
6477 DECODE : S0(4); // any 4 decoder slots
6478 FPU : S3(2);
6479 %}
6480
6481 // Float reg-reg operation
6482 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6483 %{
6484 instruction_count(4);
6485 dst : S4(write);
6486 src1 : S3(read);
6487 src2 : S3(read);
6488 src3 : S3(read);
6489 DECODE : S1(3); // any 3 decoders
6490 D0 : S0; // Big decoder only
6491 FPU : S3(2);
6492 MEM : S3;
6493 %}
6494
6495 // Float reg-mem operation
6496 pipe_class fpu_reg_mem(regD dst, memory mem)
6497 %{
6498 instruction_count(2);
6499 dst : S5(write);
6500 mem : S3(read);
6501 D0 : S0; // big decoder only
6502 DECODE : S1; // any decoder for FPU POP
6503 FPU : S4;
6504 MEM : S3; // any mem
6505 %}
6506
6507 // Float reg-mem operation
6508 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6509 %{
6510 instruction_count(3);
6511 dst : S5(write);
6512 src1 : S3(read);
6513 mem : S3(read);
6514 D0 : S0; // big decoder only
6515 DECODE : S1(2); // any decoder for FPU POP
6516 FPU : S4;
6517 MEM : S3; // any mem
6518 %}
6519
6520 // Float mem-reg operation
6521 pipe_class fpu_mem_reg(memory mem, regD src)
6522 %{
6523 instruction_count(2);
6524 src : S5(read);
6525 mem : S3(read);
6526 DECODE : S0; // any decoder for FPU PUSH
6527 D0 : S1; // big decoder only
6528 FPU : S4;
6529 MEM : S3; // any mem
6530 %}
6531
6532 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6533 %{
6534 instruction_count(3);
6535 src1 : S3(read);
6536 src2 : S3(read);
6537 mem : S3(read);
6538 DECODE : S0(2); // any decoder for FPU PUSH
6539 D0 : S1; // big decoder only
6540 FPU : S4;
6541 MEM : S3; // any mem
6542 %}
6543
6544 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6545 %{
6546 instruction_count(3);
6547 src1 : S3(read);
6548 src2 : S3(read);
6549 mem : S4(read);
6550 DECODE : S0; // any decoder for FPU PUSH
6551 D0 : S0(2); // big decoder only
6552 FPU : S4;
6553 MEM : S3(2); // any mem
6554 %}
6555
6556 pipe_class fpu_mem_mem(memory dst, memory src1)
6557 %{
6558 instruction_count(2);
6559 src1 : S3(read);
6560 dst : S4(read);
6561 D0 : S0(2); // big decoder only
6562 MEM : S3(2); // any mem
6563 %}
6564
6565 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6566 %{
6567 instruction_count(3);
6568 src1 : S3(read);
6569 src2 : S3(read);
6570 dst : S4(read);
6571 D0 : S0(3); // big decoder only
6572 FPU : S4;
6573 MEM : S3(3); // any mem
6574 %}
6575
6576 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6577 %{
6578 instruction_count(3);
6579 src1 : S4(read);
6580 mem : S4(read);
6581 DECODE : S0; // any decoder for FPU PUSH
6582 D0 : S0(2); // big decoder only
6583 FPU : S4;
6584 MEM : S3(2); // any mem
6585 %}
6586
6587 // Float load constant
6588 pipe_class fpu_reg_con(regD dst)
6589 %{
6590 instruction_count(2);
6591 dst : S5(write);
6592 D0 : S0; // big decoder only for the load
6593 DECODE : S1; // any decoder for FPU POP
6594 FPU : S4;
6595 MEM : S3; // any mem
6596 %}
6597
6598 // Float load constant
6599 pipe_class fpu_reg_reg_con(regD dst, regD src)
6600 %{
6601 instruction_count(3);
6602 dst : S5(write);
6603 src : S3(read);
6604 D0 : S0; // big decoder only for the load
6605 DECODE : S1(2); // any decoder for FPU POP
6606 FPU : S4;
6607 MEM : S3; // any mem
6608 %}
6609
// Unconditional branch
6611 pipe_class pipe_jmp(label labl)
6612 %{
6613 single_instruction;
6614 BR : S3;
6615 %}
6616
6617 // Conditional branch
6618 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6619 %{
6620 single_instruction;
6621 cr : S1(read);
6622 BR : S3;
6623 %}
6624
6625 // Allocation idiom
6626 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6627 %{
6628 instruction_count(1); force_serialization;
6629 fixed_latency(6);
6630 heap_ptr : S3(read);
6631 DECODE : S0(3);
6632 D0 : S2;
6633 MEM : S3;
6634 ALU : S3(2);
6635 dst : S5(write);
6636 BR : S5;
6637 %}
6638
6639 // Generic big/slow expanded idiom
6640 pipe_class pipe_slow()
6641 %{
6642 instruction_count(10); multiple_bundles; force_serialization;
6643 fixed_latency(100);
6644 D0 : S0(2);
6645 MEM : S3(2);
6646 %}
6647
6648 // The real do-nothing guy
6649 pipe_class empty()
6650 %{
6651 instruction_count(0);
6652 %}
6653
6654 // Define the class for the Nop node
6655 define
6656 %{
6657 MachNop = empty;
6658 %}
6659
6660 %}
6661
6662 //----------INSTRUCTIONS-------------------------------------------------------
6663 //
6664 // match -- States which machine-independent subtree may be replaced
6665 // by this instruction.
6666 // ins_cost -- The estimated cost of this instruction is used by instruction
6667 // selection to identify a minimum cost tree of machine
6668 // instructions that matches a tree of machine-independent
6669 // instructions.
6670 // format -- A string providing the disassembly for this instruction.
6671 // The value of an instruction's operand may be inserted
6672 // by referring to it with a '$' prefix.
6673 // opcode -- Three instruction opcodes may be provided. These are referred
6674 // to within an encode class as $primary, $secondary, and $tertiary
// respectively. The primary opcode is commonly used to
6676 // indicate the type of machine instruction, while secondary
6677 // and tertiary are often used for prefix options or addressing
6678 // modes.
6679 // ins_encode -- A list of encode classes with parameters. The encode class
6680 // name must have been defined in an 'enc_class' specification
6681 // in the encode section of the architecture description.
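//
// For example (an illustrative sketch only; the real definitions follow
// below), a typical rule combines these pieces as:
//
// instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src)); // replace the ideal AddI subtree
//   effect(KILL cr);               // addl clobbers the condition codes
//   ins_cost(150);
//   format %{ "addl $dst, $src\t# int" %}
//   ins_encode %{
//     __ addl($dst$$Register, $src$$Register);
//   %}
//   ins_pipe(ialu_reg_reg);
// %}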
6682
6683 // ============================================================================
6684
6685 instruct ShouldNotReachHere() %{
6686 match(Halt);
6687 format %{ "stop\t# ShouldNotReachHere" %}
6688 ins_encode %{
6689 if (is_reachable()) {
6690 const char* str = __ code_string(_halt_reason);
6691 __ stop(str);
6692 }
6693 %}
6694 ins_pipe(pipe_slow);
6695 %}
6696
6697 // ============================================================================
6698
6699 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
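// These pseudo-moves exist only so the matcher can retype a value between
// XMM register classes; post-selection cleanup must remove them all, so
// their encodings simply assert via ShouldNotReachHere().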
6700 // Load Float
6701 instruct MoveF2VL(vlRegF dst, regF src) %{
6702 match(Set dst src);
6703 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6704 ins_encode %{
6705 ShouldNotReachHere();
6706 %}
6707 ins_pipe( fpu_reg_reg );
6708 %}
6709
6710 // Load Float
6711 instruct MoveF2LEG(legRegF dst, regF src) %{
6712 match(Set dst src);
6713 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6714 ins_encode %{
6715 ShouldNotReachHere();
6716 %}
6717 ins_pipe( fpu_reg_reg );
6718 %}
6719
6720 // Load Float
6721 instruct MoveVL2F(regF dst, vlRegF src) %{
6722 match(Set dst src);
6723 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6724 ins_encode %{
6725 ShouldNotReachHere();
6726 %}
6727 ins_pipe( fpu_reg_reg );
6728 %}
6729
6730 // Load Float
6731 instruct MoveLEG2F(regF dst, legRegF src) %{
6732 match(Set dst src);
6733 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6734 ins_encode %{
6735 ShouldNotReachHere();
6736 %}
6737 ins_pipe( fpu_reg_reg );
6738 %}
6739
6740 // Load Double
6741 instruct MoveD2VL(vlRegD dst, regD src) %{
6742 match(Set dst src);
6743 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6744 ins_encode %{
6745 ShouldNotReachHere();
6746 %}
6747 ins_pipe( fpu_reg_reg );
6748 %}
6749
6750 // Load Double
6751 instruct MoveD2LEG(legRegD dst, regD src) %{
6752 match(Set dst src);
6753 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6754 ins_encode %{
6755 ShouldNotReachHere();
6756 %}
6757 ins_pipe( fpu_reg_reg );
6758 %}
6759
6760 // Load Double
6761 instruct MoveVL2D(regD dst, vlRegD src) %{
6762 match(Set dst src);
6763 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6764 ins_encode %{
6765 ShouldNotReachHere();
6766 %}
6767 ins_pipe( fpu_reg_reg );
6768 %}
6769
6770 // Load Double
6771 instruct MoveLEG2D(regD dst, legRegD src) %{
6772 match(Set dst src);
6773 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6774 ins_encode %{
6775 ShouldNotReachHere();
6776 %}
6777 ins_pipe( fpu_reg_reg );
6778 %}
6779
6780 //----------Load/Store/Move Instructions---------------------------------------
6781 //----------Load Instructions--------------------------------------------------
6782
6783 // Load Byte (8 bit signed)
6784 instruct loadB(rRegI dst, memory mem)
6785 %{
6786 match(Set dst (LoadB mem));
6787
6788 ins_cost(125);
6789 format %{ "movsbl $dst, $mem\t# byte" %}
6790
6791 ins_encode %{
6792 __ movsbl($dst$$Register, $mem$$Address);
6793 %}
6794
6795 ins_pipe(ialu_reg_mem);
6796 %}
6797
6798 // Load Byte (8 bit signed) into Long Register
6799 instruct loadB2L(rRegL dst, memory mem)
6800 %{
6801 match(Set dst (ConvI2L (LoadB mem)));
6802
6803 ins_cost(125);
6804 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6805
6806 ins_encode %{
6807 __ movsbq($dst$$Register, $mem$$Address);
6808 %}
6809
6810 ins_pipe(ialu_reg_mem);
6811 %}
6812
6813 // Load Unsigned Byte (8 bit UNsigned)
6814 instruct loadUB(rRegI dst, memory mem)
6815 %{
6816 match(Set dst (LoadUB mem));
6817
6818 ins_cost(125);
6819 format %{ "movzbl $dst, $mem\t# ubyte" %}
6820
6821 ins_encode %{
6822 __ movzbl($dst$$Register, $mem$$Address);
6823 %}
6824
6825 ins_pipe(ialu_reg_mem);
6826 %}
6827
6828 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6829 instruct loadUB2L(rRegL dst, memory mem)
6830 %{
6831 match(Set dst (ConvI2L (LoadUB mem)));
6832
6833 ins_cost(125);
6834 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6835
6836 ins_encode %{
6837 __ movzbq($dst$$Register, $mem$$Address);
6838 %}
6839
6840 ins_pipe(ialu_reg_mem);
6841 %}
6842
6843 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
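// The zero-extending byte load already clears bits 8..63, so only the low
// eight bits of the mask can have any effect; the andl applies just those.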
6844 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6845 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6846 effect(KILL cr);
6847
6848 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6849 "andl $dst, right_n_bits($mask, 8)" %}
6850 ins_encode %{
6851 Register Rdst = $dst$$Register;
6852 __ movzbq(Rdst, $mem$$Address);
6853 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6854 %}
6855 ins_pipe(ialu_reg_mem);
6856 %}
6857
6858 // Load Short (16 bit signed)
6859 instruct loadS(rRegI dst, memory mem)
6860 %{
6861 match(Set dst (LoadS mem));
6862
6863 ins_cost(125);
6864 format %{ "movswl $dst, $mem\t# short" %}
6865
6866 ins_encode %{
6867 __ movswl($dst$$Register, $mem$$Address);
6868 %}
6869
6870 ins_pipe(ialu_reg_mem);
6871 %}
6872
6873 // Load Short (16 bit signed) to Byte (8 bit signed)
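// The (LShiftI 24) / (RShiftI 24) pair is the ideal-graph idiom for a cast
// to byte, so the whole subtree folds into one sign-extending byte load.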
6874 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6875 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6876
6877 ins_cost(125);
6878 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6879 ins_encode %{
6880 __ movsbl($dst$$Register, $mem$$Address);
6881 %}
6882 ins_pipe(ialu_reg_mem);
6883 %}
6884
6885 // Load Short (16 bit signed) into Long Register
6886 instruct loadS2L(rRegL dst, memory mem)
6887 %{
6888 match(Set dst (ConvI2L (LoadS mem)));
6889
6890 ins_cost(125);
6891 format %{ "movswq $dst, $mem\t# short -> long" %}
6892
6893 ins_encode %{
6894 __ movswq($dst$$Register, $mem$$Address);
6895 %}
6896
6897 ins_pipe(ialu_reg_mem);
6898 %}
6899
6900 // Load Unsigned Short/Char (16 bit UNsigned)
6901 instruct loadUS(rRegI dst, memory mem)
6902 %{
6903 match(Set dst (LoadUS mem));
6904
6905 ins_cost(125);
6906 format %{ "movzwl $dst, $mem\t# ushort/char" %}
6907
6908 ins_encode %{
6909 __ movzwl($dst$$Register, $mem$$Address);
6910 %}
6911
6912 ins_pipe(ialu_reg_mem);
6913 %}
6914
6915 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6916 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6917 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6918
6919 ins_cost(125);
6920 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6921 ins_encode %{
6922 __ movsbl($dst$$Register, $mem$$Address);
6923 %}
6924 ins_pipe(ialu_reg_mem);
6925 %}
6926
6927 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6928 instruct loadUS2L(rRegL dst, memory mem)
6929 %{
6930 match(Set dst (ConvI2L (LoadUS mem)));
6931
6932 ins_cost(125);
6933 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
6934
6935 ins_encode %{
6936 __ movzwq($dst$$Register, $mem$$Address);
6937 %}
6938
6939 ins_pipe(ialu_reg_mem);
6940 %}
6941
6942 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
6943 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6944 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6945
6946 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
6947 ins_encode %{
6948 __ movzbq($dst$$Register, $mem$$Address);
6949 %}
6950 ins_pipe(ialu_reg_mem);
6951 %}
6952
6953 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
6954 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6955 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6956 effect(KILL cr);
6957
6958 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
6959 "andl $dst, right_n_bits($mask, 16)" %}
6960 ins_encode %{
6961 Register Rdst = $dst$$Register;
6962 __ movzwq(Rdst, $mem$$Address);
6963 __ andl(Rdst, $mask$$constant & right_n_bits(16));
6964 %}
6965 ins_pipe(ialu_reg_mem);
6966 %}
6967
6968 // Load Integer
6969 instruct loadI(rRegI dst, memory mem)
6970 %{
6971 match(Set dst (LoadI mem));
6972
6973 ins_cost(125);
6974 format %{ "movl $dst, $mem\t# int" %}
6975
6976 ins_encode %{
6977 __ movl($dst$$Register, $mem$$Address);
6978 %}
6979
6980 ins_pipe(ialu_reg_mem);
6981 %}
6982
6983 // Load Integer (32 bit signed) to Byte (8 bit signed)
6984 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6985 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6986
6987 ins_cost(125);
6988 format %{ "movsbl $dst, $mem\t# int -> byte" %}
6989 ins_encode %{
6990 __ movsbl($dst$$Register, $mem$$Address);
6991 %}
6992 ins_pipe(ialu_reg_mem);
6993 %}
6994
6995 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6996 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6997 match(Set dst (AndI (LoadI mem) mask));
6998
6999 ins_cost(125);
7000 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7001 ins_encode %{
7002 __ movzbl($dst$$Register, $mem$$Address);
7003 %}
7004 ins_pipe(ialu_reg_mem);
7005 %}
7006
7007 // Load Integer (32 bit signed) to Short (16 bit signed)
7008 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7009 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7010
7011 ins_cost(125);
7012 format %{ "movswl $dst, $mem\t# int -> short" %}
7013 ins_encode %{
7014 __ movswl($dst$$Register, $mem$$Address);
7015 %}
7016 ins_pipe(ialu_reg_mem);
7017 %}
7018
7019 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7020 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7021 match(Set dst (AndI (LoadI mem) mask));
7022
7023 ins_cost(125);
7024 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7025 ins_encode %{
7026 __ movzwl($dst$$Register, $mem$$Address);
7027 %}
7028 ins_pipe(ialu_reg_mem);
7029 %}
7030
7031 // Load Integer into Long Register
7032 instruct loadI2L(rRegL dst, memory mem)
7033 %{
7034 match(Set dst (ConvI2L (LoadI mem)));
7035
7036 ins_cost(125);
7037 format %{ "movslq $dst, $mem\t# int -> long" %}
7038
7039 ins_encode %{
7040 __ movslq($dst$$Register, $mem$$Address);
7041 %}
7042
7043 ins_pipe(ialu_reg_mem);
7044 %}
7045
7046 // Load Integer with mask 0xFF into Long Register
7047 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7048 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7049
7050 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7051 ins_encode %{
7052 __ movzbq($dst$$Register, $mem$$Address);
7053 %}
7054 ins_pipe(ialu_reg_mem);
7055 %}
7056
7057 // Load Integer with mask 0xFFFF into Long Register
7058 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7059 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7060
7061 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7062 ins_encode %{
7063 __ movzwq($dst$$Register, $mem$$Address);
7064 %}
7065 ins_pipe(ialu_reg_mem);
7066 %}
7067
7068 // Load Integer with a 31-bit mask into Long Register
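// Because the mask is a non-negative 31-bit value, the masked int is
// non-negative, and its zero extension (which movl/andl produce for free)
// equals its sign extension, so no explicit movslq is needed.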
7069 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7070 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7071 effect(KILL cr);
7072
7073 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7074 "andl $dst, $mask" %}
7075 ins_encode %{
7076 Register Rdst = $dst$$Register;
7077 __ movl(Rdst, $mem$$Address);
7078 __ andl(Rdst, $mask$$constant);
7079 %}
7080 ins_pipe(ialu_reg_mem);
7081 %}
7082
7083 // Load Unsigned Integer into Long Register
7084 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7085 %{
7086 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7087
7088 ins_cost(125);
7089 format %{ "movl $dst, $mem\t# uint -> long" %}
7090
7091 ins_encode %{
7092 __ movl($dst$$Register, $mem$$Address);
7093 %}
7094
7095 ins_pipe(ialu_reg_mem);
7096 %}
7097
7098 // Load Long
7099 instruct loadL(rRegL dst, memory mem)
7100 %{
7101 match(Set dst (LoadL mem));
7102
7103 ins_cost(125);
7104 format %{ "movq $dst, $mem\t# long" %}
7105
7106 ins_encode %{
7107 __ movq($dst$$Register, $mem$$Address);
7108 %}
7109
7110 ins_pipe(ialu_reg_mem); // XXX
7111 %}
7112
7113 // Load Range
7114 instruct loadRange(rRegI dst, memory mem)
7115 %{
7116 match(Set dst (LoadRange mem));
7117
7118 ins_cost(125); // XXX
7119 format %{ "movl $dst, $mem\t# range" %}
7120 ins_encode %{
7121 __ movl($dst$$Register, $mem$$Address);
7122 %}
7123 ins_pipe(ialu_reg_mem);
7124 %}
7125
7126 // Load Pointer
7127 instruct loadP(rRegP dst, memory mem)
7128 %{
7129 match(Set dst (LoadP mem));
7130 predicate(n->as_Load()->barrier_data() == 0);
7131
7132 ins_cost(125); // XXX
7133 format %{ "movq $dst, $mem\t# ptr" %}
7134 ins_encode %{
7135 __ movq($dst$$Register, $mem$$Address);
7136 %}
7137 ins_pipe(ialu_reg_mem); // XXX
7138 %}
7139
7140 // Load Compressed Pointer
7141 instruct loadN(rRegN dst, memory mem)
7142 %{
7143 predicate(n->as_Load()->barrier_data() == 0);
7144 match(Set dst (LoadN mem));
7145
7146 ins_cost(125); // XXX
7147 format %{ "movl $dst, $mem\t# compressed ptr" %}
7148 ins_encode %{
7149 __ movl($dst$$Register, $mem$$Address);
7150 %}
7151 ins_pipe(ialu_reg_mem); // XXX
7152 %}
7153
7154
7155 // Load Klass Pointer
7156 instruct loadKlass(rRegP dst, memory mem)
7157 %{
7158 match(Set dst (LoadKlass mem));
7159
7160 ins_cost(125); // XXX
7161 format %{ "movq $dst, $mem\t# class" %}
7162 ins_encode %{
7163 __ movq($dst$$Register, $mem$$Address);
7164 %}
7165 ins_pipe(ialu_reg_mem); // XXX
7166 %}
7167
7168 // Load narrow Klass Pointer
7169 instruct loadNKlass(rRegN dst, memory mem)
7170 %{
7171 predicate(!UseCompactObjectHeaders);
7172 match(Set dst (LoadNKlass mem));
7173
7174 ins_cost(125); // XXX
7175 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7176 ins_encode %{
7177 __ movl($dst$$Register, $mem$$Address);
7178 %}
7179 ins_pipe(ialu_reg_mem); // XXX
7180 %}
7181
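// With compact object headers the narrow klass pointer is stored shifted
// within the object's mark word, so it is loaded and then shifted right
// into place; the APX form folds the load and shift into a single
// non-destructive eshrl.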
7182 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7183 %{
7184 predicate(UseCompactObjectHeaders);
7185 match(Set dst (LoadNKlass mem));
7186 effect(KILL cr);
7187 ins_cost(125);
7188 format %{
7189 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7190 "shrl $dst, markWord::klass_shift_at_offset"
7191 %}
7192 ins_encode %{
if (UseAPX) {
  __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
} else {
  __ movl($dst$$Register, $mem$$Address);
  __ shrl($dst$$Register, markWord::klass_shift_at_offset);
}
7200 %}
7201 ins_pipe(ialu_reg_mem);
7202 %}
7203
7204 // Load Float
7205 instruct loadF(regF dst, memory mem)
7206 %{
7207 match(Set dst (LoadF mem));
7208
7209 ins_cost(145); // XXX
7210 format %{ "movss $dst, $mem\t# float" %}
7211 ins_encode %{
7212 __ movflt($dst$$XMMRegister, $mem$$Address);
7213 %}
7214 ins_pipe(pipe_slow); // XXX
7215 %}
7216
7217 // Load Double
7218 instruct loadD_partial(regD dst, memory mem)
7219 %{
7220 predicate(!UseXmmLoadAndClearUpper);
7221 match(Set dst (LoadD mem));
7222
7223 ins_cost(145); // XXX
7224 format %{ "movlpd $dst, $mem\t# double" %}
7225 ins_encode %{
7226 __ movdbl($dst$$XMMRegister, $mem$$Address);
7227 %}
7228 ins_pipe(pipe_slow); // XXX
7229 %}
7230
7231 instruct loadD(regD dst, memory mem)
7232 %{
7233 predicate(UseXmmLoadAndClearUpper);
7234 match(Set dst (LoadD mem));
7235
7236 ins_cost(145); // XXX
7237 format %{ "movsd $dst, $mem\t# double" %}
7238 ins_encode %{
7239 __ movdbl($dst$$XMMRegister, $mem$$Address);
7240 %}
7241 ins_pipe(pipe_slow); // XXX
7242 %}
7243
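// Java's Math.max/min have semantics that plain (v)maxss/minss do not
// provide: NaN must propagate and -0.0 must order below +0.0, whereas the
// SSE instructions return the second source when an operand is NaN and
// treat +/-0.0 as equal. The rules below therefore use blend-based helper
// sequences, while AVX10.2's scalar MINMAX instructions implement the
// required semantics directly and get their own shorter rules.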
7244 // max = java.lang.Math.max(float a, float b)
7245 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
7246 predicate(VM_Version::supports_avx10_2());
7247 match(Set dst (MaxF a b));
7248 format %{ "maxF $dst, $a, $b" %}
7249 ins_encode %{
7250 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7251 %}
7252 ins_pipe( pipe_slow );
7253 %}
7254
7255 // max = java.lang.Math.max(float a, float b)
7256 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7257 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7258 match(Set dst (MaxF a b));
7259 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7260 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7261 ins_encode %{
7262 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7263 %}
7264 ins_pipe( pipe_slow );
7265 %}
7266
7267 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7268 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7269 match(Set dst (MaxF a b));
7270 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7271
format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7273 ins_encode %{
7274 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7275 false /*min*/, true /*single*/);
7276 %}
7277 ins_pipe( pipe_slow );
7278 %}
7279
7280 // max = java.lang.Math.max(double a, double b)
7281 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
7282 predicate(VM_Version::supports_avx10_2());
7283 match(Set dst (MaxD a b));
7284 format %{ "maxD $dst, $a, $b" %}
7285 ins_encode %{
7286 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7287 %}
7288 ins_pipe( pipe_slow );
7289 %}
7290
7291 // max = java.lang.Math.max(double a, double b)
7292 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7293 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7294 match(Set dst (MaxD a b));
7295 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7296 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7297 ins_encode %{
7298 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7299 %}
7300 ins_pipe( pipe_slow );
7301 %}
7302
7303 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7304 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7305 match(Set dst (MaxD a b));
7306 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7307
7308 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7309 ins_encode %{
7310 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7311 false /*min*/, false /*single*/);
7312 %}
7313 ins_pipe( pipe_slow );
7314 %}
7315
// min = java.lang.Math.min(float a, float b)
7317 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
7318 predicate(VM_Version::supports_avx10_2());
7319 match(Set dst (MinF a b));
7320 format %{ "minF $dst, $a, $b" %}
7321 ins_encode %{
7322 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7323 %}
7324 ins_pipe( pipe_slow );
7325 %}
7326
7327 // min = java.lang.Math.min(float a, float b)
7328 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7329 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7330 match(Set dst (MinF a b));
7331 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7332 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7333 ins_encode %{
7334 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7335 %}
7336 ins_pipe( pipe_slow );
7337 %}
7338
7339 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7340 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7341 match(Set dst (MinF a b));
7342 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7343
7344 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7345 ins_encode %{
7346 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7347 true /*min*/, true /*single*/);
7348 %}
7349 ins_pipe( pipe_slow );
7350 %}
7351
// min = java.lang.Math.min(double a, double b)
7353 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
7354 predicate(VM_Version::supports_avx10_2());
7355 match(Set dst (MinD a b));
7356 format %{ "minD $dst, $a, $b" %}
7357 ins_encode %{
7358 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7359 %}
7360 ins_pipe( pipe_slow );
7361 %}
7362
7363 // min = java.lang.Math.min(double a, double b)
7364 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7365 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7366 match(Set dst (MinD a b));
7367 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7368 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7369 ins_encode %{
7370 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7371 %}
7372 ins_pipe( pipe_slow );
7373 %}
7374
7375 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7376 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7377 match(Set dst (MinD a b));
7378 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7379
format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7381 ins_encode %{
7382 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7383 true /*min*/, false /*single*/);
7384 %}
7385 ins_pipe( pipe_slow );
7386 %}
7387
7388 // Load Effective Address
7389 instruct leaP8(rRegP dst, indOffset8 mem)
7390 %{
7391 match(Set dst mem);
7392
7393 ins_cost(110); // XXX
7394 format %{ "leaq $dst, $mem\t# ptr 8" %}
7395 ins_encode %{
7396 __ leaq($dst$$Register, $mem$$Address);
7397 %}
7398 ins_pipe(ialu_reg_reg_fat);
7399 %}
7400
7401 instruct leaP32(rRegP dst, indOffset32 mem)
7402 %{
7403 match(Set dst mem);
7404
7405 ins_cost(110);
7406 format %{ "leaq $dst, $mem\t# ptr 32" %}
7407 ins_encode %{
7408 __ leaq($dst$$Register, $mem$$Address);
7409 %}
7410 ins_pipe(ialu_reg_reg_fat);
7411 %}
7412
7413 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7414 %{
7415 match(Set dst mem);
7416
7417 ins_cost(110);
7418 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7419 ins_encode %{
7420 __ leaq($dst$$Register, $mem$$Address);
7421 %}
7422 ins_pipe(ialu_reg_reg_fat);
7423 %}
7424
7425 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7426 %{
7427 match(Set dst mem);
7428
7429 ins_cost(110);
7430 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7431 ins_encode %{
7432 __ leaq($dst$$Register, $mem$$Address);
7433 %}
7434 ins_pipe(ialu_reg_reg_fat);
7435 %}
7436
7437 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7438 %{
7439 match(Set dst mem);
7440
7441 ins_cost(110);
7442 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7443 ins_encode %{
7444 __ leaq($dst$$Register, $mem$$Address);
7445 %}
7446 ins_pipe(ialu_reg_reg_fat);
7447 %}
7448
7449 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7450 %{
7451 match(Set dst mem);
7452
7453 ins_cost(110);
7454 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7455 ins_encode %{
7456 __ leaq($dst$$Register, $mem$$Address);
7457 %}
7458 ins_pipe(ialu_reg_reg_fat);
7459 %}
7460
7461 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7462 %{
7463 match(Set dst mem);
7464
7465 ins_cost(110);
7466 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7467 ins_encode %{
7468 __ leaq($dst$$Register, $mem$$Address);
7469 %}
7470 ins_pipe(ialu_reg_reg_fat);
7471 %}
7472
7473 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7474 %{
7475 match(Set dst mem);
7476
7477 ins_cost(110);
7478 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7479 ins_encode %{
7480 __ leaq($dst$$Register, $mem$$Address);
7481 %}
7482 ins_pipe(ialu_reg_reg_fat);
7483 %}
7484
// Load Effective Address which uses a narrow (32-bit) oop
7486 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7487 %{
7488 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7489 match(Set dst mem);
7490
7491 ins_cost(110);
7492 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7493 ins_encode %{
7494 __ leaq($dst$$Register, $mem$$Address);
7495 %}
7496 ins_pipe(ialu_reg_reg_fat);
7497 %}
7498
7499 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7500 %{
7501 predicate(CompressedOops::shift() == 0);
7502 match(Set dst mem);
7503
7504 ins_cost(110); // XXX
7505 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7506 ins_encode %{
7507 __ leaq($dst$$Register, $mem$$Address);
7508 %}
7509 ins_pipe(ialu_reg_reg_fat);
7510 %}
7511
7512 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7513 %{
7514 predicate(CompressedOops::shift() == 0);
7515 match(Set dst mem);
7516
7517 ins_cost(110);
7518 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7519 ins_encode %{
7520 __ leaq($dst$$Register, $mem$$Address);
7521 %}
7522 ins_pipe(ialu_reg_reg_fat);
7523 %}
7524
7525 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7526 %{
7527 predicate(CompressedOops::shift() == 0);
7528 match(Set dst mem);
7529
7530 ins_cost(110);
7531 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7532 ins_encode %{
7533 __ leaq($dst$$Register, $mem$$Address);
7534 %}
7535 ins_pipe(ialu_reg_reg_fat);
7536 %}
7537
7538 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7539 %{
7540 predicate(CompressedOops::shift() == 0);
7541 match(Set dst mem);
7542
7543 ins_cost(110);
7544 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7545 ins_encode %{
7546 __ leaq($dst$$Register, $mem$$Address);
7547 %}
7548 ins_pipe(ialu_reg_reg_fat);
7549 %}
7550
7551 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7552 %{
7553 predicate(CompressedOops::shift() == 0);
7554 match(Set dst mem);
7555
7556 ins_cost(110);
7557 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7558 ins_encode %{
7559 __ leaq($dst$$Register, $mem$$Address);
7560 %}
7561 ins_pipe(ialu_reg_reg_fat);
7562 %}
7563
7564 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7565 %{
7566 predicate(CompressedOops::shift() == 0);
7567 match(Set dst mem);
7568
7569 ins_cost(110);
7570 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7571 ins_encode %{
7572 __ leaq($dst$$Register, $mem$$Address);
7573 %}
7574 ins_pipe(ialu_reg_reg_fat);
7575 %}
7576
7577 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7578 %{
7579 predicate(CompressedOops::shift() == 0);
7580 match(Set dst mem);
7581
7582 ins_cost(110);
7583 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7584 ins_encode %{
7585 __ leaq($dst$$Register, $mem$$Address);
7586 %}
7587 ins_pipe(ialu_reg_reg_fat);
7588 %}
7589
7590 instruct loadConI(rRegI dst, immI src)
7591 %{
7592 match(Set dst src);
7593
7594 format %{ "movl $dst, $src\t# int" %}
7595 ins_encode %{
7596 __ movl($dst$$Register, $src$$constant);
7597 %}
7598 ins_pipe(ialu_reg_fat); // XXX
7599 %}
7600
7601 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7602 %{
7603 match(Set dst src);
7604 effect(KILL cr);
7605
7606 ins_cost(50);
7607 format %{ "xorl $dst, $dst\t# int" %}
7608 ins_encode %{
7609 __ xorl($dst$$Register, $dst$$Register);
7610 %}
7611 ins_pipe(ialu_reg);
7612 %}
7613
7614 instruct loadConL(rRegL dst, immL src)
7615 %{
7616 match(Set dst src);
7617
7618 ins_cost(150);
7619 format %{ "movq $dst, $src\t# long" %}
7620 ins_encode %{
7621 __ mov64($dst$$Register, $src$$constant);
7622 %}
7623 ins_pipe(ialu_reg);
7624 %}
7625
7626 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7627 %{
7628 match(Set dst src);
7629 effect(KILL cr);
7630
7631 ins_cost(50);
7632 format %{ "xorl $dst, $dst\t# long" %}
7633 ins_encode %{
7634 __ xorl($dst$$Register, $dst$$Register);
7635 %}
7636 ins_pipe(ialu_reg); // XXX
7637 %}
7638
7639 instruct loadConUL32(rRegL dst, immUL32 src)
7640 %{
7641 match(Set dst src);
7642
7643 ins_cost(60);
7644 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7645 ins_encode %{
7646 __ movl($dst$$Register, $src$$constant);
7647 %}
7648 ins_pipe(ialu_reg);
7649 %}
7650
7651 instruct loadConL32(rRegL dst, immL32 src)
7652 %{
7653 match(Set dst src);
7654
7655 ins_cost(70);
7656 format %{ "movq $dst, $src\t# long (32-bit)" %}
7657 ins_encode %{
7658 __ movq($dst$$Register, $src$$constant);
7659 %}
7660 ins_pipe(ialu_reg);
7661 %}
7662
7663 instruct loadConP(rRegP dst, immP con) %{
7664 match(Set dst con);
7665
7666 format %{ "movq $dst, $con\t# ptr" %}
7667 ins_encode %{
7668 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7669 %}
7670 ins_pipe(ialu_reg_fat); // XXX
7671 %}
7672
7673 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7674 %{
7675 match(Set dst src);
7676 effect(KILL cr);
7677
7678 ins_cost(50);
7679 format %{ "xorl $dst, $dst\t# ptr" %}
7680 ins_encode %{
7681 __ xorl($dst$$Register, $dst$$Register);
7682 %}
7683 ins_pipe(ialu_reg);
7684 %}
7685
7686 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7687 %{
7688 match(Set dst src);
7689 effect(KILL cr);
7690
7691 ins_cost(60);
7692 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7693 ins_encode %{
7694 __ movl($dst$$Register, $src$$constant);
7695 %}
7696 ins_pipe(ialu_reg);
7697 %}
7698
7699 instruct loadConF(regF dst, immF con) %{
7700 match(Set dst con);
7701 ins_cost(125);
7702 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7703 ins_encode %{
7704 __ movflt($dst$$XMMRegister, $constantaddress($con));
7705 %}
7706 ins_pipe(pipe_slow);
7707 %}
7708
7709 instruct loadConH(regF dst, immH con) %{
7710 match(Set dst con);
7711 ins_cost(125);
7712 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7713 ins_encode %{
7714 __ movflt($dst$$XMMRegister, $constantaddress($con));
7715 %}
7716 ins_pipe(pipe_slow);
7717 %}
7718
7719 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7720 match(Set dst src);
7721 effect(KILL cr);
format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7723 ins_encode %{
7724 __ xorq($dst$$Register, $dst$$Register);
7725 %}
7726 ins_pipe(ialu_reg);
7727 %}
7728
7729 instruct loadConN(rRegN dst, immN src) %{
7730 match(Set dst src);
7731
7732 ins_cost(125);
7733 format %{ "movl $dst, $src\t# compressed ptr" %}
7734 ins_encode %{
7735 address con = (address)$src$$constant;
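    // A narrow-oop null is matched by loadConN0 above, so con must be
    // non-null here; reaching the null branch would indicate a matcher bug.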
7736 if (con == nullptr) {
7737 ShouldNotReachHere();
7738 } else {
7739 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7740 }
7741 %}
7742 ins_pipe(ialu_reg_fat); // XXX
7743 %}
7744
7745 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7746 match(Set dst src);
7747
7748 ins_cost(125);
7749 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7750 ins_encode %{
7751 address con = (address)$src$$constant;
7752 if (con == nullptr) {
7753 ShouldNotReachHere();
7754 } else {
7755 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7756 }
7757 %}
7758 ins_pipe(ialu_reg_fat); // XXX
7759 %}
7760
7761 instruct loadConF0(regF dst, immF0 src)
7762 %{
7763 match(Set dst src);
7764 ins_cost(100);
7765
7766 format %{ "xorps $dst, $dst\t# float 0.0" %}
7767 ins_encode %{
7768 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7769 %}
7770 ins_pipe(pipe_slow);
7771 %}
7772
// Use the same format since predicate() cannot be used here.
7774 instruct loadConD(regD dst, immD con) %{
7775 match(Set dst con);
7776 ins_cost(125);
7777 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7778 ins_encode %{
7779 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7780 %}
7781 ins_pipe(pipe_slow);
7782 %}
7783
7784 instruct loadConD0(regD dst, immD0 src)
7785 %{
7786 match(Set dst src);
7787 ins_cost(100);
7788
7789 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7790 ins_encode %{
7791 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7792 %}
7793 ins_pipe(pipe_slow);
7794 %}
7795
7796 instruct loadSSI(rRegI dst, stackSlotI src)
7797 %{
7798 match(Set dst src);
7799
7800 ins_cost(125);
7801 format %{ "movl $dst, $src\t# int stk" %}
7802 ins_encode %{
7803 __ movl($dst$$Register, $src$$Address);
7804 %}
7805 ins_pipe(ialu_reg_mem);
7806 %}
7807
7808 instruct loadSSL(rRegL dst, stackSlotL src)
7809 %{
7810 match(Set dst src);
7811
7812 ins_cost(125);
7813 format %{ "movq $dst, $src\t# long stk" %}
7814 ins_encode %{
7815 __ movq($dst$$Register, $src$$Address);
7816 %}
7817 ins_pipe(ialu_reg_mem);
7818 %}
7819
7820 instruct loadSSP(rRegP dst, stackSlotP src)
7821 %{
7822 match(Set dst src);
7823
7824 ins_cost(125);
7825 format %{ "movq $dst, $src\t# ptr stk" %}
7826 ins_encode %{
7827 __ movq($dst$$Register, $src$$Address);
7828 %}
7829 ins_pipe(ialu_reg_mem);
7830 %}
7831
7832 instruct loadSSF(regF dst, stackSlotF src)
7833 %{
7834 match(Set dst src);
7835
7836 ins_cost(125);
7837 format %{ "movss $dst, $src\t# float stk" %}
7838 ins_encode %{
7839 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7840 %}
7841 ins_pipe(pipe_slow); // XXX
7842 %}
7843
// Use the same format since predicate() cannot be used here.
7845 instruct loadSSD(regD dst, stackSlotD src)
7846 %{
7847 match(Set dst src);
7848
7849 ins_cost(125);
7850 format %{ "movsd $dst, $src\t# double stk" %}
7851 ins_encode %{
7852 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7853 %}
7854 ins_pipe(pipe_slow); // XXX
7855 %}
7856
7857 // Prefetch instructions for allocation.
7858 // Must be safe to execute with invalid address (cannot fault).
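// Allocation prefetches run ahead of the TLAB allocation watermark, so the
// target line may not be mapped yet; prefetch instructions never fault,
// which is what makes this safe.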
7859
7860 instruct prefetchAlloc( memory mem ) %{
7861 predicate(AllocatePrefetchInstr==3);
7862 match(PrefetchAllocation mem);
7863 ins_cost(125);
7864
7865 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7866 ins_encode %{
7867 __ prefetchw($mem$$Address);
7868 %}
7869 ins_pipe(ialu_mem);
7870 %}
7871
7872 instruct prefetchAllocNTA( memory mem ) %{
7873 predicate(AllocatePrefetchInstr==0);
7874 match(PrefetchAllocation mem);
7875 ins_cost(125);
7876
7877 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7878 ins_encode %{
7879 __ prefetchnta($mem$$Address);
7880 %}
7881 ins_pipe(ialu_mem);
7882 %}
7883
7884 instruct prefetchAllocT0( memory mem ) %{
7885 predicate(AllocatePrefetchInstr==1);
7886 match(PrefetchAllocation mem);
7887 ins_cost(125);
7888
7889 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7890 ins_encode %{
7891 __ prefetcht0($mem$$Address);
7892 %}
7893 ins_pipe(ialu_mem);
7894 %}
7895
7896 instruct prefetchAllocT2( memory mem ) %{
7897 predicate(AllocatePrefetchInstr==2);
7898 match(PrefetchAllocation mem);
7899 ins_cost(125);
7900
7901 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
7902 ins_encode %{
7903 __ prefetcht2($mem$$Address);
7904 %}
7905 ins_pipe(ialu_mem);
7906 %}
7907
7908 //----------Store Instructions-------------------------------------------------
7909
7910 // Store Byte
7911 instruct storeB(memory mem, rRegI src)
7912 %{
7913 match(Set mem (StoreB mem src));
7914
7915 ins_cost(125); // XXX
7916 format %{ "movb $mem, $src\t# byte" %}
7917 ins_encode %{
7918 __ movb($mem$$Address, $src$$Register);
7919 %}
7920 ins_pipe(ialu_mem_reg);
7921 %}
7922
7923 // Store Char/Short
7924 instruct storeC(memory mem, rRegI src)
7925 %{
7926 match(Set mem (StoreC mem src));
7927
7928 ins_cost(125); // XXX
7929 format %{ "movw $mem, $src\t# char/short" %}
7930 ins_encode %{
7931 __ movw($mem$$Address, $src$$Register);
7932 %}
7933 ins_pipe(ialu_mem_reg);
7934 %}
7935
7936 // Store Integer
7937 instruct storeI(memory mem, rRegI src)
7938 %{
7939 match(Set mem (StoreI mem src));
7940
7941 ins_cost(125); // XXX
7942 format %{ "movl $mem, $src\t# int" %}
7943 ins_encode %{
7944 __ movl($mem$$Address, $src$$Register);
7945 %}
7946 ins_pipe(ialu_mem_reg);
7947 %}
7948
7949 // Store Long
7950 instruct storeL(memory mem, rRegL src)
7951 %{
7952 match(Set mem (StoreL mem src));
7953
7954 ins_cost(125); // XXX
7955 format %{ "movq $mem, $src\t# long" %}
7956 ins_encode %{
7957 __ movq($mem$$Address, $src$$Register);
7958 %}
7959 ins_pipe(ialu_mem_reg); // XXX
7960 %}
7961
7962 // Store Pointer
7963 instruct storeP(memory mem, any_RegP src)
7964 %{
7965 predicate(n->as_Store()->barrier_data() == 0);
7966 match(Set mem (StoreP mem src));
7967
7968 ins_cost(125); // XXX
7969 format %{ "movq $mem, $src\t# ptr" %}
7970 ins_encode %{
7971 __ movq($mem$$Address, $src$$Register);
7972 %}
7973 ins_pipe(ialu_mem_reg);
7974 %}
7975
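// When the compressed-oop base is null, r12 (the heap base register) is
// known to contain zero, so several of the zero-store rules below reuse it
// instead of materializing an immediate zero.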
7976 instruct storeImmP0(memory mem, immP0 zero)
7977 %{
7978 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
7979 match(Set mem (StoreP mem zero));
7980
7981 ins_cost(125); // XXX
7982 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
7983 ins_encode %{
7984 __ movq($mem$$Address, r12);
7985 %}
7986 ins_pipe(ialu_mem_reg);
7987 %}
7988
7989 // Store Null Pointer, mark word, or other simple pointer constant.
7990 instruct storeImmP(memory mem, immP31 src)
7991 %{
7992 predicate(n->as_Store()->barrier_data() == 0);
7993 match(Set mem (StoreP mem src));
7994
7995 ins_cost(150); // XXX
7996 format %{ "movq $mem, $src\t# ptr" %}
7997 ins_encode %{
7998 __ movq($mem$$Address, $src$$constant);
7999 %}
8000 ins_pipe(ialu_mem_imm);
8001 %}
8002
8003 // Store Compressed Pointer
8004 instruct storeN(memory mem, rRegN src)
8005 %{
8006 predicate(n->as_Store()->barrier_data() == 0);
8007 match(Set mem (StoreN mem src));
8008
8009 ins_cost(125); // XXX
8010 format %{ "movl $mem, $src\t# compressed ptr" %}
8011 ins_encode %{
8012 __ movl($mem$$Address, $src$$Register);
8013 %}
8014 ins_pipe(ialu_mem_reg);
8015 %}
8016
8017 instruct storeNKlass(memory mem, rRegN src)
8018 %{
8019 match(Set mem (StoreNKlass mem src));
8020
8021 ins_cost(125); // XXX
8022 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8023 ins_encode %{
8024 __ movl($mem$$Address, $src$$Register);
8025 %}
8026 ins_pipe(ialu_mem_reg);
8027 %}
8028
8029 instruct storeImmN0(memory mem, immN0 zero)
8030 %{
8031 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8032 match(Set mem (StoreN mem zero));
8033
8034 ins_cost(125); // XXX
8035 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8036 ins_encode %{
8037 __ movl($mem$$Address, r12);
8038 %}
8039 ins_pipe(ialu_mem_reg);
8040 %}
8041
8042 instruct storeImmN(memory mem, immN src)
8043 %{
8044 predicate(n->as_Store()->barrier_data() == 0);
8045 match(Set mem (StoreN mem src));
8046
8047 ins_cost(150); // XXX
8048 format %{ "movl $mem, $src\t# compressed ptr" %}
8049 ins_encode %{
8050 address con = (address)$src$$constant;
8051 if (con == nullptr) {
8052 __ movl($mem$$Address, 0);
8053 } else {
8054 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8055 }
8056 %}
8057 ins_pipe(ialu_mem_imm);
8058 %}
8059
8060 instruct storeImmNKlass(memory mem, immNKlass src)
8061 %{
8062 match(Set mem (StoreNKlass mem src));
8063
8064 ins_cost(150); // XXX
8065 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8066 ins_encode %{
8067 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8068 %}
8069 ins_pipe(ialu_mem_imm);
8070 %}
8071
8072 // Store Integer Immediate
8073 instruct storeImmI0(memory mem, immI_0 zero)
8074 %{
8075 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8076 match(Set mem (StoreI mem zero));
8077
8078 ins_cost(125); // XXX
8079 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8080 ins_encode %{
8081 __ movl($mem$$Address, r12);
8082 %}
8083 ins_pipe(ialu_mem_reg);
8084 %}
8085
8086 instruct storeImmI(memory mem, immI src)
8087 %{
8088 match(Set mem (StoreI mem src));
8089
8090 ins_cost(150);
8091 format %{ "movl $mem, $src\t# int" %}
8092 ins_encode %{
8093 __ movl($mem$$Address, $src$$constant);
8094 %}
8095 ins_pipe(ialu_mem_imm);
8096 %}
8097
8098 // Store Long Immediate
8099 instruct storeImmL0(memory mem, immL0 zero)
8100 %{
8101 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8102 match(Set mem (StoreL mem zero));
8103
8104 ins_cost(125); // XXX
8105 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8106 ins_encode %{
8107 __ movq($mem$$Address, r12);
8108 %}
8109 ins_pipe(ialu_mem_reg);
8110 %}
8111
8112 instruct storeImmL(memory mem, immL32 src)
8113 %{
8114 match(Set mem (StoreL mem src));
8115
8116 ins_cost(150);
8117 format %{ "movq $mem, $src\t# long" %}
8118 ins_encode %{
8119 __ movq($mem$$Address, $src$$constant);
8120 %}
8121 ins_pipe(ialu_mem_imm);
8122 %}
8123
8124 // Store Short/Char Immediate
8125 instruct storeImmC0(memory mem, immI_0 zero)
8126 %{
8127 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8128 match(Set mem (StoreC mem zero));
8129
8130 ins_cost(125); // XXX
8131 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8132 ins_encode %{
8133 __ movw($mem$$Address, r12);
8134 %}
8135 ins_pipe(ialu_mem_reg);
8136 %}
8137
8138 instruct storeImmI16(memory mem, immI16 src)
8139 %{
8140 predicate(UseStoreImmI16);
8141 match(Set mem (StoreC mem src));
8142
8143 ins_cost(150);
8144 format %{ "movw $mem, $src\t# short/char" %}
8145 ins_encode %{
8146 __ movw($mem$$Address, $src$$constant);
8147 %}
8148 ins_pipe(ialu_mem_imm);
8149 %}
8150
8151 // Store Byte Immediate
8152 instruct storeImmB0(memory mem, immI_0 zero)
8153 %{
8154 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8155 match(Set mem (StoreB mem zero));
8156
8157 ins_cost(125); // XXX
format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8159 ins_encode %{
8160 __ movb($mem$$Address, r12);
8161 %}
8162 ins_pipe(ialu_mem_reg);
8163 %}
8164
8165 instruct storeImmB(memory mem, immI8 src)
8166 %{
8167 match(Set mem (StoreB mem src));
8168
8169 ins_cost(150); // XXX
8170 format %{ "movb $mem, $src\t# byte" %}
8171 ins_encode %{
8172 __ movb($mem$$Address, $src$$constant);
8173 %}
8174 ins_pipe(ialu_mem_imm);
8175 %}
8176
8177 // Store Float
8178 instruct storeF(memory mem, regF src)
8179 %{
8180 match(Set mem (StoreF mem src));
8181
8182 ins_cost(95); // XXX
8183 format %{ "movss $mem, $src\t# float" %}
8184 ins_encode %{
8185 __ movflt($mem$$Address, $src$$XMMRegister);
8186 %}
8187 ins_pipe(pipe_slow); // XXX
8188 %}
8189
// Store immediate Float value (faster than storing from an XMM register)
8191 instruct storeF0(memory mem, immF0 zero)
8192 %{
8193 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8194 match(Set mem (StoreF mem zero));
8195
8196 ins_cost(25); // XXX
8197 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8198 ins_encode %{
8199 __ movl($mem$$Address, r12);
8200 %}
8201 ins_pipe(ialu_mem_reg);
8202 %}
8203
8204 instruct storeF_imm(memory mem, immF src)
8205 %{
8206 match(Set mem (StoreF mem src));
8207
8208 ins_cost(50);
8209 format %{ "movl $mem, $src\t# float" %}
8210 ins_encode %{
8211 __ movl($mem$$Address, jint_cast($src$$constant));
8212 %}
8213 ins_pipe(ialu_mem_imm);
8214 %}
8215
8216 // Store Double
8217 instruct storeD(memory mem, regD src)
8218 %{
8219 match(Set mem (StoreD mem src));
8220
8221 ins_cost(95); // XXX
8222 format %{ "movsd $mem, $src\t# double" %}
8223 ins_encode %{
8224 __ movdbl($mem$$Address, $src$$XMMRegister);
8225 %}
8226 ins_pipe(pipe_slow); // XXX
8227 %}
8228
// Store immediate double 0.0 (faster than storing from an XMM register)
8230 instruct storeD0_imm(memory mem, immD0 src)
8231 %{
8232 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8233 match(Set mem (StoreD mem src));
8234
8235 ins_cost(50);
8236 format %{ "movq $mem, $src\t# double 0." %}
8237 ins_encode %{
8238 __ movq($mem$$Address, $src$$constant);
8239 %}
8240 ins_pipe(ialu_mem_imm);
8241 %}
8242
8243 instruct storeD0(memory mem, immD0 zero)
8244 %{
8245 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8246 match(Set mem (StoreD mem zero));
8247
8248 ins_cost(25); // XXX
8249 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8250 ins_encode %{
8251 __ movq($mem$$Address, r12);
8252 %}
8253 ins_pipe(ialu_mem_reg);
8254 %}
8255
8256 instruct storeSSI(stackSlotI dst, rRegI src)
8257 %{
8258 match(Set dst src);
8259
8260 ins_cost(100);
8261 format %{ "movl $dst, $src\t# int stk" %}
8262 ins_encode %{
8263 __ movl($dst$$Address, $src$$Register);
8264 %}
8265 ins_pipe( ialu_mem_reg );
8266 %}
8267
8268 instruct storeSSL(stackSlotL dst, rRegL src)
8269 %{
8270 match(Set dst src);
8271
8272 ins_cost(100);
8273 format %{ "movq $dst, $src\t# long stk" %}
8274 ins_encode %{
8275 __ movq($dst$$Address, $src$$Register);
8276 %}
8277 ins_pipe(ialu_mem_reg);
8278 %}
8279
8280 instruct storeSSP(stackSlotP dst, rRegP src)
8281 %{
8282 match(Set dst src);
8283
8284 ins_cost(100);
8285 format %{ "movq $dst, $src\t# ptr stk" %}
8286 ins_encode %{
8287 __ movq($dst$$Address, $src$$Register);
8288 %}
8289 ins_pipe(ialu_mem_reg);
8290 %}
8291
8292 instruct storeSSF(stackSlotF dst, regF src)
8293 %{
8294 match(Set dst src);
8295
8296 ins_cost(95); // XXX
8297 format %{ "movss $dst, $src\t# float stk" %}
8298 ins_encode %{
8299 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8300 %}
8301 ins_pipe(pipe_slow); // XXX
8302 %}
8303
8304 instruct storeSSD(stackSlotD dst, regD src)
8305 %{
8306 match(Set dst src);
8307
8308 ins_cost(95); // XXX
8309 format %{ "movsd $dst, $src\t# double stk" %}
8310 ins_encode %{
8311 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8312 %}
8313 ins_pipe(pipe_slow); // XXX
8314 %}
8315
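// Cache-line writeback and its pre/post ordering fences, used for
// persistent-memory support (e.g. Unsafe.writebackMemory); the macro
// assembler picks the writeback instruction the CPU supports.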
8316 instruct cacheWB(indirect addr)
8317 %{
8318 predicate(VM_Version::supports_data_cache_line_flush());
8319 match(CacheWB addr);
8320
8321 ins_cost(100);
format %{ "cache wb $addr" %}
8323 ins_encode %{
8324 assert($addr->index_position() < 0, "should be");
8325 assert($addr$$disp == 0, "should be");
8326 __ cache_wb(Address($addr$$base$$Register, 0));
8327 %}
8328 ins_pipe(pipe_slow); // XXX
8329 %}
8330
8331 instruct cacheWBPreSync()
8332 %{
8333 predicate(VM_Version::supports_data_cache_line_flush());
8334 match(CacheWBPreSync);
8335
8336 ins_cost(100);
format %{ "cache wb presync" %}
8338 ins_encode %{
8339 __ cache_wbsync(true);
8340 %}
8341 ins_pipe(pipe_slow); // XXX
8342 %}
8343
8344 instruct cacheWBPostSync()
8345 %{
8346 predicate(VM_Version::supports_data_cache_line_flush());
8347 match(CacheWBPostSync);
8348
8349 ins_cost(100);
format %{ "cache wb postsync" %}
8351 ins_encode %{
8352 __ cache_wbsync(false);
8353 %}
8354 ins_pipe(pipe_slow); // XXX
8355 %}
8356
8357 //----------BSWAP Instructions-------------------------------------------------
8358 instruct bytes_reverse_int(rRegI dst) %{
8359 match(Set dst (ReverseBytesI dst));
8360
8361 format %{ "bswapl $dst" %}
8362 ins_encode %{
8363 __ bswapl($dst$$Register);
8364 %}
8365 ins_pipe( ialu_reg );
8366 %}
8367
8368 instruct bytes_reverse_long(rRegL dst) %{
8369 match(Set dst (ReverseBytesL dst));
8370
8371 format %{ "bswapq $dst" %}
8372 ins_encode %{
8373 __ bswapq($dst$$Register);
8374 %}
8375 ins_pipe( ialu_reg);
8376 %}
8377
8378 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8379 match(Set dst (ReverseBytesUS dst));
8380 effect(KILL cr);
8381
8382 format %{ "bswapl $dst\n\t"
8383 "shrl $dst,16\n\t" %}
8384 ins_encode %{
8385 __ bswapl($dst$$Register);
8386 __ shrl($dst$$Register, 16);
8387 %}
8388 ins_pipe( ialu_reg );
8389 %}
8390
8391 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8392 match(Set dst (ReverseBytesS dst));
8393 effect(KILL cr);
8394
8395 format %{ "bswapl $dst\n\t"
8396 "sar $dst,16\n\t" %}
8397 ins_encode %{
8398 __ bswapl($dst$$Register);
8399 __ sarl($dst$$Register, 16);
8400 %}
8401 ins_pipe( ialu_reg );
8402 %}
8403
8404 //---------- Zeros Count Instructions ------------------------------------------
8405
8406 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8407 predicate(UseCountLeadingZerosInstruction);
8408 match(Set dst (CountLeadingZerosI src));
8409 effect(KILL cr);
8410
8411 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8412 ins_encode %{
8413 __ lzcntl($dst$$Register, $src$$Register);
8414 %}
8415 ins_pipe(ialu_reg);
8416 %}
8417
8418 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8419 predicate(UseCountLeadingZerosInstruction);
8420 match(Set dst (CountLeadingZerosI (LoadI src)));
8421 effect(KILL cr);
8422 ins_cost(175);
8423 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8424 ins_encode %{
8425 __ lzcntl($dst$$Register, $src$$Address);
8426 %}
8427 ins_pipe(ialu_reg_mem);
8428 %}
8429
8430 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8431 predicate(!UseCountLeadingZerosInstruction);
8432 match(Set dst (CountLeadingZerosI src));
8433 effect(KILL cr);
8434
8435 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8436 "jnz skip\n\t"
8437 "movl $dst, -1\n"
8438 "skip:\n\t"
8439 "negl $dst\n\t"
8440 "addl $dst, 31" %}
8441 ins_encode %{
8442 Register Rdst = $dst$$Register;
8443 Register Rsrc = $src$$Register;
8444 Label skip;
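    // bsr leaves the destination undefined when the source is zero, so -1
    // is substituted; CLZ is then computed as 31 - index via negl + addl,
    // which also turns the zero-input -1 into 32.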
8445 __ bsrl(Rdst, Rsrc);
8446 __ jccb(Assembler::notZero, skip);
8447 __ movl(Rdst, -1);
8448 __ bind(skip);
8449 __ negl(Rdst);
8450 __ addl(Rdst, BitsPerInt - 1);
8451 %}
8452 ins_pipe(ialu_reg);
8453 %}
8454
8455 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8456 predicate(UseCountLeadingZerosInstruction);
8457 match(Set dst (CountLeadingZerosL src));
8458 effect(KILL cr);
8459
8460 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8461 ins_encode %{
8462 __ lzcntq($dst$$Register, $src$$Register);
8463 %}
8464 ins_pipe(ialu_reg);
8465 %}
8466
8467 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8468 predicate(UseCountLeadingZerosInstruction);
8469 match(Set dst (CountLeadingZerosL (LoadL src)));
8470 effect(KILL cr);
8471 ins_cost(175);
8472 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8473 ins_encode %{
8474 __ lzcntq($dst$$Register, $src$$Address);
8475 %}
8476 ins_pipe(ialu_reg_mem);
8477 %}
8478
8479 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8480 predicate(!UseCountLeadingZerosInstruction);
8481 match(Set dst (CountLeadingZerosL src));
8482 effect(KILL cr);
8483
8484 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8485 "jnz skip\n\t"
8486 "movl $dst, -1\n"
8487 "skip:\n\t"
8488 "negl $dst\n\t"
8489 "addl $dst, 63" %}
8490 ins_encode %{
8491 Register Rdst = $dst$$Register;
8492 Register Rsrc = $src$$Register;
8493 Label skip;
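    // Same scheme as the int case: CLZ = 63 - bsr(src), with -1 substituted
    // for zero input so that the result becomes 64.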
8494 __ bsrq(Rdst, Rsrc);
8495 __ jccb(Assembler::notZero, skip);
8496 __ movl(Rdst, -1);
8497 __ bind(skip);
8498 __ negl(Rdst);
8499 __ addl(Rdst, BitsPerLong - 1);
8500 %}
8501 ins_pipe(ialu_reg);
8502 %}
8503
8504 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8505 predicate(UseCountTrailingZerosInstruction);
8506 match(Set dst (CountTrailingZerosI src));
8507 effect(KILL cr);
8508
8509 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8510 ins_encode %{
8511 __ tzcntl($dst$$Register, $src$$Register);
8512 %}
8513 ins_pipe(ialu_reg);
8514 %}
8515
8516 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8517 predicate(UseCountTrailingZerosInstruction);
8518 match(Set dst (CountTrailingZerosI (LoadI src)));
8519 effect(KILL cr);
8520 ins_cost(175);
8521 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8522 ins_encode %{
8523 __ tzcntl($dst$$Register, $src$$Address);
8524 %}
8525 ins_pipe(ialu_reg_mem);
8526 %}
8527
8528 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8529 predicate(!UseCountTrailingZerosInstruction);
8530 match(Set dst (CountTrailingZerosI src));
8531 effect(KILL cr);
8532
8533 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8534 "jnz done\n\t"
8535 "movl $dst, 32\n"
8536 "done:" %}
8537 ins_encode %{
8538 Register Rdst = $dst$$Register;
8539 Label done;
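    // bsf leaves the destination undefined when the source is zero, so the
    // branch falls through to load BitsPerInt (32) explicitly.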
8540 __ bsfl(Rdst, $src$$Register);
8541 __ jccb(Assembler::notZero, done);
8542 __ movl(Rdst, BitsPerInt);
8543 __ bind(done);
8544 %}
8545 ins_pipe(ialu_reg);
8546 %}
8547
8548 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8549 predicate(UseCountTrailingZerosInstruction);
8550 match(Set dst (CountTrailingZerosL src));
8551 effect(KILL cr);
8552
8553 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8554 ins_encode %{
8555 __ tzcntq($dst$$Register, $src$$Register);
8556 %}
8557 ins_pipe(ialu_reg);
8558 %}
8559
8560 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8561 predicate(UseCountTrailingZerosInstruction);
8562 match(Set dst (CountTrailingZerosL (LoadL src)));
8563 effect(KILL cr);
8564 ins_cost(175);
8565 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8566 ins_encode %{
8567 __ tzcntq($dst$$Register, $src$$Address);
8568 %}
8569 ins_pipe(ialu_reg_mem);
8570 %}
8571
8572 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8573 predicate(!UseCountTrailingZerosInstruction);
8574 match(Set dst (CountTrailingZerosL src));
8575 effect(KILL cr);
8576
8577 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8578 "jnz done\n\t"
8579 "movl $dst, 64\n"
8580 "done:" %}
8581 ins_encode %{
8582 Register Rdst = $dst$$Register;
8583 Label done;
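    // As in the int case: bsf is undefined for zero input, so load
    // BitsPerLong (64) when no bit is set.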
8584 __ bsfq(Rdst, $src$$Register);
8585 __ jccb(Assembler::notZero, done);
8586 __ movl(Rdst, BitsPerLong);
8587 __ bind(done);
8588 %}
8589 ins_pipe(ialu_reg);
8590 %}
8591
8592 //--------------- Reverse Operation Instructions ----------------
8593 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8594 predicate(!VM_Version::supports_gfni());
8595 match(Set dst (ReverseI src));
8596 effect(TEMP dst, TEMP rtmp, KILL cr);
8597 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8598 ins_encode %{
8599 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8600 %}
8601 ins_pipe( ialu_reg );
8602 %}
8603
8604 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8605 predicate(VM_Version::supports_gfni());
8606 match(Set dst (ReverseI src));
8607 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8608 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8609 ins_encode %{
8610 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8611 %}
8612 ins_pipe( ialu_reg );
8613 %}
8614
8615 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8616 predicate(!VM_Version::supports_gfni());
8617 match(Set dst (ReverseL src));
8618 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8619 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8620 ins_encode %{
8621 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8622 %}
8623 ins_pipe( ialu_reg );
8624 %}
8625
8626 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8627 predicate(VM_Version::supports_gfni());
8628 match(Set dst (ReverseL src));
8629 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8630 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8631 ins_encode %{
8632 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8633 %}
8634 ins_pipe( ialu_reg );
8635 %}
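
// The Reverse nodes implement Integer.reverse/Long.reverse. Without GFNI the
// scalar macro-assembler helpers do the work in GPRs; one standard way to
// express the 32-bit bit reversal they compute is the swap-halves ladder
// below (a C++ sketch of the semantics, not necessarily the exact instruction
// sequence emitted):
//
//   unsigned int reverse32(unsigned int x) {
//     x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u); // odd/even bits
//     x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u); // bit pairs
//     x = ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu); // nibbles
//     return (x << 24) | ((x & 0xFF00u) << 8) |
//            ((x >> 8) & 0xFF00u) | (x >> 24);                 // byte swap
//   }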

//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    __ popcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    __ popcntq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
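
// popcnt directly implements Integer.bitCount/Long.bitCount when
// UsePopCountInstruction is set. For reference, the classic SWAR computation
// it replaces (illustrative C++ sketch, not part of the build):
//
//   int bitcount32(unsigned int x) {
//     x = x - ((x >> 1) & 0x55555555u);                 // 2-bit partial sums
//     x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); // 4-bit partial sums
//     x = (x + (x >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
//     return (int)((x * 0x01010101u) >> 24);            // sum the four bytes
//   }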

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
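
// The locked add of zero to the top-of-stack slot is the usual cheap
// StoreLoad barrier on x86: a locked read-modify-write serializes like mfence
// but is typically faster. A GCC/Clang-style C++ sketch of the same idiom
// (illustrative only, not part of the build):
//
//   static inline void store_load_fence() {
//     __asm__ volatile("lock; addl $0, (%%rsp)" ::: "cc", "memory");
//   }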

instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
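
// Under the x86-TSO memory model only StoreLoad reordering is observable, so
// the acquire, release, and storestore flavors are compile-time-only barriers
// (size(0), empty encoding) that merely constrain the scheduler;
// membar_volatile above is the one flavor that must emit code.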

//----------Move Instructions--------------------------------------------------

instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop into int for vector alignment masking
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert compressed oop into int for vector alignment masking
// in the case of 32-bit oops (heap < 4Gb).
instruct convN2I(rRegI dst, rRegN src)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  format %{ "movl $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop pointer into compressed form
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);
    }
    __ encode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);
    }
    __ decode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ decode_heap_oop_not_null(d, s);
    } else {
      __ decode_heap_oop_not_null(d);
    }
  %}
  ins_pipe(ialu_reg_long);
%}
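
// Arithmetic behind the compressed-oop forms above:
// narrow = (oop - heap_base) >> shift and oop = heap_base + (narrow << shift),
// with the _not_null variants eliding the null check. A C++ sketch under
// assumed base/shift parameters (illustrative only; the real helpers also
// special-case a zero base and null oops):
//
//   unsigned int encode(unsigned long long oop,
//                       unsigned long long base, int shift) {
//     return (unsigned int)((oop - base) >> shift);
//   }
//   unsigned long long decode(unsigned int narrow,
//                             unsigned long long base, int shift) {
//     return base + ((unsigned long long)narrow << shift);
//   }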

instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

//----------Conditional Move---------------------------------------------------
// Jump
// dummy instruction for generating temp registers
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
  match(Jump (LShiftL switch_val shift));
  ins_cost(350);
  predicate(false);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, Address::times_1);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
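
// All three jump-table forms compute the dispatch address by hand: lea pulls
// the table base out of the constant section, then an indexed indirect jmp
// selects the target. Morally (illustrative C++ sketch, with a call standing
// in for the tail jump; not part of the build):
//
//   typedef void (*target_t)();
//   void dispatch(target_t const* table, unsigned long idx) {
//     table[idx]();   // jmp [dest + switch_val << shift]
//   }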

// Conditional move
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
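
// cmovI_imm_01 recognizes CMove(cond, 1, 0): the selected value is simply
// !cond materialized as a byte, so one setb of the negated condition replaces
// a load-immediate plus cmov. Sketch of the selection (illustrative only):
//
//   int select01(bool cond) { return cond ? 0 : 1; }  // == setbn<cond> $dst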

instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
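
// APX "new data destination" (ndd) cmov is three-operand and non-destructive:
// neither source is overwritten, and one instruction does the work of a mov
// plus a cmov. Sketch of the selection it performs (illustrative only):
//
//   int ecmov(bool cond, int src1, int src2) {
//     return cond ? src2 : src1;   // ecmovl<cond> dst, src1, src2
//   }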

instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpl $dst, $src1, $src2\n\t"
            "cmovnel $dst, $src2" %}
  ins_encode %{
    __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  effect(TEMP dst);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// This special handling is needed only for eq/ne comparisons, since NaN == NaN
// is false and the parity flag is set if either operand is NaN.
instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpl $dst, $src1, $src2\n\t"
            "cmovnel $dst, $src2" %}
  ins_encode %{
    __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
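
// The UCF2 eq/ne forms pair a parity cmov with a not-equal cmov because
// ucomiss/ucomisd report an unordered (NaN) operand through PF, and NaN must
// compare not-equal. Net effect for the ne case (illustrative sketch, not
// part of the build):
//
//   int cmove_ne(bool zf, bool pf, int dst, int src) {
//     if (pf)  dst = src;   // unordered: NaN operand, treat as not-equal
//     if (!zf) dst = src;   // ordinary not-equal
//     return dst;
//   }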

// Conditional move
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move ndd
instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}

// Conditional move ndd
instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move ndd
instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move ndd
instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpq $dst, $src1, $src2\n\t"
            "cmovneq $dst, $src2" %}
  ins_encode %{
    __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpq $dst, $src1, $src2\n\t"
            "cmovneq $dst, $src2" %}
  ins_encode %{
    __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}

instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpq $dst, $src1, $src2\n\t"
            "cmovneq $dst, $src2" %}
  ins_encode %{
    __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpq $dst, $src1, $src2\n\t"
            "cmovneq $dst, $src2" %}
  ins_encode %{
    __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}

instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}

instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}

instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
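
// There is no conditional move between XMM registers, so the float/double
// CMove forms above branch around a plain register move; the jccb condition
// is the CMove condition inverted ($cmpcode ^ 1). Equivalent selection
// (illustrative sketch, not part of the build):
//
//   double cmov_fp(bool cond, double dst, double src) {
//     if (cond) dst = src;   // jn<cond> skip; movsd dst, src; skip:
//     return dst;
//   }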

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}

// XXX why does that use AddI
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// XXX why does that use AddI
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
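
// The lea forms fold a shift and up to two adds into a single
// address-generation instruction and, unlike add, leave rFlags untouched
// (note the absence of a KILL cr effect above). What the 3-operand case
// computes (illustrative C++ sketch, not part of the build):
//
//   int lea3(int base, int index, int scale_log2, int disp) {
//     return base + (index << scale_log2) + disp; // leal $dst, [base + index << scale + disp]
//   }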
10096
10097 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10098 %{
10099 predicate(!UseAPX);
10100 match(Set dst (AddL dst src));
10101 effect(KILL cr);
10102 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10103
10104 format %{ "addq $dst, $src\t# long" %}
10105 ins_encode %{
10106 __ addq($dst$$Register, $src$$Register);
10107 %}
10108 ins_pipe(ialu_reg_reg);
10109 %}
10110
10111 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10112 %{
10113 predicate(UseAPX);
10114 match(Set dst (AddL src1 src2));
10115 effect(KILL cr);
10116 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10117
10118 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10119 ins_encode %{
10120 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10121 %}
10122 ins_pipe(ialu_reg_reg);
10123 %}
10124
10125 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10126 %{
10127 predicate(!UseAPX);
10128 match(Set dst (AddL dst src));
10129 effect(KILL cr);
10130 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10131
10132 format %{ "addq $dst, $src\t# long" %}
10133 ins_encode %{
10134 __ addq($dst$$Register, $src$$constant);
10135 %}
10136 ins_pipe( ialu_reg );
10137 %}
10138
10139 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10140 %{
10141 predicate(UseAPX);
10142 match(Set dst (AddL src1 src2));
10143 effect(KILL cr);
10144 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10145
10146 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10147 ins_encode %{
10148 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10149 %}
10150 ins_pipe( ialu_reg );
10151 %}
10152
10153 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10154 %{
10155 predicate(UseAPX);
10156 match(Set dst (AddL (LoadL src1) src2));
10157 effect(KILL cr);
10158 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10159
10160 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10161 ins_encode %{
10162 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10163 %}
10164 ins_pipe( ialu_reg );
10165 %}
10166
10167 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10168 %{
10169 predicate(!UseAPX);
10170 match(Set dst (AddL dst (LoadL src)));
10171 effect(KILL cr);
10172 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10173
10174 ins_cost(150); // XXX
10175 format %{ "addq $dst, $src\t# long" %}
10176 ins_encode %{
10177 __ addq($dst$$Register, $src$$Address);
10178 %}
10179 ins_pipe(ialu_reg_mem);
10180 %}
10181
10182 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10183 %{
10184 predicate(UseAPX);
10185 match(Set dst (AddL src1 (LoadL src2)));
10186 effect(KILL cr);
10187 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10188
10189 ins_cost(150);
10190 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10191 ins_encode %{
10192 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10193 %}
10194 ins_pipe(ialu_reg_mem);
10195 %}
10196
10197 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10198 %{
10199 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10200 effect(KILL cr);
10201 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10202
10203 ins_cost(150); // XXX
10204 format %{ "addq $dst, $src\t# long" %}
10205 ins_encode %{
10206 __ addq($dst$$Address, $src$$Register);
10207 %}
10208 ins_pipe(ialu_mem_reg);
10209 %}
10210
10211 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10212 %{
10213 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10214 effect(KILL cr);
10215 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10216
10217 ins_cost(125); // XXX
10218 format %{ "addq $dst, $src\t# long" %}
10219 ins_encode %{
10220 __ addq($dst$$Address, $src$$constant);
10221 %}
10222 ins_pipe(ialu_mem_imm);
10223 %}
10224
10225 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10226 %{
10227 predicate(!UseAPX && UseIncDec);
10228 match(Set dst (AddL dst src));
10229 effect(KILL cr);
10230
10231 format %{ "incq $dst\t# long" %}
10232 ins_encode %{
10233 __ incrementq($dst$$Register);
10234 %}
10235 ins_pipe(ialu_reg);
10236 %}
10237
10238 instruct incL_rReg_ndd(rRegL dst, rRegI src, immL1 val, rFlagsReg cr)
10239 %{
10240 predicate(UseAPX && UseIncDec);
10241 match(Set dst (AddL src val));
10242 effect(KILL cr);
10243
10244 format %{ "eincq $dst, $src\t# long ndd" %}
10245 ins_encode %{
10246 __ eincq($dst$$Register, $src$$Register, false);
10247 %}
10248 ins_pipe(ialu_reg);
10249 %}
10250
10251 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10252 %{
10253 predicate(UseAPX && UseIncDec);
10254 match(Set dst (AddL (LoadL src) val));
10255 effect(KILL cr);
10256
10257 format %{ "eincq $dst, $src\t# long ndd" %}
10258 ins_encode %{
10259 __ eincq($dst$$Register, $src$$Address, false);
10260 %}
10261 ins_pipe(ialu_reg);
10262 %}
10263
10264 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10265 %{
10266 predicate(UseIncDec);
10267 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10268 effect(KILL cr);
10269
10270 ins_cost(125); // XXX
10271 format %{ "incq $dst\t# long" %}
10272 ins_encode %{
10273 __ incrementq($dst$$Address);
10274 %}
10275 ins_pipe(ialu_mem_imm);
10276 %}
10277
10278 // XXX why does that use AddL
10279 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10280 %{
10281 predicate(!UseAPX && UseIncDec);
10282 match(Set dst (AddL dst src));
10283 effect(KILL cr);
10284
10285 format %{ "decq $dst\t# long" %}
10286 ins_encode %{
10287 __ decrementq($dst$$Register);
10288 %}
10289 ins_pipe(ialu_reg);
10290 %}
10291
10292 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10293 %{
10294 predicate(UseAPX && UseIncDec);
10295 match(Set dst (AddL src val));
10296 effect(KILL cr);
10297
10298 format %{ "edecq $dst, $src\t# long ndd" %}
10299 ins_encode %{
10300 __ edecq($dst$$Register, $src$$Register, false);
10301 %}
10302 ins_pipe(ialu_reg);
10303 %}
10304
10305 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10306 %{
10307 predicate(UseAPX && UseIncDec);
10308 match(Set dst (AddL (LoadL src) val));
10309 effect(KILL cr);
10310
10311 format %{ "edecq $dst, $src\t# long ndd" %}
10312 ins_encode %{
10313 __ edecq($dst$$Register, $src$$Address, false);
10314 %}
10315 ins_pipe(ialu_reg);
10316 %}
10317
10318 // XXX why does that use AddL
10319 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10320 %{
10321 predicate(UseIncDec);
10322 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10323 effect(KILL cr);
10324
10325 ins_cost(125); // XXX
10326 format %{ "decq $dst\t# long" %}
10327 ins_encode %{
10328 __ decrementq($dst$$Address);
10329 %}
10330 ins_pipe(ialu_mem_imm);
10331 %}
10332
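// The leaL rules fold shift-and-add address arithmetic into a single lea,
// which neither reads nor writes flags (note: no KILL cr). For example,
// base + (index << 2) + disp becomes one "leaq dst, [base + index*4 + disp]"
// instead of a shift and two adds. The supports_fast_{2,3}op_lea()
// predicates restrict this to CPUs where a multi-component lea is fast.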
10333 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10334 %{
10335 predicate(VM_Version::supports_fast_2op_lea());
10336 match(Set dst (AddL (LShiftL index scale) disp));
10337
10338 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10339 ins_encode %{
10340 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10341 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10342 %}
10343 ins_pipe(ialu_reg_reg);
10344 %}
10345
10346 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10347 %{
10348 predicate(VM_Version::supports_fast_3op_lea());
10349 match(Set dst (AddL (AddL base index) disp));
10350
10351 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10352 ins_encode %{
10353 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10354 %}
10355 ins_pipe(ialu_reg_reg);
10356 %}
10357
10358 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10359 %{
10360 predicate(VM_Version::supports_fast_2op_lea());
10361 match(Set dst (AddL base (LShiftL index scale)));
10362
10363 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10364 ins_encode %{
10365 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10366 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10367 %}
10368 ins_pipe(ialu_reg_reg);
10369 %}
10370
10371 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10372 %{
10373 predicate(VM_Version::supports_fast_3op_lea());
10374 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10375
10376 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10377 ins_encode %{
10378 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10379 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10380 %}
10381 ins_pipe(ialu_reg_reg);
10382 %}
10383
10384 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10385 %{
10386 match(Set dst (AddP dst src));
10387 effect(KILL cr);
10388 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10389
10390 format %{ "addq $dst, $src\t# ptr" %}
10391 ins_encode %{
10392 __ addq($dst$$Register, $src$$Register);
10393 %}
10394 ins_pipe(ialu_reg_reg);
10395 %}
10396
10397 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10398 %{
10399 match(Set dst (AddP dst src));
10400 effect(KILL cr);
10401 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10402
10403 format %{ "addq $dst, $src\t# ptr" %}
10404 ins_encode %{
10405 __ addq($dst$$Register, $src$$constant);
10406 %}
10407 ins_pipe( ialu_reg );
10408 %}
10409
10410 // XXX addP mem ops ????
10411
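// The cast rules below only carry type information through the matcher:
// with size(0) and an empty encoding they emit no machine code. When
// VerifyConstraintCasts is enabled, the checked CastII/CastLL variants
// instead emit a runtime range check.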
10412 instruct checkCastPP(rRegP dst)
10413 %{
10414 match(Set dst (CheckCastPP dst));
10415
10416 size(0);
10417 format %{ "# checkcastPP of $dst" %}
10418 ins_encode(/* empty encoding */);
10419 ins_pipe(empty);
10420 %}
10421
10422 instruct castPP(rRegP dst)
10423 %{
10424 match(Set dst (CastPP dst));
10425
10426 size(0);
10427 format %{ "# castPP of $dst" %}
10428 ins_encode(/* empty encoding */);
10429 ins_pipe(empty);
10430 %}
10431
10432 instruct castII(rRegI dst)
10433 %{
10434 predicate(VerifyConstraintCasts == 0);
10435 match(Set dst (CastII dst));
10436
10437 size(0);
10438 format %{ "# castII of $dst" %}
10439 ins_encode(/* empty encoding */);
10440 ins_cost(0);
10441 ins_pipe(empty);
10442 %}
10443
10444 instruct castII_checked(rRegI dst, rFlagsReg cr)
10445 %{
10446 predicate(VerifyConstraintCasts > 0);
10447 match(Set dst (CastII dst));
10448
10449 effect(KILL cr);
10450 format %{ "# cast_checked_II $dst" %}
10451 ins_encode %{
10452 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10453 %}
10454 ins_pipe(pipe_slow);
10455 %}
10456
10457 instruct castLL(rRegL dst)
10458 %{
10459 predicate(VerifyConstraintCasts == 0);
10460 match(Set dst (CastLL dst));
10461
10462 size(0);
10463 format %{ "# castLL of $dst" %}
10464 ins_encode(/* empty encoding */);
10465 ins_cost(0);
10466 ins_pipe(empty);
10467 %}
10468
10469 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10470 %{
10471 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10472 match(Set dst (CastLL dst));
10473
10474 effect(KILL cr);
10475 format %{ "# cast_checked_LL $dst" %}
10476 ins_encode %{
10477 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10478 %}
10479 ins_pipe(pipe_slow);
10480 %}
10481
10482 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10483 %{
10484 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10485 match(Set dst (CastLL dst));
10486
10487 effect(KILL cr, TEMP tmp);
10488 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10489 ins_encode %{
10490 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10491 %}
10492 ins_pipe(pipe_slow);
10493 %}
10494
10495 instruct castFF(regF dst)
10496 %{
10497 match(Set dst (CastFF dst));
10498
10499 size(0);
10500 format %{ "# castFF of $dst" %}
10501 ins_encode(/* empty encoding */);
10502 ins_cost(0);
10503 ins_pipe(empty);
10504 %}
10505
10506 instruct castHH(regF dst)
10507 %{
10508 match(Set dst (CastHH dst));
10509
10510 size(0);
10511 format %{ "# castHH of $dst" %}
10512 ins_encode(/* empty encoding */);
10513 ins_cost(0);
10514 ins_pipe(empty);
10515 %}
10516
10517 instruct castDD(regD dst)
10518 %{
10519 match(Set dst (CastDD dst));
10520
10521 size(0);
10522 format %{ "# castDD of $dst" %}
10523 ins_encode(/* empty encoding */);
10524 ins_cost(0);
10525 ins_pipe(empty);
10526 %}
10527
10528 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
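// All CAS rules share one protocol: the expected value is pinned in rax
// (rax_Reg* operand), then a locked cmpxchg compares rax with the memory
// operand and stores $newval only on a match, setting ZF. Roughly:
//
//   lock cmpxchgq $newval, [$mem_ptr]   # ZF = (rax == [$mem_ptr])
//   setcc(equal, $res)                  # res = ZF ? 1 : 0
//
// On failure cmpxchg writes the current memory value into rax, which is
// why oldval carries a KILL effect.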
10529 instruct compareAndSwapP(rRegI res,
10530 memory mem_ptr,
10531 rax_RegP oldval, rRegP newval,
10532 rFlagsReg cr)
10533 %{
10534 predicate(n->as_LoadStore()->barrier_data() == 0);
10535 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10536 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10537 effect(KILL cr, KILL oldval);
10538
10539 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10540 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10541 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10542 ins_encode %{
10543 __ lock();
10544 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10545 __ setcc(Assembler::equal, $res$$Register);
10546 %}
10547 ins_pipe( pipe_cmpxchg );
10548 %}
10549
10550 instruct compareAndSwapL(rRegI res,
10551 memory mem_ptr,
10552 rax_RegL oldval, rRegL newval,
10553 rFlagsReg cr)
10554 %{
10555 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10556 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10557 effect(KILL cr, KILL oldval);
10558
10559 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10560 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10561 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10562 ins_encode %{
10563 __ lock();
10564 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10565 __ setcc(Assembler::equal, $res$$Register);
10566 %}
10567 ins_pipe( pipe_cmpxchg );
10568 %}
10569
10570 instruct compareAndSwapI(rRegI res,
10571 memory mem_ptr,
10572 rax_RegI oldval, rRegI newval,
10573 rFlagsReg cr)
10574 %{
10575 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10576 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10577 effect(KILL cr, KILL oldval);
10578
10579 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10580 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10581 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10582 ins_encode %{
10583 __ lock();
10584 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10585 __ setcc(Assembler::equal, $res$$Register);
10586 %}
10587 ins_pipe( pipe_cmpxchg );
10588 %}
10589
10590 instruct compareAndSwapB(rRegI res,
10591 memory mem_ptr,
10592 rax_RegI oldval, rRegI newval,
10593 rFlagsReg cr)
10594 %{
10595 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10596 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10597 effect(KILL cr, KILL oldval);
10598
10599 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10600 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10601 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10602 ins_encode %{
10603 __ lock();
10604 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10605 __ setcc(Assembler::equal, $res$$Register);
10606 %}
10607 ins_pipe( pipe_cmpxchg );
10608 %}
10609
10610 instruct compareAndSwapS(rRegI res,
10611 memory mem_ptr,
10612 rax_RegI oldval, rRegI newval,
10613 rFlagsReg cr)
10614 %{
10615 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10616 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10617 effect(KILL cr, KILL oldval);
10618
10619 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10620 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10621 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10622 ins_encode %{
10623 __ lock();
10624 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10625 __ setcc(Assembler::equal, $res$$Register);
10626 %}
10627 ins_pipe( pipe_cmpxchg );
10628 %}
10629
10630 instruct compareAndSwapN(rRegI res,
10631 memory mem_ptr,
10632 rax_RegN oldval, rRegN newval,
10633 rFlagsReg cr) %{
10634 predicate(n->as_LoadStore()->barrier_data() == 0);
10635 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10636 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10637 effect(KILL cr, KILL oldval);
10638
10639 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10640 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10641 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10642 ins_encode %{
10643 __ lock();
10644 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10645 __ setcc(Assembler::equal, $res$$Register);
10646 %}
10647 ins_pipe( pipe_cmpxchg );
10648 %}
10649
10650 instruct compareAndExchangeB(
10651 memory mem_ptr,
10652 rax_RegI oldval, rRegI newval,
10653 rFlagsReg cr)
10654 %{
10655 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10656 effect(KILL cr);
10657
10658 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10659 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10660 ins_encode %{
10661 __ lock();
10662 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10663 %}
10664 ins_pipe( pipe_cmpxchg );
10665 %}
10666
10667 instruct compareAndExchangeS(
10668 memory mem_ptr,
10669 rax_RegI oldval, rRegI newval,
10670 rFlagsReg cr)
10671 %{
10672 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10673 effect(KILL cr);
10674
10675 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10676 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10677 ins_encode %{
10678 __ lock();
10679 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10680 %}
10681 ins_pipe( pipe_cmpxchg );
10682 %}
10683
10684 instruct compareAndExchangeI(
10685 memory mem_ptr,
10686 rax_RegI oldval, rRegI newval,
10687 rFlagsReg cr)
10688 %{
10689 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10690 effect(KILL cr);
10691
10692 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10693 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10694 ins_encode %{
10695 __ lock();
10696 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10697 %}
10698 ins_pipe( pipe_cmpxchg );
10699 %}
10700
10701 instruct compareAndExchangeL(
10702 memory mem_ptr,
10703 rax_RegL oldval, rRegL newval,
10704 rFlagsReg cr)
10705 %{
10706 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10707 effect(KILL cr);
10708
10709 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10710 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10711 ins_encode %{
10712 __ lock();
10713 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10714 %}
10715 ins_pipe( pipe_cmpxchg );
10716 %}
10717
10718 instruct compareAndExchangeN(
10719 memory mem_ptr,
10720 rax_RegN oldval, rRegN newval,
10721 rFlagsReg cr) %{
10722 predicate(n->as_LoadStore()->barrier_data() == 0);
10723 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10724 effect(KILL cr);
10725
10726 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10727 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10728 ins_encode %{
10729 __ lock();
10730 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10731 %}
10732 ins_pipe( pipe_cmpxchg );
10733 %}
10734
10735 instruct compareAndExchangeP(
10736 memory mem_ptr,
10737 rax_RegP oldval, rRegP newval,
10738 rFlagsReg cr)
10739 %{
10740 predicate(n->as_LoadStore()->barrier_data() == 0);
10741 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10742 effect(KILL cr);
10743
10744 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10745 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10746 ins_encode %{
10747 __ lock();
10748 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10749 %}
10750 ins_pipe( pipe_cmpxchg );
10751 %}
10752
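// GetAndAdd lowers two ways: when the old value is unused
// (result_not_used()), a plain locked add is enough and frees the result
// register; otherwise a locked xadd is emitted, leaving the previous
// memory value in $newval's register.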
10753 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10754 predicate(n->as_LoadStore()->result_not_used());
10755 match(Set dummy (GetAndAddB mem add));
10756 effect(KILL cr);
10757 format %{ "addb_lock $mem, $add" %}
10758 ins_encode %{
10759 __ lock();
10760 __ addb($mem$$Address, $add$$Register);
10761 %}
10762 ins_pipe(pipe_cmpxchg);
10763 %}
10764
10765 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10766 predicate(n->as_LoadStore()->result_not_used());
10767 match(Set dummy (GetAndAddB mem add));
10768 effect(KILL cr);
10769 format %{ "addb_lock $mem, $add" %}
10770 ins_encode %{
10771 __ lock();
10772 __ addb($mem$$Address, $add$$constant);
10773 %}
10774 ins_pipe(pipe_cmpxchg);
10775 %}
10776
10777 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10778 predicate(!n->as_LoadStore()->result_not_used());
10779 match(Set newval (GetAndAddB mem newval));
10780 effect(KILL cr);
10781 format %{ "xaddb_lock $mem, $newval" %}
10782 ins_encode %{
10783 __ lock();
10784 __ xaddb($mem$$Address, $newval$$Register);
10785 %}
10786 ins_pipe(pipe_cmpxchg);
10787 %}
10788
10789 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10790 predicate(n->as_LoadStore()->result_not_used());
10791 match(Set dummy (GetAndAddS mem add));
10792 effect(KILL cr);
10793 format %{ "addw_lock $mem, $add" %}
10794 ins_encode %{
10795 __ lock();
10796 __ addw($mem$$Address, $add$$Register);
10797 %}
10798 ins_pipe(pipe_cmpxchg);
10799 %}
10800
10801 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10802 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10803 match(Set dummy (GetAndAddS mem add));
10804 effect(KILL cr);
10805 format %{ "addw_lock $mem, $add" %}
10806 ins_encode %{
10807 __ lock();
10808 __ addw($mem$$Address, $add$$constant);
10809 %}
10810 ins_pipe(pipe_cmpxchg);
10811 %}
10812
10813 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10814 predicate(!n->as_LoadStore()->result_not_used());
10815 match(Set newval (GetAndAddS mem newval));
10816 effect(KILL cr);
10817 format %{ "xaddw_lock $mem, $newval" %}
10818 ins_encode %{
10819 __ lock();
10820 __ xaddw($mem$$Address, $newval$$Register);
10821 %}
10822 ins_pipe(pipe_cmpxchg);
10823 %}
10824
10825 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10826 predicate(n->as_LoadStore()->result_not_used());
10827 match(Set dummy (GetAndAddI mem add));
10828 effect(KILL cr);
10829 format %{ "addl_lock $mem, $add" %}
10830 ins_encode %{
10831 __ lock();
10832 __ addl($mem$$Address, $add$$Register);
10833 %}
10834 ins_pipe(pipe_cmpxchg);
10835 %}
10836
10837 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10838 predicate(n->as_LoadStore()->result_not_used());
10839 match(Set dummy (GetAndAddI mem add));
10840 effect(KILL cr);
10841 format %{ "addl_lock $mem, $add" %}
10842 ins_encode %{
10843 __ lock();
10844 __ addl($mem$$Address, $add$$constant);
10845 %}
10846 ins_pipe(pipe_cmpxchg);
10847 %}
10848
10849 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10850 predicate(!n->as_LoadStore()->result_not_used());
10851 match(Set newval (GetAndAddI mem newval));
10852 effect(KILL cr);
10853 format %{ "xaddl_lock $mem, $newval" %}
10854 ins_encode %{
10855 __ lock();
10856 __ xaddl($mem$$Address, $newval$$Register);
10857 %}
10858 ins_pipe(pipe_cmpxchg);
10859 %}
10860
10861 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10862 predicate(n->as_LoadStore()->result_not_used());
10863 match(Set dummy (GetAndAddL mem add));
10864 effect(KILL cr);
10865 format %{ "addq_lock $mem, $add" %}
10866 ins_encode %{
10867 __ lock();
10868 __ addq($mem$$Address, $add$$Register);
10869 %}
10870 ins_pipe(pipe_cmpxchg);
10871 %}
10872
10873 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10874 predicate(n->as_LoadStore()->result_not_used());
10875 match(Set dummy (GetAndAddL mem add));
10876 effect(KILL cr);
10877 format %{ "addq_lock $mem, $add" %}
10878 ins_encode %{
10879 __ lock();
10880 __ addq($mem$$Address, $add$$constant);
10881 %}
10882 ins_pipe(pipe_cmpxchg);
10883 %}
10884
10885 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10886 predicate(!n->as_LoadStore()->result_not_used());
10887 match(Set newval (GetAndAddL mem newval));
10888 effect(KILL cr);
10889 format %{ "xaddq_lock $mem, $newval" %}
10890 ins_encode %{
10891 __ lock();
10892 __ xaddq($mem$$Address, $newval$$Register);
10893 %}
10894 ins_pipe(pipe_cmpxchg);
10895 %}
10896
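// xchg with a memory operand is implicitly locked on x86, so GetAndSet
// needs no explicit lock prefix and touches no flags.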
10897 instruct xchgB( memory mem, rRegI newval) %{
10898 match(Set newval (GetAndSetB mem newval));
10899 format %{ "XCHGB $newval,[$mem]" %}
10900 ins_encode %{
10901 __ xchgb($newval$$Register, $mem$$Address);
10902 %}
10903 ins_pipe( pipe_cmpxchg );
10904 %}
10905
10906 instruct xchgS( memory mem, rRegI newval) %{
10907 match(Set newval (GetAndSetS mem newval));
10908 format %{ "XCHGW $newval,[$mem]" %}
10909 ins_encode %{
10910 __ xchgw($newval$$Register, $mem$$Address);
10911 %}
10912 ins_pipe( pipe_cmpxchg );
10913 %}
10914
10915 instruct xchgI( memory mem, rRegI newval) %{
10916 match(Set newval (GetAndSetI mem newval));
10917 format %{ "XCHGL $newval,[$mem]" %}
10918 ins_encode %{
10919 __ xchgl($newval$$Register, $mem$$Address);
10920 %}
10921 ins_pipe( pipe_cmpxchg );
10922 %}
10923
10924 instruct xchgL( memory mem, rRegL newval) %{
10925 match(Set newval (GetAndSetL mem newval));
10926 format %{ "XCHGL $newval,[$mem]" %}
10927 ins_encode %{
10928 __ xchgq($newval$$Register, $mem$$Address);
10929 %}
10930 ins_pipe( pipe_cmpxchg );
10931 %}
10932
10933 instruct xchgP( memory mem, rRegP newval) %{
10934 match(Set newval (GetAndSetP mem newval));
10935 predicate(n->as_LoadStore()->barrier_data() == 0);
10936 format %{ "XCHGQ $newval,[$mem]" %}
10937 ins_encode %{
10938 __ xchgq($newval$$Register, $mem$$Address);
10939 %}
10940 ins_pipe( pipe_cmpxchg );
10941 %}
10942
10943 instruct xchgN( memory mem, rRegN newval) %{
10944 predicate(n->as_LoadStore()->barrier_data() == 0);
10945 match(Set newval (GetAndSetN mem newval));
10946 format %{ "XCHGL $newval,$mem]" %}
10947 ins_encode %{
10948 __ xchgl($newval$$Register, $mem$$Address);
10949 %}
10950 ins_pipe( pipe_cmpxchg );
10951 %}
10952
10953 //----------Abs Instructions-------------------------------------------
10954
10955 // Integer Absolute Instructions
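// Branchless absolute value: compute dst = 0 - src, then let cmov pick
// src back when src > 0 (the subtraction's flags make "less" mean
// 0 < src), leaving dst = -src for src <= 0. absL may zero dst with xorl
// because a 32-bit register write clears the upper 32 bits on x86-64.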
10956 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10957 %{
10958 match(Set dst (AbsI src));
10959 effect(TEMP dst, KILL cr);
10960 format %{ "xorl $dst, $dst\t# abs int\n\t"
10961 "subl $dst, $src\n\t"
10962 "cmovll $dst, $src" %}
10963 ins_encode %{
10964 __ xorl($dst$$Register, $dst$$Register);
10965 __ subl($dst$$Register, $src$$Register);
10966 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
10967 %}
10968
10969 ins_pipe(ialu_reg_reg);
10970 %}
10971
10972 // Long Absolute Instructions
10973 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10974 %{
10975 match(Set dst (AbsL src));
10976 effect(TEMP dst, KILL cr);
10977 format %{ "xorl $dst, $dst\t# abs long\n\t"
10978 "subq $dst, $src\n\t"
10979 "cmovlq $dst, $src" %}
10980 ins_encode %{
10981 __ xorl($dst$$Register, $dst$$Register);
10982 __ subq($dst$$Register, $src$$Register);
10983 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
10984 %}
10985
10986 ins_pipe(ialu_reg_reg);
10987 %}
10988
10989 //----------Subtraction Instructions-------------------------------------------
10990
10991 // Integer Subtraction Instructions
10992 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10993 %{
10994 predicate(!UseAPX);
10995 match(Set dst (SubI dst src));
10996 effect(KILL cr);
10997 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10998
10999 format %{ "subl $dst, $src\t# int" %}
11000 ins_encode %{
11001 __ subl($dst$$Register, $src$$Register);
11002 %}
11003 ins_pipe(ialu_reg_reg);
11004 %}
11005
11006 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11007 %{
11008 predicate(UseAPX);
11009 match(Set dst (SubI src1 src2));
11010 effect(KILL cr);
11011 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11012
11013 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11014 ins_encode %{
11015 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11016 %}
11017 ins_pipe(ialu_reg_reg);
11018 %}
11019
11020 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11021 %{
11022 predicate(UseAPX);
11023 match(Set dst (SubI src1 src2));
11024 effect(KILL cr);
11025 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11026
11027 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11028 ins_encode %{
11029 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11030 %}
11031 ins_pipe(ialu_reg_reg);
11032 %}
11033
11034 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11035 %{
11036 predicate(UseAPX);
11037 match(Set dst (SubI (LoadI src1) src2));
11038 effect(KILL cr);
11039 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11040
11041 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11042 ins_encode %{
11043 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11044 %}
11045 ins_pipe(ialu_reg_reg);
11046 %}
11047
11048 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11049 %{
11050 predicate(!UseAPX);
11051 match(Set dst (SubI dst (LoadI src)));
11052 effect(KILL cr);
11053 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11054
11055 ins_cost(150);
11056 format %{ "subl $dst, $src\t# int" %}
11057 ins_encode %{
11058 __ subl($dst$$Register, $src$$Address);
11059 %}
11060 ins_pipe(ialu_reg_mem);
11061 %}
11062
11063 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11064 %{
11065 predicate(UseAPX);
11066 match(Set dst (SubI src1 (LoadI src2)));
11067 effect(KILL cr);
11068 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11069
11070 ins_cost(150);
11071 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11072 ins_encode %{
11073 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11074 %}
11075 ins_pipe(ialu_reg_mem);
11076 %}
11077
11078 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11079 %{
11080 predicate(UseAPX);
11081 match(Set dst (SubI (LoadI src1) src2));
11082 effect(KILL cr);
11083 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11084
11085 ins_cost(150);
11086 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11087 ins_encode %{
11088 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11089 %}
11090 ins_pipe(ialu_reg_mem);
11091 %}
11092
11093 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11094 %{
11095 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11096 effect(KILL cr);
11097 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11098
11099 ins_cost(150);
11100 format %{ "subl $dst, $src\t# int" %}
11101 ins_encode %{
11102 __ subl($dst$$Address, $src$$Register);
11103 %}
11104 ins_pipe(ialu_mem_reg);
11105 %}
11106
11107 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11108 %{
11109 predicate(!UseAPX);
11110 match(Set dst (SubL dst src));
11111 effect(KILL cr);
11112 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11113
11114 format %{ "subq $dst, $src\t# long" %}
11115 ins_encode %{
11116 __ subq($dst$$Register, $src$$Register);
11117 %}
11118 ins_pipe(ialu_reg_reg);
11119 %}
11120
11121 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11122 %{
11123 predicate(UseAPX);
11124 match(Set dst (SubL src1 src2));
11125 effect(KILL cr);
11126 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11127
11128 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11129 ins_encode %{
11130 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11131 %}
11132 ins_pipe(ialu_reg_reg);
11133 %}
11134
11135 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11136 %{
11137 predicate(UseAPX);
11138 match(Set dst (SubL src1 src2));
11139 effect(KILL cr);
11140 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11141
11142 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11143 ins_encode %{
11144 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11145 %}
11146 ins_pipe(ialu_reg_reg);
11147 %}
11148
11149 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11150 %{
11151 predicate(UseAPX);
11152 match(Set dst (SubL (LoadL src1) src2));
11153 effect(KILL cr);
11154 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11155
11156 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11157 ins_encode %{
11158 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11159 %}
11160 ins_pipe(ialu_reg_reg);
11161 %}
11162
11163 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11164 %{
11165 predicate(!UseAPX);
11166 match(Set dst (SubL dst (LoadL src)));
11167 effect(KILL cr);
11168 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11169
11170 ins_cost(150);
11171 format %{ "subq $dst, $src\t# long" %}
11172 ins_encode %{
11173 __ subq($dst$$Register, $src$$Address);
11174 %}
11175 ins_pipe(ialu_reg_mem);
11176 %}
11177
11178 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11179 %{
11180 predicate(UseAPX);
11181 match(Set dst (SubL src1 (LoadL src2)));
11182 effect(KILL cr);
11183 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11184
11185 ins_cost(150);
11186 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11187 ins_encode %{
11188 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11189 %}
11190 ins_pipe(ialu_reg_mem);
11191 %}
11192
11193 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11194 %{
11195 predicate(UseAPX);
11196 match(Set dst (SubL (LoadL src1) src2));
11197 effect(KILL cr);
11198 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11199
11200 ins_cost(150);
11201 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11202 ins_encode %{
11203 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11204 %}
11205 ins_pipe(ialu_reg_mem);
11206 %}
11207
11208 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11209 %{
11210 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11211 effect(KILL cr);
11212 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11213
11214 ins_cost(150);
11215 format %{ "subq $dst, $src\t# long" %}
11216 ins_encode %{
11217 __ subq($dst$$Address, $src$$Register);
11218 %}
11219 ins_pipe(ialu_mem_reg);
11220 %}
11221
11222 // Subtract from a pointer
11223 // XXX hmpf???
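// There is no SubP ideal node: pointer minus int reaches the matcher as
// AddP(dst, SubI(0, src)), which this rule collapses into a single subq.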
11224 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11225 %{
11226 match(Set dst (AddP dst (SubI zero src)));
11227 effect(KILL cr);
11228
11229 format %{ "subq $dst, $src\t# ptr - int" %}
11230 ins_encode %{
11231 __ subq($dst$$Register, $src$$Register);
11232 %}
11233 ins_pipe(ialu_reg_reg);
11234 %}
11235
11236 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11237 %{
11238 predicate(!UseAPX);
11239 match(Set dst (SubI zero dst));
11240 effect(KILL cr);
11241 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11242
11243 format %{ "negl $dst\t# int" %}
11244 ins_encode %{
11245 __ negl($dst$$Register);
11246 %}
11247 ins_pipe(ialu_reg);
11248 %}
11249
11250 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11251 %{
11252 predicate(UseAPX);
11253 match(Set dst (SubI zero src));
11254 effect(KILL cr);
11255 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11256
11257 format %{ "enegl $dst, $src\t# int ndd" %}
11258 ins_encode %{
11259 __ enegl($dst$$Register, $src$$Register, false);
11260 %}
11261 ins_pipe(ialu_reg);
11262 %}
11263
11264 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11265 %{
11266 predicate(!UseAPX);
11267 match(Set dst (NegI dst));
11268 effect(KILL cr);
11269 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11270
11271 format %{ "negl $dst\t# int" %}
11272 ins_encode %{
11273 __ negl($dst$$Register);
11274 %}
11275 ins_pipe(ialu_reg);
11276 %}
11277
11278 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11279 %{
11280 predicate(UseAPX);
11281 match(Set dst (NegI src));
11282 effect(KILL cr);
11283 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11284
11285 format %{ "enegl $dst, $src\t# int ndd" %}
11286 ins_encode %{
11287 __ enegl($dst$$Register, $src$$Register, false);
11288 %}
11289 ins_pipe(ialu_reg);
11290 %}
11291
11292 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11293 %{
11294 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11295 effect(KILL cr);
11296 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11297
11298 format %{ "negl $dst\t# int" %}
11299 ins_encode %{
11300 __ negl($dst$$Address);
11301 %}
11302 ins_pipe(ialu_reg);
11303 %}
11304
11305 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11306 %{
11307 predicate(!UseAPX);
11308 match(Set dst (SubL zero dst));
11309 effect(KILL cr);
11310 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11311
11312 format %{ "negq $dst\t# long" %}
11313 ins_encode %{
11314 __ negq($dst$$Register);
11315 %}
11316 ins_pipe(ialu_reg);
11317 %}
11318
11319 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11320 %{
11321 predicate(UseAPX);
11322 match(Set dst (SubL zero src));
11323 effect(KILL cr);
11324 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11325
11326 format %{ "enegq $dst, $src\t# long ndd" %}
11327 ins_encode %{
11328 __ enegq($dst$$Register, $src$$Register, false);
11329 %}
11330 ins_pipe(ialu_reg);
11331 %}
11332
11333 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11334 %{
11335 predicate(!UseAPX);
11336 match(Set dst (NegL dst));
11337 effect(KILL cr);
11338 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11339
11340 format %{ "negq $dst\t# int" %}
11341 ins_encode %{
11342 __ negq($dst$$Register);
11343 %}
11344 ins_pipe(ialu_reg);
11345 %}
11346
11347 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11348 %{
11349 predicate(UseAPX);
11350 match(Set dst (NegL src));
11351 effect(KILL cr);
11352 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11353
11354 format %{ "enegq $dst, $src\t# long ndd" %}
11355 ins_encode %{
11356 __ enegq($dst$$Register, $src$$Register, false);
11357 %}
11358 ins_pipe(ialu_reg);
11359 %}
11360
11361 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11362 %{
11363 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11364 effect(KILL cr);
11365 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11366
11367 format %{ "negq $dst\t# long" %}
11368 ins_encode %{
11369 __ negq($dst$$Address);
11370 %}
11371 ins_pipe(ialu_reg);
11372 %}
11373
11374 //----------Multiplication/Division Instructions-------------------------------
11375 // Integer Multiplication Instructions
11376 // Multiply Register
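// The immediate forms need no NDD variants: imul reg, r/m, imm is already
// a three-operand instruction, so only the two-operand reg/reg and
// reg/mem forms gain eimull/eimulq equivalents under UseAPX.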
11377
11378 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11379 %{
11380 predicate(!UseAPX);
11381 match(Set dst (MulI dst src));
11382 effect(KILL cr);
11383
11384 ins_cost(300);
11385 format %{ "imull $dst, $src\t# int" %}
11386 ins_encode %{
11387 __ imull($dst$$Register, $src$$Register);
11388 %}
11389 ins_pipe(ialu_reg_reg_alu0);
11390 %}
11391
11392 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11393 %{
11394 predicate(UseAPX);
11395 match(Set dst (MulI src1 src2));
11396 effect(KILL cr);
11397
11398 ins_cost(300);
11399 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11400 ins_encode %{
11401 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11402 %}
11403 ins_pipe(ialu_reg_reg_alu0);
11404 %}
11405
11406 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11407 %{
11408 match(Set dst (MulI src imm));
11409 effect(KILL cr);
11410
11411 ins_cost(300);
11412 format %{ "imull $dst, $src, $imm\t# int" %}
11413 ins_encode %{
11414 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11415 %}
11416 ins_pipe(ialu_reg_reg_alu0);
11417 %}
11418
11419 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11420 %{
11421 predicate(!UseAPX);
11422 match(Set dst (MulI dst (LoadI src)));
11423 effect(KILL cr);
11424
11425 ins_cost(350);
11426 format %{ "imull $dst, $src\t# int" %}
11427 ins_encode %{
11428 __ imull($dst$$Register, $src$$Address);
11429 %}
11430 ins_pipe(ialu_reg_mem_alu0);
11431 %}
11432
11433 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11434 %{
11435 predicate(UseAPX);
11436 match(Set dst (MulI src1 (LoadI src2)));
11437 effect(KILL cr);
11438
11439 ins_cost(350);
11440 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11441 ins_encode %{
11442 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11443 %}
11444 ins_pipe(ialu_reg_mem_alu0);
11445 %}
11446
11447 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11448 %{
11449 match(Set dst (MulI (LoadI src) imm));
11450 effect(KILL cr);
11451
11452 ins_cost(300);
11453 format %{ "imull $dst, $src, $imm\t# int" %}
11454 ins_encode %{
11455 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11456 %}
11457 ins_pipe(ialu_reg_mem_alu0);
11458 %}
11459
11460 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11461 %{
11462 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11463 effect(KILL cr, KILL src2);
11464
11465 expand %{ mulI_rReg(dst, src1, cr);
11466 mulI_rReg(src2, src3, cr);
11467 addI_rReg(dst, src2, cr); %}
11468 %}
11469
11470 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11471 %{
11472 predicate(!UseAPX);
11473 match(Set dst (MulL dst src));
11474 effect(KILL cr);
11475
11476 ins_cost(300);
11477 format %{ "imulq $dst, $src\t# long" %}
11478 ins_encode %{
11479 __ imulq($dst$$Register, $src$$Register);
11480 %}
11481 ins_pipe(ialu_reg_reg_alu0);
11482 %}
11483
11484 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11485 %{
11486 predicate(UseAPX);
11487 match(Set dst (MulL src1 src2));
11488 effect(KILL cr);
11489
11490 ins_cost(300);
11491 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11492 ins_encode %{
11493 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11494 %}
11495 ins_pipe(ialu_reg_reg_alu0);
11496 %}
11497
11498 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11499 %{
11500 match(Set dst (MulL src imm));
11501 effect(KILL cr);
11502
11503 ins_cost(300);
11504 format %{ "imulq $dst, $src, $imm\t# long" %}
11505 ins_encode %{
11506 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11507 %}
11508 ins_pipe(ialu_reg_reg_alu0);
11509 %}
11510
11511 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11512 %{
11513 predicate(!UseAPX);
11514 match(Set dst (MulL dst (LoadL src)));
11515 effect(KILL cr);
11516
11517 ins_cost(350);
11518 format %{ "imulq $dst, $src\t# long" %}
11519 ins_encode %{
11520 __ imulq($dst$$Register, $src$$Address);
11521 %}
11522 ins_pipe(ialu_reg_mem_alu0);
11523 %}
11524
11525 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11526 %{
11527 predicate(UseAPX);
11528 match(Set dst (MulL src1 (LoadL src2)));
11529 effect(KILL cr);
11530
11531 ins_cost(350);
11532 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11533 ins_encode %{
11534 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11535 %}
11536 ins_pipe(ialu_reg_mem_alu0);
11537 %}
11538
11539 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11540 %{
11541 match(Set dst (MulL (LoadL src) imm));
11542 effect(KILL cr);
11543
11544 ins_cost(300);
11545 format %{ "imulq $dst, $src, $imm\t# long" %}
11546 ins_encode %{
11547 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11548 %}
11549 ins_pipe(ialu_reg_mem_alu0);
11550 %}
11551
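// High-half multiply uses the one-operand imul/mul form, which produces
// the full 128-bit product of RAX and the operand in RDX:RAX; hence dst
// is pinned to rdx and rax is USE_KILLed.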
11552 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11553 %{
11554 match(Set dst (MulHiL src rax));
11555 effect(USE_KILL rax, KILL cr);
11556
11557 ins_cost(300);
11558 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11559 ins_encode %{
11560 __ imulq($src$$Register);
11561 %}
11562 ins_pipe(ialu_reg_reg_alu0);
11563 %}
11564
11565 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11566 %{
11567 match(Set dst (UMulHiL src rax));
11568 effect(USE_KILL rax, KILL cr);
11569
11570 ins_cost(300);
11571 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11572 ins_encode %{
11573 __ mulq($src$$Register);
11574 %}
11575 ins_pipe(ialu_reg_reg_alu0);
11576 %}
11577
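// idiv raises #DE on min_jint / -1 (and idivq on min_jlong / -1), so the
// cdql_enc/cdqq_enc encodings test for that single case first and produce
// the Java-defined result (quotient unchanged, remainder 0) without
// dividing; the format strings spell out the emitted sequence.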
11578 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11579 rFlagsReg cr)
11580 %{
11581 match(Set rax (DivI rax div));
11582 effect(KILL rdx, KILL cr);
11583
11584 ins_cost(30*100+10*100); // XXX
11585 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11586 "jne,s normal\n\t"
11587 "xorl rdx, rdx\n\t"
11588 "cmpl $div, -1\n\t"
11589 "je,s done\n"
11590 "normal: cdql\n\t"
11591 "idivl $div\n"
11592 "done:" %}
11593 ins_encode(cdql_enc(div));
11594 ins_pipe(ialu_reg_reg_alu0);
11595 %}
11596
11597 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11598 rFlagsReg cr)
11599 %{
11600 match(Set rax (DivL rax div));
11601 effect(KILL rdx, KILL cr);
11602
11603 ins_cost(30*100+10*100); // XXX
11604 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11605 "cmpq rax, rdx\n\t"
11606 "jne,s normal\n\t"
11607 "xorl rdx, rdx\n\t"
11608 "cmpq $div, -1\n\t"
11609 "je,s done\n"
11610 "normal: cdqq\n\t"
11611 "idivq $div\n"
11612 "done:" %}
11613 ins_encode(cdqq_enc(div));
11614 ins_pipe(ialu_reg_reg_alu0);
11615 %}
11616
11617 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11618 %{
11619 match(Set rax (UDivI rax div));
11620 effect(KILL rdx, KILL cr);
11621
11622 ins_cost(300);
11623 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11624 ins_encode %{
11625 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11626 %}
11627 ins_pipe(ialu_reg_reg_alu0);
11628 %}
11629
11630 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11631 %{
11632 match(Set rax (UDivL rax div));
11633 effect(KILL rdx, KILL cr);
11634
11635 ins_cost(300);
11636 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11637 ins_encode %{
11638 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11639 %}
11640 ins_pipe(ialu_reg_reg_alu0);
11641 %}
11642
11643 // Integer DIVMOD with Register, both quotient and mod results
11644 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11645 rFlagsReg cr)
11646 %{
11647 match(DivModI rax div);
11648 effect(KILL cr);
11649
11650 ins_cost(30*100+10*100); // XXX
11651 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11652 "jne,s normal\n\t"
11653 "xorl rdx, rdx\n\t"
11654 "cmpl $div, -1\n\t"
11655 "je,s done\n"
11656 "normal: cdql\n\t"
11657 "idivl $div\n"
11658 "done:" %}
11659 ins_encode(cdql_enc(div));
11660 ins_pipe(pipe_slow);
11661 %}
11662
11663 // Long DIVMOD with Register, both quotient and mod results
11664 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11665 rFlagsReg cr)
11666 %{
11667 match(DivModL rax div);
11668 effect(KILL cr);
11669
11670 ins_cost(30*100+10*100); // XXX
11671 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11672 "cmpq rax, rdx\n\t"
11673 "jne,s normal\n\t"
11674 "xorl rdx, rdx\n\t"
11675 "cmpq $div, -1\n\t"
11676 "je,s done\n"
11677 "normal: cdqq\n\t"
11678 "idivq $div\n"
11679 "done:" %}
11680 ins_encode(cdqq_enc(div));
11681 ins_pipe(pipe_slow);
11682 %}
11683
11684 // Unsigned integer DIVMOD with Register, both quotient and mod results
11685 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11686 no_rax_rdx_RegI div, rFlagsReg cr)
11687 %{
11688 match(UDivModI rax div);
11689 effect(TEMP tmp, KILL cr);
11690
11691 ins_cost(300);
11692 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11693 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11694 %}
11695 ins_encode %{
11696 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11697 %}
11698 ins_pipe(pipe_slow);
11699 %}
11700
11701 // Unsigned long DIVMOD with Register, both quotient and mod results
11702 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11703 no_rax_rdx_RegL div, rFlagsReg cr)
11704 %{
11705 match(UDivModL rax div);
11706 effect(TEMP tmp, KILL cr);
11707
11708 ins_cost(300);
11709 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11710 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11711 %}
11712 ins_encode %{
11713 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11714 %}
11715 ins_pipe(pipe_slow);
11716 %}
11717
11718 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11719 rFlagsReg cr)
11720 %{
11721 match(Set rdx (ModI rax div));
11722 effect(KILL rax, KILL cr);
11723
11724 ins_cost(300); // XXX
11725 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11726 "jne,s normal\n\t"
11727 "xorl rdx, rdx\n\t"
11728 "cmpl $div, -1\n\t"
11729 "je,s done\n"
11730 "normal: cdql\n\t"
11731 "idivl $div\n"
11732 "done:" %}
11733 ins_encode(cdql_enc(div));
11734 ins_pipe(ialu_reg_reg_alu0);
11735 %}
11736
11737 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11738 rFlagsReg cr)
11739 %{
11740 match(Set rdx (ModL rax div));
11741 effect(KILL rax, KILL cr);
11742
11743 ins_cost(300); // XXX
11744 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11745 "cmpq rax, rdx\n\t"
11746 "jne,s normal\n\t"
11747 "xorl rdx, rdx\n\t"
11748 "cmpq $div, -1\n\t"
11749 "je,s done\n"
11750 "normal: cdqq\n\t"
11751 "idivq $div\n"
11752 "done:" %}
11753 ins_encode(cdqq_enc(div));
11754 ins_pipe(ialu_reg_reg_alu0);
11755 %}
11756
11757 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11758 %{
11759 match(Set rdx (UModI rax div));
11760 effect(KILL rax, KILL cr);
11761
11762 ins_cost(300);
11763 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11764 ins_encode %{
11765 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11766 %}
11767 ins_pipe(ialu_reg_reg_alu0);
11768 %}
11769
11770 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11771 %{
11772 match(Set rdx (UModL rax div));
11773 effect(KILL rax, KILL cr);
11774
11775 ins_cost(300);
11776 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11777 ins_encode %{
11778 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11779 %}
11780 ins_pipe(ialu_reg_reg_alu0);
11781 %}
11782
11783 // Integer Shift Instructions
11784 // Shift Left by one, two, three
11785 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11786 %{
11787 predicate(!UseAPX);
11788 match(Set dst (LShiftI dst shift));
11789 effect(KILL cr);
11790
11791 format %{ "sall $dst, $shift" %}
11792 ins_encode %{
11793 __ sall($dst$$Register, $shift$$constant);
11794 %}
11795 ins_pipe(ialu_reg);
11796 %}
11797
11798 // Shift Left by one, two, three
11799 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11800 %{
11801 predicate(UseAPX);
11802 match(Set dst (LShiftI src shift));
11803 effect(KILL cr);
11804
11805 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11806 ins_encode %{
11807 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11808 %}
11809 ins_pipe(ialu_reg);
11810 %}
11811
11812 // Shift Left by 8-bit immediate
11813 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11814 %{
11815 predicate(!UseAPX);
11816 match(Set dst (LShiftI dst shift));
11817 effect(KILL cr);
11818
11819 format %{ "sall $dst, $shift" %}
11820 ins_encode %{
11821 __ sall($dst$$Register, $shift$$constant);
11822 %}
11823 ins_pipe(ialu_reg);
11824 %}
11825
11826 // Shift Left by 8-bit immediate
11827 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11828 %{
11829 predicate(UseAPX);
11830 match(Set dst (LShiftI src shift));
11831 effect(KILL cr);
11832
11833 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11834 ins_encode %{
11835 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11836 %}
11837 ins_pipe(ialu_reg);
11838 %}
11839
11840 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11841 %{
11842 predicate(UseAPX);
11843 match(Set dst (LShiftI (LoadI src) shift));
11844 effect(KILL cr);
11845
11846 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11847 ins_encode %{
11848 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11849 %}
11850 ins_pipe(ialu_reg);
11851 %}
11852
11853 // Shift Left by 8-bit immediate
11854 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11855 %{
11856 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11857 effect(KILL cr);
11858
11859 format %{ "sall $dst, $shift" %}
11860 ins_encode %{
11861 __ sall($dst$$Address, $shift$$constant);
11862 %}
11863 ins_pipe(ialu_mem_imm);
11864 %}
11865
11866 // Shift Left by variable
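// The legacy variable-shift encodings require the count in CL, hence the
// rcx_RegI operand. The hardware masks the count to 5 bits (6 for 64-bit
// shifts), which matches Java shift semantics.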
11867 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11868 %{
11869 predicate(!VM_Version::supports_bmi2());
11870 match(Set dst (LShiftI dst shift));
11871 effect(KILL cr);
11872
11873 format %{ "sall $dst, $shift" %}
11874 ins_encode %{
11875 __ sall($dst$$Register);
11876 %}
11877 ins_pipe(ialu_reg_reg);
11878 %}
11879
11880 // Shift Left by variable
11881 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11882 %{
11883 predicate(!VM_Version::supports_bmi2());
11884 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11885 effect(KILL cr);
11886
11887 format %{ "sall $dst, $shift" %}
11888 ins_encode %{
11889 __ sall($dst$$Address);
11890 %}
11891 ins_pipe(ialu_mem_reg);
11892 %}
11893
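// With BMI2, shlx/sarx/shrx accept the count in any register and leave
// the flags untouched, so these rules need neither the rcx constraint nor
// a KILL cr effect.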
11894 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11895 %{
11896 predicate(VM_Version::supports_bmi2());
11897 match(Set dst (LShiftI src shift));
11898
11899 format %{ "shlxl $dst, $src, $shift" %}
11900 ins_encode %{
11901 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11902 %}
11903 ins_pipe(ialu_reg_reg);
11904 %}
11905
11906 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11907 %{
11908 predicate(VM_Version::supports_bmi2());
11909 match(Set dst (LShiftI (LoadI src) shift));
11910 ins_cost(175);
11911 format %{ "shlxl $dst, $src, $shift" %}
11912 ins_encode %{
11913 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11914 %}
11915 ins_pipe(ialu_reg_mem);
11916 %}
11917
11918 // Arithmetic Shift Right by 8-bit immediate
11919 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11920 %{
11921 predicate(!UseAPX);
11922 match(Set dst (RShiftI dst shift));
11923 effect(KILL cr);
11924
11925 format %{ "sarl $dst, $shift" %}
11926 ins_encode %{
11927 __ sarl($dst$$Register, $shift$$constant);
11928 %}
11929 ins_pipe(ialu_mem_imm);
11930 %}
11931
11932 // Arithmetic Shift Right by 8-bit immediate
11933 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11934 %{
11935 predicate(UseAPX);
11936 match(Set dst (RShiftI src shift));
11937 effect(KILL cr);
11938
11939 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11940 ins_encode %{
11941 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11942 %}
11943 ins_pipe(ialu_mem_imm);
11944 %}
11945
11946 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11947 %{
11948 predicate(UseAPX);
11949 match(Set dst (RShiftI (LoadI src) shift));
11950 effect(KILL cr);
11951
11952 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11953 ins_encode %{
11954 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
11955 %}
11956 ins_pipe(ialu_mem_imm);
11957 %}
11958
11959 // Arithmetic Shift Right by 8-bit immediate
11960 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11961 %{
11962 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11963 effect(KILL cr);
11964
11965 format %{ "sarl $dst, $shift" %}
11966 ins_encode %{
11967 __ sarl($dst$$Address, $shift$$constant);
11968 %}
11969 ins_pipe(ialu_mem_imm);
11970 %}
11971
11972 // Arithmetic Shift Right by variable
11973 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11974 %{
11975 predicate(!VM_Version::supports_bmi2());
11976 match(Set dst (RShiftI dst shift));
11977 effect(KILL cr);
11978
11979 format %{ "sarl $dst, $shift" %}
11980 ins_encode %{
11981 __ sarl($dst$$Register);
11982 %}
11983 ins_pipe(ialu_reg_reg);
11984 %}
11985
11986 // Arithmetic Shift Right by variable
11987 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11988 %{
11989 predicate(!VM_Version::supports_bmi2());
11990 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11991 effect(KILL cr);
11992
11993 format %{ "sarl $dst, $shift" %}
11994 ins_encode %{
11995 __ sarl($dst$$Address);
11996 %}
11997 ins_pipe(ialu_mem_reg);
11998 %}
11999
12000 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12001 %{
12002 predicate(VM_Version::supports_bmi2());
12003 match(Set dst (RShiftI src shift));
12004
12005 format %{ "sarxl $dst, $src, $shift" %}
12006 ins_encode %{
12007 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12008 %}
12009 ins_pipe(ialu_reg_reg);
12010 %}
12011
12012 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12013 %{
12014 predicate(VM_Version::supports_bmi2());
12015 match(Set dst (RShiftI (LoadI src) shift));
12016 ins_cost(175);
12017 format %{ "sarxl $dst, $src, $shift" %}
12018 ins_encode %{
12019 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12020 %}
12021 ins_pipe(ialu_reg_mem);
12022 %}
12023
12024 // Logical Shift Right by 8-bit immediate
12025 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12026 %{
12027 predicate(!UseAPX);
12028 match(Set dst (URShiftI dst shift));
12029 effect(KILL cr);
12030
12031 format %{ "shrl $dst, $shift" %}
12032 ins_encode %{
12033 __ shrl($dst$$Register, $shift$$constant);
12034 %}
12035 ins_pipe(ialu_reg);
12036 %}
12037
12038 // Logical Shift Right by 8-bit immediate
12039 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12040 %{
12041 predicate(UseAPX);
12042 match(Set dst (URShiftI src shift));
12043 effect(KILL cr);
12044
12045 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12046 ins_encode %{
12047 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12048 %}
12049 ins_pipe(ialu_reg);
12050 %}
12051
12052 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12053 %{
12054 predicate(UseAPX);
12055 match(Set dst (URShiftI (LoadI src) shift));
12056 effect(KILL cr);
12057
12058 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12059 ins_encode %{
12060 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12061 %}
12062 ins_pipe(ialu_reg);
12063 %}
12064
12065 // Logical Shift Right by 8-bit immediate
12066 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12067 %{
12068 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12069 effect(KILL cr);
12070
12071 format %{ "shrl $dst, $shift" %}
12072 ins_encode %{
12073 __ shrl($dst$$Address, $shift$$constant);
12074 %}
12075 ins_pipe(ialu_mem_imm);
12076 %}
12077
12078 // Logical Shift Right by variable
12079 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12080 %{
12081 predicate(!VM_Version::supports_bmi2());
12082 match(Set dst (URShiftI dst shift));
12083 effect(KILL cr);
12084
12085 format %{ "shrl $dst, $shift" %}
12086 ins_encode %{
12087 __ shrl($dst$$Register);
12088 %}
12089 ins_pipe(ialu_reg_reg);
12090 %}
12091
12092 // Logical Shift Right by variable
12093 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12094 %{
12095 predicate(!VM_Version::supports_bmi2());
12096 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12097 effect(KILL cr);
12098
12099 format %{ "shrl $dst, $shift" %}
12100 ins_encode %{
12101 __ shrl($dst$$Address);
12102 %}
12103 ins_pipe(ialu_mem_reg);
12104 %}
12105
12106 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12107 %{
12108 predicate(VM_Version::supports_bmi2());
12109 match(Set dst (URShiftI src shift));
12110
12111 format %{ "shrxl $dst, $src, $shift" %}
12112 ins_encode %{
12113 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12114 %}
12115 ins_pipe(ialu_reg_reg);
12116 %}
12117
12118 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12119 %{
12120 predicate(VM_Version::supports_bmi2());
12121 match(Set dst (URShiftI (LoadI src) shift));
12122 ins_cost(175);
12123 format %{ "shrxl $dst, $src, $shift" %}
12124 ins_encode %{
12125 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12126 %}
12127 ins_pipe(ialu_reg_mem);
12128 %}
12129
12130 // Long Shift Instructions
12131 // Shift Left by one, two, three
12132 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12133 %{
12134 predicate(!UseAPX);
12135 match(Set dst (LShiftL dst shift));
12136 effect(KILL cr);
12137
12138 format %{ "salq $dst, $shift" %}
12139 ins_encode %{
12140 __ salq($dst$$Register, $shift$$constant);
12141 %}
12142 ins_pipe(ialu_reg);
12143 %}
12144
12145 // Shift Left by one, two, three
12146 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12147 %{
12148 predicate(UseAPX);
12149 match(Set dst (LShiftL src shift));
12150 effect(KILL cr);
12151
12152 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12153 ins_encode %{
12154 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12155 %}
12156 ins_pipe(ialu_reg);
12157 %}
12158
12159 // Shift Left by 8-bit immediate
12160 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12161 %{
12162 predicate(!UseAPX);
12163 match(Set dst (LShiftL dst shift));
12164 effect(KILL cr);
12165
12166 format %{ "salq $dst, $shift" %}
12167 ins_encode %{
12168 __ salq($dst$$Register, $shift$$constant);
12169 %}
12170 ins_pipe(ialu_reg);
12171 %}
12172
12173 // Shift Left by 8-bit immediate
12174 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12175 %{
12176 predicate(UseAPX);
12177 match(Set dst (LShiftL src shift));
12178 effect(KILL cr);
12179
12180 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12181 ins_encode %{
12182 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12183 %}
12184 ins_pipe(ialu_reg);
12185 %}
12186
12187 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12188 %{
12189 predicate(UseAPX);
12190 match(Set dst (LShiftL (LoadL src) shift));
12191 effect(KILL cr);
12192
12193 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12194 ins_encode %{
12195 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12196 %}
12197 ins_pipe(ialu_reg);
12198 %}
12199
12200 // Shift Left by 8-bit immediate
12201 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12202 %{
12203 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12204 effect(KILL cr);
12205
12206 format %{ "salq $dst, $shift" %}
12207 ins_encode %{
12208 __ salq($dst$$Address, $shift$$constant);
12209 %}
12210 ins_pipe(ialu_mem_imm);
12211 %}
12212
12213 // Shift Left by variable
12214 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12215 %{
12216 predicate(!VM_Version::supports_bmi2());
12217 match(Set dst (LShiftL dst shift));
12218 effect(KILL cr);
12219
12220 format %{ "salq $dst, $shift" %}
12221 ins_encode %{
12222 __ salq($dst$$Register);
12223 %}
12224 ins_pipe(ialu_reg_reg);
12225 %}
12226
12227 // Shift Left by variable
12228 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12229 %{
12230 predicate(!VM_Version::supports_bmi2());
12231 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12232 effect(KILL cr);
12233
12234 format %{ "salq $dst, $shift" %}
12235 ins_encode %{
12236 __ salq($dst$$Address);
12237 %}
12238 ins_pipe(ialu_mem_reg);
12239 %}
12240
12241 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12242 %{
12243 predicate(VM_Version::supports_bmi2());
12244 match(Set dst (LShiftL src shift));
12245
12246 format %{ "shlxq $dst, $src, $shift" %}
12247 ins_encode %{
12248 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12249 %}
12250 ins_pipe(ialu_reg_reg);
12251 %}
12252
12253 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12254 %{
12255 predicate(VM_Version::supports_bmi2());
12256 match(Set dst (LShiftL (LoadL src) shift));
12257 ins_cost(175);
12258 format %{ "shlxq $dst, $src, $shift" %}
12259 ins_encode %{
12260 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12261 %}
12262 ins_pipe(ialu_reg_mem);
12263 %}
12264
12265 // Arithmetic Shift Right by 8-bit immediate
12266 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12267 %{
12268 predicate(!UseAPX);
12269 match(Set dst (RShiftL dst shift));
12270 effect(KILL cr);
12271
12272 format %{ "sarq $dst, $shift" %}
12273 ins_encode %{
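    // 64-bit shifts use only the low 6 bits of the count, so mask the
    // immediate to 0..63 before emitting it.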
12274 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12275 %}
12276 ins_pipe(ialu_mem_imm);
12277 %}
12278
12279 // Arithmetic Shift Right by 8-bit immediate
12280 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12281 %{
12282 predicate(UseAPX);
12283 match(Set dst (RShiftL src shift));
12284 effect(KILL cr);
12285
12286 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12287 ins_encode %{
12288 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12289 %}
12290 ins_pipe(ialu_mem_imm);
12291 %}
12292
12293 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12294 %{
12295 predicate(UseAPX);
12296 match(Set dst (RShiftL (LoadL src) shift));
12297 effect(KILL cr);
12298
12299 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12300 ins_encode %{
12301 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12302 %}
12303 ins_pipe(ialu_mem_imm);
12304 %}
12305
12306 // Arithmetic Shift Right by 8-bit immediate
12307 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12308 %{
12309 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12310 effect(KILL cr);
12311
12312 format %{ "sarq $dst, $shift" %}
12313 ins_encode %{
12314 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12315 %}
12316 ins_pipe(ialu_mem_imm);
12317 %}
12318
12319 // Arithmetic Shift Right by variable
12320 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12321 %{
12322 predicate(!VM_Version::supports_bmi2());
12323 match(Set dst (RShiftL dst shift));
12324 effect(KILL cr);
12325
12326 format %{ "sarq $dst, $shift" %}
12327 ins_encode %{
12328 __ sarq($dst$$Register);
12329 %}
12330 ins_pipe(ialu_reg_reg);
12331 %}
12332
12333 // Arithmetic Shift Right by variable
12334 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12335 %{
12336 predicate(!VM_Version::supports_bmi2());
12337 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12338 effect(KILL cr);
12339
12340 format %{ "sarq $dst, $shift" %}
12341 ins_encode %{
12342 __ sarq($dst$$Address);
12343 %}
12344 ins_pipe(ialu_mem_reg);
12345 %}
12346
12347 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12348 %{
12349 predicate(VM_Version::supports_bmi2());
12350 match(Set dst (RShiftL src shift));
12351
12352 format %{ "sarxq $dst, $src, $shift" %}
12353 ins_encode %{
12354 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12355 %}
12356 ins_pipe(ialu_reg_reg);
12357 %}
12358
12359 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12360 %{
12361 predicate(VM_Version::supports_bmi2());
12362 match(Set dst (RShiftL (LoadL src) shift));
12363 ins_cost(175);
12364 format %{ "sarxq $dst, $src, $shift" %}
12365 ins_encode %{
12366 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12367 %}
12368 ins_pipe(ialu_reg_mem);
12369 %}
12370
12371 // Logical Shift Right by 8-bit immediate
12372 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12373 %{
12374 predicate(!UseAPX);
12375 match(Set dst (URShiftL dst shift));
12376 effect(KILL cr);
12377
12378 format %{ "shrq $dst, $shift" %}
12379 ins_encode %{
12380 __ shrq($dst$$Register, $shift$$constant);
12381 %}
12382 ins_pipe(ialu_reg);
12383 %}
12384
12385 // Logical Shift Right by 8-bit immediate
12386 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12387 %{
12388 predicate(UseAPX);
12389 match(Set dst (URShiftL src shift));
12390 effect(KILL cr);
12391
12392 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12393 ins_encode %{
12394 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12395 %}
12396 ins_pipe(ialu_reg);
12397 %}
12398
12399 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12400 %{
12401 predicate(UseAPX);
12402 match(Set dst (URShiftL (LoadL src) shift));
12403 effect(KILL cr);
12404
12405 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12406 ins_encode %{
12407 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12408 %}
12409 ins_pipe(ialu_reg);
12410 %}
12411
12412 // Logical Shift Right by 8-bit immediate
12413 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12414 %{
12415 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12416 effect(KILL cr);
12417
12418 format %{ "shrq $dst, $shift" %}
12419 ins_encode %{
12420 __ shrq($dst$$Address, $shift$$constant);
12421 %}
12422 ins_pipe(ialu_mem_imm);
12423 %}
12424
12425 // Logical Shift Right by variable
12426 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12427 %{
12428 predicate(!VM_Version::supports_bmi2());
12429 match(Set dst (URShiftL dst shift));
12430 effect(KILL cr);
12431
12432 format %{ "shrq $dst, $shift" %}
12433 ins_encode %{
12434 __ shrq($dst$$Register);
12435 %}
12436 ins_pipe(ialu_reg_reg);
12437 %}
12438
12439 // Logical Shift Right by variable
12440 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12441 %{
12442 predicate(!VM_Version::supports_bmi2());
12443 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12444 effect(KILL cr);
12445
12446 format %{ "shrq $dst, $shift" %}
12447 ins_encode %{
12448 __ shrq($dst$$Address);
12449 %}
12450 ins_pipe(ialu_mem_reg);
12451 %}
12452
12453 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12454 %{
12455 predicate(VM_Version::supports_bmi2());
12456 match(Set dst (URShiftL src shift));
12457
12458 format %{ "shrxq $dst, $src, $shift" %}
12459 ins_encode %{
12460 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12461 %}
12462 ins_pipe(ialu_reg_reg);
12463 %}
12464
12465 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12466 %{
12467 predicate(VM_Version::supports_bmi2());
12468 match(Set dst (URShiftL (LoadL src) shift));
12469 ins_cost(175);
12470 format %{ "shrxq $dst, $src, $shift" %}
12471 ins_encode %{
12472 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12473 %}
12474 ins_pipe(ialu_reg_mem);
12475 %}
12476
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
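// e.g. for int x, (byte)x == (x << 24) >> 24; the two shifts collapse into a
// single sign-extending move.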
12479 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12480 %{
12481 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12482
12483 format %{ "movsbl $dst, $src\t# i2b" %}
12484 ins_encode %{
12485 __ movsbl($dst$$Register, $src$$Register);
12486 %}
12487 ins_pipe(ialu_reg_reg);
12488 %}
12489
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
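// e.g. (short)x == (x << 16) >> 16, matched here as a single movswl.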
12492 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12493 %{
12494 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12495
12496 format %{ "movswl $dst, $src\t# i2s" %}
12497 ins_encode %{
12498 __ movswl($dst$$Register, $src$$Register);
12499 %}
12500 ins_pipe(ialu_reg_reg);
12501 %}
12502
12503 // ROL/ROR instructions
12504
12505 // Rotate left by constant.
12506 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12507 %{
12508 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12509 match(Set dst (RotateLeft dst shift));
12510 effect(KILL cr);
12511 format %{ "roll $dst, $shift" %}
12512 ins_encode %{
12513 __ roll($dst$$Register, $shift$$constant);
12514 %}
12515 ins_pipe(ialu_reg);
12516 %}
12517
12518 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12519 %{
12520 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12521 match(Set dst (RotateLeft src shift));
12522 format %{ "rolxl $dst, $src, $shift" %}
12523 ins_encode %{
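    // BMI2 provides only rorx; a rotate left by n is emitted as a rotate
    // right by (32 - n) mod 32.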
12524 int shift = 32 - ($shift$$constant & 31);
12525 __ rorxl($dst$$Register, $src$$Register, shift);
12526 %}
12527 ins_pipe(ialu_reg_reg);
12528 %}
12529
12530 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12531 %{
12532 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12533 match(Set dst (RotateLeft (LoadI src) shift));
12534 ins_cost(175);
12535 format %{ "rolxl $dst, $src, $shift" %}
12536 ins_encode %{
12537 int shift = 32 - ($shift$$constant & 31);
12538 __ rorxl($dst$$Register, $src$$Address, shift);
12539 %}
12540 ins_pipe(ialu_reg_mem);
12541 %}
12542
12543 // Rotate Left by variable
12544 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12545 %{
12546 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12547 match(Set dst (RotateLeft dst shift));
12548 effect(KILL cr);
12549 format %{ "roll $dst, $shift" %}
12550 ins_encode %{
12551 __ roll($dst$$Register);
12552 %}
12553 ins_pipe(ialu_reg_reg);
12554 %}
12555
12556 // Rotate Left by variable
12557 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12558 %{
12559 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12560 match(Set dst (RotateLeft src shift));
12561 effect(KILL cr);
12562
12563 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12564 ins_encode %{
12565 __ eroll($dst$$Register, $src$$Register, false);
12566 %}
12567 ins_pipe(ialu_reg_reg);
12568 %}
12569
12570 // Rotate Right by constant.
12571 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12572 %{
12573 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12574 match(Set dst (RotateRight dst shift));
12575 effect(KILL cr);
12576 format %{ "rorl $dst, $shift" %}
12577 ins_encode %{
12578 __ rorl($dst$$Register, $shift$$constant);
12579 %}
12580 ins_pipe(ialu_reg);
12581 %}
12582
12583 // Rotate Right by constant.
12584 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12585 %{
12586 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12587 match(Set dst (RotateRight src shift));
12588 format %{ "rorxl $dst, $src, $shift" %}
12589 ins_encode %{
12590 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12591 %}
12592 ins_pipe(ialu_reg_reg);
12593 %}
12594
12595 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12596 %{
12597 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12598 match(Set dst (RotateRight (LoadI src) shift));
12599 ins_cost(175);
12600 format %{ "rorxl $dst, $src, $shift" %}
12601 ins_encode %{
12602 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12603 %}
12604 ins_pipe(ialu_reg_mem);
12605 %}
12606
12607 // Rotate Right by variable
12608 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12609 %{
12610 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12611 match(Set dst (RotateRight dst shift));
12612 effect(KILL cr);
12613 format %{ "rorl $dst, $shift" %}
12614 ins_encode %{
12615 __ rorl($dst$$Register);
12616 %}
12617 ins_pipe(ialu_reg_reg);
12618 %}
12619
12620 // Rotate Right by variable
12621 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12622 %{
12623 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12624 match(Set dst (RotateRight src shift));
12625 effect(KILL cr);
12626
  format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12628 ins_encode %{
12629 __ erorl($dst$$Register, $src$$Register, false);
12630 %}
12631 ins_pipe(ialu_reg_reg);
12632 %}
12633
12634 // Rotate Left by constant.
12635 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12636 %{
12637 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12638 match(Set dst (RotateLeft dst shift));
12639 effect(KILL cr);
12640 format %{ "rolq $dst, $shift" %}
12641 ins_encode %{
12642 __ rolq($dst$$Register, $shift$$constant);
12643 %}
12644 ins_pipe(ialu_reg);
12645 %}
12646
12647 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12648 %{
12649 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12650 match(Set dst (RotateLeft src shift));
12651 format %{ "rolxq $dst, $src, $shift" %}
12652 ins_encode %{
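    // Same rorx trick as the 32-bit case: rol by n == ror by (64 - n) mod 64.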
12653 int shift = 64 - ($shift$$constant & 63);
12654 __ rorxq($dst$$Register, $src$$Register, shift);
12655 %}
12656 ins_pipe(ialu_reg_reg);
12657 %}
12658
12659 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12660 %{
12661 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12662 match(Set dst (RotateLeft (LoadL src) shift));
12663 ins_cost(175);
12664 format %{ "rolxq $dst, $src, $shift" %}
12665 ins_encode %{
12666 int shift = 64 - ($shift$$constant & 63);
12667 __ rorxq($dst$$Register, $src$$Address, shift);
12668 %}
12669 ins_pipe(ialu_reg_mem);
12670 %}
12671
12672 // Rotate Left by variable
12673 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12674 %{
12675 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12676 match(Set dst (RotateLeft dst shift));
12677 effect(KILL cr);
12678 format %{ "rolq $dst, $shift" %}
12679 ins_encode %{
12680 __ rolq($dst$$Register);
12681 %}
12682 ins_pipe(ialu_reg_reg);
12683 %}
12684
12685 // Rotate Left by variable
12686 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12687 %{
12688 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12689 match(Set dst (RotateLeft src shift));
12690 effect(KILL cr);
12691
  format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12693 ins_encode %{
12694 __ erolq($dst$$Register, $src$$Register, false);
12695 %}
12696 ins_pipe(ialu_reg_reg);
12697 %}
12698
12699 // Rotate Right by constant.
12700 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12701 %{
12702 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12703 match(Set dst (RotateRight dst shift));
12704 effect(KILL cr);
12705 format %{ "rorq $dst, $shift" %}
12706 ins_encode %{
12707 __ rorq($dst$$Register, $shift$$constant);
12708 %}
12709 ins_pipe(ialu_reg);
12710 %}
12711
12712 // Rotate Right by constant
12713 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12714 %{
12715 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12716 match(Set dst (RotateRight src shift));
12717 format %{ "rorxq $dst, $src, $shift" %}
12718 ins_encode %{
12719 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12720 %}
12721 ins_pipe(ialu_reg_reg);
12722 %}
12723
12724 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12725 %{
12726 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12727 match(Set dst (RotateRight (LoadL src) shift));
12728 ins_cost(175);
12729 format %{ "rorxq $dst, $src, $shift" %}
12730 ins_encode %{
12731 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12732 %}
12733 ins_pipe(ialu_reg_mem);
12734 %}
12735
12736 // Rotate Right by variable
12737 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12738 %{
12739 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12740 match(Set dst (RotateRight dst shift));
12741 effect(KILL cr);
12742 format %{ "rorq $dst, $shift" %}
12743 ins_encode %{
12744 __ rorq($dst$$Register);
12745 %}
12746 ins_pipe(ialu_reg_reg);
12747 %}
12748
12749 // Rotate Right by variable
12750 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12751 %{
12752 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12753 match(Set dst (RotateRight src shift));
12754 effect(KILL cr);
12755
  format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12757 ins_encode %{
12758 __ erorq($dst$$Register, $src$$Register, false);
12759 %}
12760 ins_pipe(ialu_reg_reg);
12761 %}
12762
12763 //----------------------------- CompressBits/ExpandBits ------------------------
12764
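// pext gathers the bits of src selected by mask into the low-order bits of
// dst; pdep scatters the low-order bits of src to the bit positions selected
// by mask. Both are BMI2 instructions.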
12765 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12766 predicate(n->bottom_type()->isa_long());
12767 match(Set dst (CompressBits src mask));
12768 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12769 ins_encode %{
12770 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12771 %}
12772 ins_pipe( pipe_slow );
12773 %}
12774
12775 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12776 predicate(n->bottom_type()->isa_long());
12777 match(Set dst (ExpandBits src mask));
12778 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12779 ins_encode %{
12780 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12781 %}
12782 ins_pipe( pipe_slow );
12783 %}
12784
12785 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12786 predicate(n->bottom_type()->isa_long());
12787 match(Set dst (CompressBits src (LoadL mask)));
12788 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12789 ins_encode %{
12790 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12791 %}
12792 ins_pipe( pipe_slow );
12793 %}
12794
12795 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12796 predicate(n->bottom_type()->isa_long());
12797 match(Set dst (ExpandBits src (LoadL mask)));
12798 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12799 ins_encode %{
12800 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12801 %}
12802 ins_pipe( pipe_slow );
12803 %}
12804
12805
12806 // Logical Instructions
12807
12808 // Integer Logical Instructions
12809
12810 // And Instructions
12811 // And Register with Register
12812 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12813 %{
12814 predicate(!UseAPX);
12815 match(Set dst (AndI dst src));
12816 effect(KILL cr);
12817 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12818
12819 format %{ "andl $dst, $src\t# int" %}
12820 ins_encode %{
12821 __ andl($dst$$Register, $src$$Register);
12822 %}
12823 ins_pipe(ialu_reg_reg);
12824 %}
12825
12826 // And Register with Register using New Data Destination (NDD)
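// APX NDD forms write the result to a separate destination register, so the
// allocator need not force dst to alias a source as in the two-operand form.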
12827 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12828 %{
12829 predicate(UseAPX);
12830 match(Set dst (AndI src1 src2));
12831 effect(KILL cr);
12832 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12833
12834 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12835 ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12839 ins_pipe(ialu_reg_reg);
12840 %}
12841
12842 // And Register with Immediate 255
12843 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12844 %{
12845 match(Set dst (AndI src mask));
12846
12847 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12848 ins_encode %{
12849 __ movzbl($dst$$Register, $src$$Register);
12850 %}
12851 ins_pipe(ialu_reg);
12852 %}
12853
12854 // And Register with Immediate 255 and promote to long
12855 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12856 %{
12857 match(Set dst (ConvI2L (AndI src mask)));
12858
12859 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12860 ins_encode %{
12861 __ movzbl($dst$$Register, $src$$Register);
12862 %}
12863 ins_pipe(ialu_reg);
12864 %}
12865
12866 // And Register with Immediate 65535
12867 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12868 %{
12869 match(Set dst (AndI src mask));
12870
12871 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12872 ins_encode %{
12873 __ movzwl($dst$$Register, $src$$Register);
12874 %}
12875 ins_pipe(ialu_reg);
12876 %}
12877
12878 // And Register with Immediate 65535 and promote to long
12879 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12880 %{
12881 match(Set dst (ConvI2L (AndI src mask)));
12882
12883 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12884 ins_encode %{
12885 __ movzwl($dst$$Register, $src$$Register);
12886 %}
12887 ins_pipe(ialu_reg);
12888 %}
12889
12890 // Can skip int2long conversions after AND with small bitmask
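// bzhiq copies src to dst and zeroes every bit at index >= k, so an AND with
// (2^k - 1) plus the widening i2l reduces to one bzhi with k = log2(mask + 1).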
12891 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12892 %{
12893 predicate(VM_Version::supports_bmi2());
12894 ins_cost(125);
12895 effect(TEMP tmp, KILL cr);
12896 match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12898 ins_encode %{
12899 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12900 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12901 %}
12902 ins_pipe(ialu_reg_reg);
12903 %}
12904
12905 // And Register with Immediate
12906 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12907 %{
12908 predicate(!UseAPX);
12909 match(Set dst (AndI dst src));
12910 effect(KILL cr);
12911 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12912
12913 format %{ "andl $dst, $src\t# int" %}
12914 ins_encode %{
12915 __ andl($dst$$Register, $src$$constant);
12916 %}
12917 ins_pipe(ialu_reg);
12918 %}
12919
12920 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12921 %{
12922 predicate(UseAPX);
12923 match(Set dst (AndI src1 src2));
12924 effect(KILL cr);
12925 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12926
12927 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12928 ins_encode %{
12929 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12930 %}
12931 ins_pipe(ialu_reg);
12932 %}
12933
12934 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
12935 %{
12936 predicate(UseAPX);
12937 match(Set dst (AndI (LoadI src1) src2));
12938 effect(KILL cr);
12939 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12940
12941 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12942 ins_encode %{
12943 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
12944 %}
12945 ins_pipe(ialu_reg);
12946 %}
12947
12948 // And Register with Memory
12949 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12950 %{
12951 predicate(!UseAPX);
12952 match(Set dst (AndI dst (LoadI src)));
12953 effect(KILL cr);
12954 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12955
12956 ins_cost(150);
12957 format %{ "andl $dst, $src\t# int" %}
12958 ins_encode %{
12959 __ andl($dst$$Register, $src$$Address);
12960 %}
12961 ins_pipe(ialu_reg_mem);
12962 %}
12963
12964 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
12965 %{
12966 predicate(UseAPX);
12967 match(Set dst (AndI src1 (LoadI src2)));
12968 effect(KILL cr);
12969 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12970
12971 ins_cost(150);
12972 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12973 ins_encode %{
12974 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
12975 %}
12976 ins_pipe(ialu_reg_mem);
12977 %}
12978
12979 // And Memory with Register
12980 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12981 %{
12982 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
12983 effect(KILL cr);
12984 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12985
12986 ins_cost(150);
12987 format %{ "andb $dst, $src\t# byte" %}
12988 ins_encode %{
12989 __ andb($dst$$Address, $src$$Register);
12990 %}
12991 ins_pipe(ialu_mem_reg);
12992 %}
12993
12994 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12995 %{
12996 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12997 effect(KILL cr);
12998 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12999
13000 ins_cost(150);
13001 format %{ "andl $dst, $src\t# int" %}
13002 ins_encode %{
13003 __ andl($dst$$Address, $src$$Register);
13004 %}
13005 ins_pipe(ialu_mem_reg);
13006 %}
13007
13008 // And Memory with Immediate
13009 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13010 %{
13011 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13012 effect(KILL cr);
13013 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13014
13015 ins_cost(125);
13016 format %{ "andl $dst, $src\t# int" %}
13017 ins_encode %{
13018 __ andl($dst$$Address, $src$$constant);
13019 %}
13020 ins_pipe(ialu_mem_imm);
13021 %}
13022
13023 // BMI1 instructions
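// andn computes dst = ~src1 & src2, which is what the (XorI src1 -1)
// sub-pattern below expresses.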
13024 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13025 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13026 predicate(UseBMI1Instructions);
13027 effect(KILL cr);
13028 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13029
13030 ins_cost(125);
13031 format %{ "andnl $dst, $src1, $src2" %}
13032
13033 ins_encode %{
13034 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13035 %}
13036 ins_pipe(ialu_reg_mem);
13037 %}
13038
13039 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13040 match(Set dst (AndI (XorI src1 minus_1) src2));
13041 predicate(UseBMI1Instructions);
13042 effect(KILL cr);
13043 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13044
13045 format %{ "andnl $dst, $src1, $src2" %}
13046
13047 ins_encode %{
13048 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13049 %}
13050 ins_pipe(ialu_reg);
13051 %}
13052
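// blsi extracts the lowest set bit: dst = src & -src.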
13053 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13054 match(Set dst (AndI (SubI imm_zero src) src));
13055 predicate(UseBMI1Instructions);
13056 effect(KILL cr);
13057 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13058
13059 format %{ "blsil $dst, $src" %}
13060
13061 ins_encode %{
13062 __ blsil($dst$$Register, $src$$Register);
13063 %}
13064 ins_pipe(ialu_reg);
13065 %}
13066
13067 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13068 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13069 predicate(UseBMI1Instructions);
13070 effect(KILL cr);
13071 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13072
13073 ins_cost(125);
13074 format %{ "blsil $dst, $src" %}
13075
13076 ins_encode %{
13077 __ blsil($dst$$Register, $src$$Address);
13078 %}
13079 ins_pipe(ialu_reg_mem);
13080 %}
13081
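// blsmsk sets all bits up to and including the lowest set bit:
// dst = src ^ (src - 1).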
13082 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13083 %{
13084 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13085 predicate(UseBMI1Instructions);
13086 effect(KILL cr);
13087 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13088
13089 ins_cost(125);
13090 format %{ "blsmskl $dst, $src" %}
13091
13092 ins_encode %{
13093 __ blsmskl($dst$$Register, $src$$Address);
13094 %}
13095 ins_pipe(ialu_reg_mem);
13096 %}
13097
13098 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13099 %{
13100 match(Set dst (XorI (AddI src minus_1) src));
13101 predicate(UseBMI1Instructions);
13102 effect(KILL cr);
13103 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13104
13105 format %{ "blsmskl $dst, $src" %}
13106
13107 ins_encode %{
13108 __ blsmskl($dst$$Register, $src$$Register);
13109 %}
13110
13111 ins_pipe(ialu_reg);
13112 %}
13113
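// blsr clears the lowest set bit: dst = src & (src - 1).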
13114 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13115 %{
13116 match(Set dst (AndI (AddI src minus_1) src) );
13117 predicate(UseBMI1Instructions);
13118 effect(KILL cr);
13119 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13120
13121 format %{ "blsrl $dst, $src" %}
13122
13123 ins_encode %{
13124 __ blsrl($dst$$Register, $src$$Register);
13125 %}
13126
  ins_pipe(ialu_reg);
13128 %}
13129
13130 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13131 %{
13132 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13133 predicate(UseBMI1Instructions);
13134 effect(KILL cr);
13135 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13136
13137 ins_cost(125);
13138 format %{ "blsrl $dst, $src" %}
13139
13140 ins_encode %{
13141 __ blsrl($dst$$Register, $src$$Address);
13142 %}
13143
  ins_pipe(ialu_reg_mem);
13145 %}
13146
13147 // Or Instructions
13148 // Or Register with Register
13149 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13150 %{
13151 predicate(!UseAPX);
13152 match(Set dst (OrI dst src));
13153 effect(KILL cr);
13154 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13155
13156 format %{ "orl $dst, $src\t# int" %}
13157 ins_encode %{
13158 __ orl($dst$$Register, $src$$Register);
13159 %}
13160 ins_pipe(ialu_reg_reg);
13161 %}
13162
13163 // Or Register with Register using New Data Destination (NDD)
13164 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13165 %{
13166 predicate(UseAPX);
13167 match(Set dst (OrI src1 src2));
13168 effect(KILL cr);
13169 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13170
13171 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13172 ins_encode %{
13173 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13174 %}
13175 ins_pipe(ialu_reg_reg);
13176 %}
13177
13178 // Or Register with Immediate
13179 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13180 %{
13181 predicate(!UseAPX);
13182 match(Set dst (OrI dst src));
13183 effect(KILL cr);
13184 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13185
13186 format %{ "orl $dst, $src\t# int" %}
13187 ins_encode %{
13188 __ orl($dst$$Register, $src$$constant);
13189 %}
13190 ins_pipe(ialu_reg);
13191 %}
13192
13193 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13194 %{
13195 predicate(UseAPX);
13196 match(Set dst (OrI src1 src2));
13197 effect(KILL cr);
13198 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13199
13200 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13201 ins_encode %{
13202 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13203 %}
13204 ins_pipe(ialu_reg);
13205 %}
13206
13207 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13208 %{
13209 predicate(UseAPX);
13210 match(Set dst (OrI src1 src2));
13211 effect(KILL cr);
13212 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13213
13214 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13215 ins_encode %{
13216 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13217 %}
13218 ins_pipe(ialu_reg);
13219 %}
13220
13221 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13222 %{
13223 predicate(UseAPX);
13224 match(Set dst (OrI (LoadI src1) src2));
13225 effect(KILL cr);
13226 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13227
13228 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13229 ins_encode %{
13230 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13231 %}
13232 ins_pipe(ialu_reg);
13233 %}
13234
13235 // Or Register with Memory
13236 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13237 %{
13238 predicate(!UseAPX);
13239 match(Set dst (OrI dst (LoadI src)));
13240 effect(KILL cr);
13241 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13242
13243 ins_cost(150);
13244 format %{ "orl $dst, $src\t# int" %}
13245 ins_encode %{
13246 __ orl($dst$$Register, $src$$Address);
13247 %}
13248 ins_pipe(ialu_reg_mem);
13249 %}
13250
13251 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13252 %{
13253 predicate(UseAPX);
13254 match(Set dst (OrI src1 (LoadI src2)));
13255 effect(KILL cr);
13256 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13257
13258 ins_cost(150);
13259 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13260 ins_encode %{
13261 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13262 %}
13263 ins_pipe(ialu_reg_mem);
13264 %}
13265
13266 // Or Memory with Register
13267 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13268 %{
13269 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13270 effect(KILL cr);
13271 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13272
13273 ins_cost(150);
13274 format %{ "orb $dst, $src\t# byte" %}
13275 ins_encode %{
13276 __ orb($dst$$Address, $src$$Register);
13277 %}
13278 ins_pipe(ialu_mem_reg);
13279 %}
13280
13281 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13282 %{
13283 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13284 effect(KILL cr);
13285 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13286
13287 ins_cost(150);
13288 format %{ "orl $dst, $src\t# int" %}
13289 ins_encode %{
13290 __ orl($dst$$Address, $src$$Register);
13291 %}
13292 ins_pipe(ialu_mem_reg);
13293 %}
13294
13295 // Or Memory with Immediate
13296 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13297 %{
13298 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13299 effect(KILL cr);
13300 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13301
13302 ins_cost(125);
13303 format %{ "orl $dst, $src\t# int" %}
13304 ins_encode %{
13305 __ orl($dst$$Address, $src$$constant);
13306 %}
13307 ins_pipe(ialu_mem_imm);
13308 %}
13309
13310 // Xor Instructions
13311 // Xor Register with Register
13312 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13313 %{
13314 predicate(!UseAPX);
13315 match(Set dst (XorI dst src));
13316 effect(KILL cr);
13317 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13318
13319 format %{ "xorl $dst, $src\t# int" %}
13320 ins_encode %{
13321 __ xorl($dst$$Register, $src$$Register);
13322 %}
13323 ins_pipe(ialu_reg_reg);
13324 %}
13325
13326 // Xor Register with Register using New Data Destination (NDD)
13327 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13328 %{
13329 predicate(UseAPX);
13330 match(Set dst (XorI src1 src2));
13331 effect(KILL cr);
13332 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13333
13334 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13335 ins_encode %{
13336 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13337 %}
13338 ins_pipe(ialu_reg_reg);
13339 %}
13340
13341 // Xor Register with Immediate -1
13342 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13343 %{
13344 predicate(!UseAPX);
13345 match(Set dst (XorI dst imm));
13346
13347 format %{ "notl $dst" %}
13348 ins_encode %{
13349 __ notl($dst$$Register);
13350 %}
13351 ins_pipe(ialu_reg);
13352 %}
13353
13354 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13355 %{
13356 match(Set dst (XorI src imm));
13357 predicate(UseAPX);
13358
13359 format %{ "enotl $dst, $src" %}
13360 ins_encode %{
13361 __ enotl($dst$$Register, $src$$Register);
13362 %}
13363 ins_pipe(ialu_reg);
13364 %}
13365
13366 // Xor Register with Immediate
13367 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13368 %{
  // Strict predicate check so that xorI_rReg_im1 is selected whenever immI src
  // is -1, regardless of relative instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13371 match(Set dst (XorI dst src));
13372 effect(KILL cr);
13373 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13374
13375 format %{ "xorl $dst, $src\t# int" %}
13376 ins_encode %{
13377 __ xorl($dst$$Register, $src$$constant);
13378 %}
13379 ins_pipe(ialu_reg);
13380 %}
13381
13382 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13383 %{
  // Strict predicate check so that xorI_rReg_im1_ndd is selected whenever immI
  // src2 is -1, regardless of relative instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13386 match(Set dst (XorI src1 src2));
13387 effect(KILL cr);
13388 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13389
13390 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13391 ins_encode %{
13392 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13393 %}
13394 ins_pipe(ialu_reg);
13395 %}
13396
13397 // Xor Memory with Immediate
13398 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13399 %{
13400 predicate(UseAPX);
13401 match(Set dst (XorI (LoadI src1) src2));
13402 effect(KILL cr);
13403 ins_cost(150);
13404 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13405
13406 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13407 ins_encode %{
13408 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13409 %}
13410 ins_pipe(ialu_reg);
13411 %}
13412
13413 // Xor Register with Memory
13414 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13415 %{
13416 predicate(!UseAPX);
13417 match(Set dst (XorI dst (LoadI src)));
13418 effect(KILL cr);
13419 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13420
13421 ins_cost(150);
13422 format %{ "xorl $dst, $src\t# int" %}
13423 ins_encode %{
13424 __ xorl($dst$$Register, $src$$Address);
13425 %}
13426 ins_pipe(ialu_reg_mem);
13427 %}
13428
13429 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13430 %{
13431 predicate(UseAPX);
13432 match(Set dst (XorI src1 (LoadI src2)));
13433 effect(KILL cr);
13434 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13435
13436 ins_cost(150);
13437 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13438 ins_encode %{
13439 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13440 %}
13441 ins_pipe(ialu_reg_mem);
13442 %}
13443
13444 // Xor Memory with Register
13445 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13446 %{
13447 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13448 effect(KILL cr);
13449 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13450
13451 ins_cost(150);
13452 format %{ "xorb $dst, $src\t# byte" %}
13453 ins_encode %{
13454 __ xorb($dst$$Address, $src$$Register);
13455 %}
13456 ins_pipe(ialu_mem_reg);
13457 %}
13458
13459 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13460 %{
13461 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13462 effect(KILL cr);
13463 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13464
13465 ins_cost(150);
13466 format %{ "xorl $dst, $src\t# int" %}
13467 ins_encode %{
13468 __ xorl($dst$$Address, $src$$Register);
13469 %}
13470 ins_pipe(ialu_mem_reg);
13471 %}
13472
13473 // Xor Memory with Immediate
13474 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13475 %{
13476 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13477 effect(KILL cr);
13478 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13479
13480 ins_cost(125);
13481 format %{ "xorl $dst, $src\t# int" %}
13482 ins_encode %{
13483 __ xorl($dst$$Address, $src$$constant);
13484 %}
13485 ins_pipe(ialu_mem_imm);
13486 %}
13487
13488
13489 // Long Logical Instructions
13490
13491 // And Instructions
13492 // And Register with Register
13493 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13494 %{
13495 predicate(!UseAPX);
13496 match(Set dst (AndL dst src));
13497 effect(KILL cr);
13498 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13499
13500 format %{ "andq $dst, $src\t# long" %}
13501 ins_encode %{
13502 __ andq($dst$$Register, $src$$Register);
13503 %}
13504 ins_pipe(ialu_reg_reg);
13505 %}
13506
13507 // And Register with Register using New Data Destination (NDD)
13508 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13509 %{
13510 predicate(UseAPX);
13511 match(Set dst (AndL src1 src2));
13512 effect(KILL cr);
13513 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13514
13515 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13516 ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13520 ins_pipe(ialu_reg_reg);
13521 %}
13522
13523 // And Register with Immediate 255
13524 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13525 %{
13526 match(Set dst (AndL src mask));
13527
13528 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13529 ins_encode %{
13530 // movzbl zeroes out the upper 32-bit and does not need REX.W
13531 __ movzbl($dst$$Register, $src$$Register);
13532 %}
13533 ins_pipe(ialu_reg);
13534 %}
13535
13536 // And Register with Immediate 65535
13537 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13538 %{
13539 match(Set dst (AndL src mask));
13540
13541 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13542 ins_encode %{
13543 // movzwl zeroes out the upper 32-bit and does not need REX.W
13544 __ movzwl($dst$$Register, $src$$Register);
13545 %}
13546 ins_pipe(ialu_reg);
13547 %}
13548
13549 // And Register with Immediate
13550 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13551 %{
13552 predicate(!UseAPX);
13553 match(Set dst (AndL dst src));
13554 effect(KILL cr);
13555 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13556
13557 format %{ "andq $dst, $src\t# long" %}
13558 ins_encode %{
13559 __ andq($dst$$Register, $src$$constant);
13560 %}
13561 ins_pipe(ialu_reg);
13562 %}
13563
13564 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13565 %{
13566 predicate(UseAPX);
13567 match(Set dst (AndL src1 src2));
13568 effect(KILL cr);
13569 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13570
13571 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13572 ins_encode %{
13573 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13574 %}
13575 ins_pipe(ialu_reg);
13576 %}
13577
13578 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13579 %{
13580 predicate(UseAPX);
13581 match(Set dst (AndL (LoadL src1) src2));
13582 effect(KILL cr);
13583 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13584
13585 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13586 ins_encode %{
13587 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13588 %}
13589 ins_pipe(ialu_reg);
13590 %}
13591
13592 // And Register with Memory
13593 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13594 %{
13595 predicate(!UseAPX);
13596 match(Set dst (AndL dst (LoadL src)));
13597 effect(KILL cr);
13598 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13599
13600 ins_cost(150);
13601 format %{ "andq $dst, $src\t# long" %}
13602 ins_encode %{
13603 __ andq($dst$$Register, $src$$Address);
13604 %}
13605 ins_pipe(ialu_reg_mem);
13606 %}
13607
13608 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13609 %{
13610 predicate(UseAPX);
13611 match(Set dst (AndL src1 (LoadL src2)));
13612 effect(KILL cr);
13613 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13614
13615 ins_cost(150);
13616 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13617 ins_encode %{
13618 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13619 %}
13620 ins_pipe(ialu_reg_mem);
13621 %}
13622
13623 // And Memory with Register
13624 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13625 %{
13626 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13627 effect(KILL cr);
13628 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13629
13630 ins_cost(150);
13631 format %{ "andq $dst, $src\t# long" %}
13632 ins_encode %{
13633 __ andq($dst$$Address, $src$$Register);
13634 %}
13635 ins_pipe(ialu_mem_reg);
13636 %}
13637
13638 // And Memory with Immediate
13639 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13640 %{
13641 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13642 effect(KILL cr);
13643 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13644
13645 ins_cost(125);
13646 format %{ "andq $dst, $src\t# long" %}
13647 ins_encode %{
13648 __ andq($dst$$Address, $src$$constant);
13649 %}
13650 ins_pipe(ialu_mem_imm);
13651 %}
13652
13653 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13654 %{
  // con must be a true 64-bit immediate whose complement is a power of 2;
  // for bit indices below 31, a plain AND/OR with an imm32 does the job.
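  // e.g. clearing bit 40: ~(1L << 40) is not encodable as a sign-extended
  // imm32 AND, but btrq with bit index 40 is.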
13657 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13658
13659 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13660 effect(KILL cr);
13661
13662 ins_cost(125);
13663 format %{ "btrq $dst, log2(not($con))\t# long" %}
13664 ins_encode %{
13665 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13666 %}
13667 ins_pipe(ialu_mem_imm);
13668 %}
13669
13670 // BMI1 instructions
13671 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13672 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13673 predicate(UseBMI1Instructions);
13674 effect(KILL cr);
13675 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13676
13677 ins_cost(125);
13678 format %{ "andnq $dst, $src1, $src2" %}
13679
13680 ins_encode %{
13681 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13682 %}
13683 ins_pipe(ialu_reg_mem);
13684 %}
13685
13686 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13687 match(Set dst (AndL (XorL src1 minus_1) src2));
13688 predicate(UseBMI1Instructions);
13689 effect(KILL cr);
13690 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13691
13692 format %{ "andnq $dst, $src1, $src2" %}
13693
13694 ins_encode %{
13695 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13696 %}
  ins_pipe(ialu_reg);
13698 %}
13699
13700 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13701 match(Set dst (AndL (SubL imm_zero src) src));
13702 predicate(UseBMI1Instructions);
13703 effect(KILL cr);
13704 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13705
13706 format %{ "blsiq $dst, $src" %}
13707
13708 ins_encode %{
13709 __ blsiq($dst$$Register, $src$$Register);
13710 %}
13711 ins_pipe(ialu_reg);
13712 %}
13713
13714 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13715 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13716 predicate(UseBMI1Instructions);
13717 effect(KILL cr);
13718 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13719
13720 ins_cost(125);
13721 format %{ "blsiq $dst, $src" %}
13722
13723 ins_encode %{
13724 __ blsiq($dst$$Register, $src$$Address);
13725 %}
13726 ins_pipe(ialu_reg_mem);
13727 %}
13728
13729 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13730 %{
13731 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13732 predicate(UseBMI1Instructions);
13733 effect(KILL cr);
13734 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13735
13736 ins_cost(125);
13737 format %{ "blsmskq $dst, $src" %}
13738
13739 ins_encode %{
13740 __ blsmskq($dst$$Register, $src$$Address);
13741 %}
13742 ins_pipe(ialu_reg_mem);
13743 %}
13744
13745 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13746 %{
13747 match(Set dst (XorL (AddL src minus_1) src));
13748 predicate(UseBMI1Instructions);
13749 effect(KILL cr);
13750 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13751
13752 format %{ "blsmskq $dst, $src" %}
13753
13754 ins_encode %{
13755 __ blsmskq($dst$$Register, $src$$Register);
13756 %}
13757
13758 ins_pipe(ialu_reg);
13759 %}
13760
13761 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13762 %{
13763 match(Set dst (AndL (AddL src minus_1) src) );
13764 predicate(UseBMI1Instructions);
13765 effect(KILL cr);
13766 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13767
13768 format %{ "blsrq $dst, $src" %}
13769
13770 ins_encode %{
13771 __ blsrq($dst$$Register, $src$$Register);
13772 %}
13773
13774 ins_pipe(ialu_reg);
13775 %}
13776
13777 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13778 %{
13779 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13780 predicate(UseBMI1Instructions);
13781 effect(KILL cr);
13782 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13783
13784 ins_cost(125);
13785 format %{ "blsrq $dst, $src" %}
13786
13787 ins_encode %{
13788 __ blsrq($dst$$Register, $src$$Address);
13789 %}
13790
  ins_pipe(ialu_reg_mem);
13792 %}
13793
13794 // Or Instructions
13795 // Or Register with Register
13796 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13797 %{
13798 predicate(!UseAPX);
13799 match(Set dst (OrL dst src));
13800 effect(KILL cr);
13801 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13802
13803 format %{ "orq $dst, $src\t# long" %}
13804 ins_encode %{
13805 __ orq($dst$$Register, $src$$Register);
13806 %}
13807 ins_pipe(ialu_reg_reg);
13808 %}
13809
13810 // Or Register with Register using New Data Destination (NDD)
13811 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13812 %{
13813 predicate(UseAPX);
13814 match(Set dst (OrL src1 src2));
13815 effect(KILL cr);
13816 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13817
13818 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13819 ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13823 ins_pipe(ialu_reg_reg);
13824 %}
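
// A note on the NDD forms: APX EVEX encodings carry an independent
// destination, so the two-instruction sequence a legacy rule needs when dst
// must not clobber src1, e.g.
//
//   movq dst, src1
//   orq  dst, src2
//
// collapses into the single "eorq dst, src1, src2" emitted above. The
// trailing "false" is the helper's no-flags toggle; leaving it off keeps the
// flag updates that the flag(...) declaration advertises.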
13825
13826 // Use any_RegP to match R15 (TLS register) without spilling.
13827 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13828 match(Set dst (OrL dst (CastP2X src)));
13829 effect(KILL cr);
13830 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13831
13832 format %{ "orq $dst, $src\t# long" %}
13833 ins_encode %{
13834 __ orq($dst$$Register, $src$$Register);
13835 %}
13836 ins_pipe(ialu_reg_reg);
13837 %}
13838
13839 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13840 match(Set dst (OrL src1 (CastP2X src2)));
13841 effect(KILL cr);
13842 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13843
13844 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13845 ins_encode %{
13846 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13847 %}
13848 ins_pipe(ialu_reg_reg);
13849 %}
13850
13851 // Or Register with Immediate
13852 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13853 %{
13854 predicate(!UseAPX);
13855 match(Set dst (OrL dst src));
13856 effect(KILL cr);
13857 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13858
13859 format %{ "orq $dst, $src\t# long" %}
13860 ins_encode %{
13861 __ orq($dst$$Register, $src$$constant);
13862 %}
13863 ins_pipe(ialu_reg);
13864 %}
13865
13866 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13867 %{
13868 predicate(UseAPX);
13869 match(Set dst (OrL src1 src2));
13870 effect(KILL cr);
13871 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13872
13873 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13874 ins_encode %{
13875 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13876 %}
13877 ins_pipe(ialu_reg);
13878 %}
13879
13880 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13881 %{
13882 predicate(UseAPX);
13883 match(Set dst (OrL src1 src2));
13884 effect(KILL cr);
13885 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13886
13887 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
13888 ins_encode %{
13889 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13890 %}
13891 ins_pipe(ialu_reg);
13892 %}
13893
13894 // Or Memory with Immediate
13895 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13896 %{
13897 predicate(UseAPX);
13898 match(Set dst (OrL (LoadL src1) src2));
13899 effect(KILL cr);
13900 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13901
13902 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13903 ins_encode %{
13904 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13905 %}
13906 ins_pipe(ialu_reg);
13907 %}
13908
13909 // Or Register with Memory
13910 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13911 %{
13912 predicate(!UseAPX);
13913 match(Set dst (OrL dst (LoadL src)));
13914 effect(KILL cr);
13915 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13916
13917 ins_cost(150);
13918 format %{ "orq $dst, $src\t# long" %}
13919 ins_encode %{
13920 __ orq($dst$$Register, $src$$Address);
13921 %}
13922 ins_pipe(ialu_reg_mem);
13923 %}
13924
13925 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13926 %{
13927 predicate(UseAPX);
13928 match(Set dst (OrL src1 (LoadL src2)));
13929 effect(KILL cr);
13930 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13931
13932 ins_cost(150);
13933 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13934 ins_encode %{
13935 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
13936 %}
13937 ins_pipe(ialu_reg_mem);
13938 %}
13939
13940 // Or Memory with Register
13941 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13942 %{
13943 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13944 effect(KILL cr);
13945 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13946
13947 ins_cost(150);
13948 format %{ "orq $dst, $src\t# long" %}
13949 ins_encode %{
13950 __ orq($dst$$Address, $src$$Register);
13951 %}
13952 ins_pipe(ialu_mem_reg);
13953 %}
13954
13955 // Or Memory with Immediate
13956 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13957 %{
13958 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13959 effect(KILL cr);
13960 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13961
13962 ins_cost(125);
13963 format %{ "orq $dst, $src\t# long" %}
13964 ins_encode %{
13965 __ orq($dst$$Address, $src$$constant);
13966 %}
13967 ins_pipe(ialu_mem_imm);
13968 %}
13969
13970 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
13971 %{
  // con should be a pure 64-bit power of 2 immediate with its set bit above
  // bit 31, because plain AND/OR handles imm32-encodable values just as well.
13974 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
13975
13976 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
13977 effect(KILL cr);
13978
13979 ins_cost(125);
13980 format %{ "btsq $dst, log2($con)\t# long" %}
13981 ins_encode %{
13982 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
13983 %}
13984 ins_pipe(ialu_mem_imm);
13985 %}
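
// For reference: a power-of-2 constant with its set bit above position 31
// cannot be encoded as a sign-extended imm32 for orq, so setting the bit
// directly beats materializing a 64-bit immediate. A Java-level sketch:
//
//   flags |= 1L << 40;   // => btsq [flags], 40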
13986
13987 // Xor Instructions
13988 // Xor Register with Register
13989 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13990 %{
13991 predicate(!UseAPX);
13992 match(Set dst (XorL dst src));
13993 effect(KILL cr);
13994 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13995
13996 format %{ "xorq $dst, $src\t# long" %}
13997 ins_encode %{
13998 __ xorq($dst$$Register, $src$$Register);
13999 %}
14000 ins_pipe(ialu_reg_reg);
14001 %}
14002
14003 // Xor Register with Register using New Data Destination (NDD)
14004 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14005 %{
14006 predicate(UseAPX);
14007 match(Set dst (XorL src1 src2));
14008 effect(KILL cr);
14009 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14010
14011 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14012 ins_encode %{
14013 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14014 %}
14015 ins_pipe(ialu_reg_reg);
14016 %}
14017
14018 // Xor Register with Immediate -1
14019 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14020 %{
14021 predicate(!UseAPX);
14022 match(Set dst (XorL dst imm));
14023
14024 format %{ "notq $dst" %}
14025 ins_encode %{
14026 __ notq($dst$$Register);
14027 %}
14028 ins_pipe(ialu_reg);
14029 %}
14030
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14032 %{
14033 predicate(UseAPX);
14034 match(Set dst (XorL src imm));
14035
14036 format %{ "enotq $dst, $src" %}
14037 ins_encode %{
14038 __ enotq($dst$$Register, $src$$Register);
14039 %}
14040 ins_pipe(ialu_reg);
14041 %}
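
// For reference, x ^ -1 is ~x, and x86 "not" does not modify the flags --
// which is why neither rule above declares KILL cr. A Java-level sketch:
//
//   static long complement(long x) { return ~x; }   // => notq (enotq with APX)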
14042
14043 // Xor Register with Immediate
14044 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14045 %{
  // Strict predicate check: exclude -1 here so that xorL_rReg_im1 is always selected for it, independent of cost.
14047 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14048 match(Set dst (XorL dst src));
14049 effect(KILL cr);
14050 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14051
14052 format %{ "xorq $dst, $src\t# long" %}
14053 ins_encode %{
14054 __ xorq($dst$$Register, $src$$constant);
14055 %}
14056 ins_pipe(ialu_reg);
14057 %}
14058
14059 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14060 %{
  // Strict predicate check: exclude -1 here so that xorL_rReg_im1_ndd is always selected for it, independent of cost.
14062 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14063 match(Set dst (XorL src1 src2));
14064 effect(KILL cr);
14065 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14066
14067 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14068 ins_encode %{
14069 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14070 %}
14071 ins_pipe(ialu_reg);
14072 %}
14073
14074 // Xor Memory with Immediate
14075 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14076 %{
14077 predicate(UseAPX);
14078 match(Set dst (XorL (LoadL src1) src2));
14079 effect(KILL cr);
14080 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14081 ins_cost(150);
14082
14083 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14084 ins_encode %{
14085 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14086 %}
14087 ins_pipe(ialu_reg);
14088 %}
14089
14090 // Xor Register with Memory
14091 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14092 %{
14093 predicate(!UseAPX);
14094 match(Set dst (XorL dst (LoadL src)));
14095 effect(KILL cr);
14096 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14097
14098 ins_cost(150);
14099 format %{ "xorq $dst, $src\t# long" %}
14100 ins_encode %{
14101 __ xorq($dst$$Register, $src$$Address);
14102 %}
14103 ins_pipe(ialu_reg_mem);
14104 %}
14105
14106 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14107 %{
14108 predicate(UseAPX);
14109 match(Set dst (XorL src1 (LoadL src2)));
14110 effect(KILL cr);
14111 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14112
14113 ins_cost(150);
14114 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14115 ins_encode %{
14116 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14117 %}
14118 ins_pipe(ialu_reg_mem);
14119 %}
14120
14121 // Xor Memory with Register
14122 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14123 %{
14124 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14125 effect(KILL cr);
14126 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14127
14128 ins_cost(150);
14129 format %{ "xorq $dst, $src\t# long" %}
14130 ins_encode %{
14131 __ xorq($dst$$Address, $src$$Register);
14132 %}
14133 ins_pipe(ialu_mem_reg);
14134 %}
14135
14136 // Xor Memory with Immediate
14137 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14138 %{
14139 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14140 effect(KILL cr);
14141 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14142
14143 ins_cost(125);
14144 format %{ "xorq $dst, $src\t# long" %}
14145 ins_encode %{
14146 __ xorq($dst$$Address, $src$$constant);
14147 %}
14148 ins_pipe(ialu_mem_imm);
14149 %}
14150
14151 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14152 %{
14153 match(Set dst (CmpLTMask p q));
14154 effect(KILL cr);
14155
14156 ins_cost(400);
14157 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst\t# emits setlt + movzbl, or setzul for APX\n\t"
            "negl $dst" %}
14160 ins_encode %{
14161 __ cmpl($p$$Register, $q$$Register);
14162 __ setcc(Assembler::less, $dst$$Register);
14163 __ negl($dst$$Register);
14164 %}
14165 ins_pipe(pipe_slow);
14166 %}
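
// For reference, CmpLTMask yields an all-ones/all-zero mask from a signed
// compare. A Java-level sketch of the value computed above:
//
//   static int ltMask(int p, int q) { return (p < q) ? -1 : 0; }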
14167
14168 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14169 %{
14170 match(Set dst (CmpLTMask dst zero));
14171 effect(KILL cr);
14172
14173 ins_cost(100);
14174 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14175 ins_encode %{
14176 __ sarl($dst$$Register, 31);
14177 %}
14178 ins_pipe(ialu_reg);
14179 %}
14180
14181 /* Better to save a register than avoid a branch */
14182 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14183 %{
14184 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14185 effect(KILL cr);
14186 ins_cost(300);
14187 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14188 "jge done\n\t"
14189 "addl $p,$y\n"
14190 "done: " %}
14191 ins_encode %{
14192 Register Rp = $p$$Register;
14193 Register Rq = $q$$Register;
14194 Register Ry = $y$$Register;
14195 Label done;
14196 __ subl(Rp, Rq);
14197 __ jccb(Assembler::greaterEqual, done);
14198 __ addl(Rp, Ry);
14199 __ bind(done);
14200 %}
14201 ins_pipe(pipe_cmplt);
14202 %}
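
// For reference, the matched tree computes (p - q) + ((p < q) ? y : 0),
// i.e. the branch-free form of a conditional correction such as (a sketch):
//
//   int r = p - q;
//   if (r < 0) r += y;
//
// The rule deliberately re-introduces the short branch because that frees
// the register a mask-based encoding would consume.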
14203
14204 /* Better to save a register than avoid a branch */
14205 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14206 %{
14207 match(Set y (AndI (CmpLTMask p q) y));
14208 effect(KILL cr);
14209
14210 ins_cost(300);
14211
14212 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jl done\n\t"
14214 "xorl $y, $y\n"
14215 "done: " %}
14216 ins_encode %{
14217 Register Rp = $p$$Register;
14218 Register Rq = $q$$Register;
14219 Register Ry = $y$$Register;
14220 Label done;
14221 __ cmpl(Rp, Rq);
14222 __ jccb(Assembler::less, done);
14223 __ xorl(Ry, Ry);
14224 __ bind(done);
14225 %}
14226 ins_pipe(pipe_cmplt);
14227 %}
14228
14229
14230 //---------- FP Instructions------------------------------------------------
14231
14232 // Really expensive, avoid
14233 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14234 %{
14235 match(Set cr (CmpF src1 src2));
14236
14237 ins_cost(500);
14238 format %{ "ucomiss $src1, $src2\n\t"
14239 "jnp,s exit\n\t"
14240 "pushfq\t# saw NaN, set CF\n\t"
14241 "andq [rsp], #0xffffff2b\n\t"
14242 "popfq\n"
14243 "exit:" %}
14244 ins_encode %{
14245 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14246 emit_cmpfp_fixup(masm);
14247 %}
14248 ins_pipe(pipe_slow);
14249 %}
14250
14251 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14252 match(Set cr (CmpF src1 src2));
14253
14254 ins_cost(100);
14255 format %{ "ucomiss $src1, $src2" %}
14256 ins_encode %{
14257 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14258 %}
14259 ins_pipe(pipe_slow);
14260 %}
14261
14262 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14263 match(Set cr (CmpF src1 (LoadF src2)));
14264
14265 ins_cost(100);
14266 format %{ "ucomiss $src1, $src2" %}
14267 ins_encode %{
14268 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14269 %}
14270 ins_pipe(pipe_slow);
14271 %}
14272
14273 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14274 match(Set cr (CmpF src con));
14275 ins_cost(100);
14276 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14277 ins_encode %{
14278 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14279 %}
14280 ins_pipe(pipe_slow);
14281 %}
14282
14283 // Really expensive, avoid
14284 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14285 %{
14286 match(Set cr (CmpD src1 src2));
14287
14288 ins_cost(500);
14289 format %{ "ucomisd $src1, $src2\n\t"
14290 "jnp,s exit\n\t"
14291 "pushfq\t# saw NaN, set CF\n\t"
14292 "andq [rsp], #0xffffff2b\n\t"
14293 "popfq\n"
14294 "exit:" %}
14295 ins_encode %{
14296 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14297 emit_cmpfp_fixup(masm);
14298 %}
14299 ins_pipe(pipe_slow);
14300 %}
14301
14302 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14303 match(Set cr (CmpD src1 src2));
14304
14305 ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
14307 ins_encode %{
14308 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14309 %}
14310 ins_pipe(pipe_slow);
14311 %}
14312
14313 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14314 match(Set cr (CmpD src1 (LoadD src2)));
14315
14316 ins_cost(100);
14317 format %{ "ucomisd $src1, $src2" %}
14318 ins_encode %{
14319 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14320 %}
14321 ins_pipe(pipe_slow);
14322 %}
14323
14324 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14325 match(Set cr (CmpD src con));
14326 ins_cost(100);
14327 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14328 ins_encode %{
14329 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14330 %}
14331 ins_pipe(pipe_slow);
14332 %}
14333
14334 // Compare into -1,0,1
14335 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14336 %{
14337 match(Set dst (CmpF3 src1 src2));
14338 effect(KILL cr);
14339
14340 ins_cost(275);
14341 format %{ "ucomiss $src1, $src2\n\t"
14342 "movl $dst, #-1\n\t"
14343 "jp,s done\n\t"
14344 "jb,s done\n\t"
14345 "setne $dst\n\t"
14346 "movzbl $dst, $dst\n"
14347 "done:" %}
14348 ins_encode %{
14349 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14350 emit_cmpfp3(masm, $dst$$Register);
14351 %}
14352 ins_pipe(pipe_slow);
14353 %}
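
// For reference, the -1/0/1 result follows fcmpl semantics: unordered inputs
// (either operand NaN) compare as -1, which is why $dst is preset to -1 and
// kept on parity/below. A Java-level sketch:
//
//   static int cmp3(float a, float b) {
//     if (Float.isNaN(a) || Float.isNaN(b)) return -1;  // unordered => -1
//     return (a < b) ? -1 : ((a == b) ? 0 : 1);
//   }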
14354
14355 // Compare into -1,0,1
14356 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14357 %{
14358 match(Set dst (CmpF3 src1 (LoadF src2)));
14359 effect(KILL cr);
14360
14361 ins_cost(275);
14362 format %{ "ucomiss $src1, $src2\n\t"
14363 "movl $dst, #-1\n\t"
14364 "jp,s done\n\t"
14365 "jb,s done\n\t"
14366 "setne $dst\n\t"
14367 "movzbl $dst, $dst\n"
14368 "done:" %}
14369 ins_encode %{
14370 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14371 emit_cmpfp3(masm, $dst$$Register);
14372 %}
14373 ins_pipe(pipe_slow);
14374 %}
14375
14376 // Compare into -1,0,1
14377 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14378 match(Set dst (CmpF3 src con));
14379 effect(KILL cr);
14380
14381 ins_cost(275);
14382 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14383 "movl $dst, #-1\n\t"
14384 "jp,s done\n\t"
14385 "jb,s done\n\t"
14386 "setne $dst\n\t"
14387 "movzbl $dst, $dst\n"
14388 "done:" %}
14389 ins_encode %{
14390 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14391 emit_cmpfp3(masm, $dst$$Register);
14392 %}
14393 ins_pipe(pipe_slow);
14394 %}
14395
14396 // Compare into -1,0,1
14397 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14398 %{
14399 match(Set dst (CmpD3 src1 src2));
14400 effect(KILL cr);
14401
14402 ins_cost(275);
14403 format %{ "ucomisd $src1, $src2\n\t"
14404 "movl $dst, #-1\n\t"
14405 "jp,s done\n\t"
14406 "jb,s done\n\t"
14407 "setne $dst\n\t"
14408 "movzbl $dst, $dst\n"
14409 "done:" %}
14410 ins_encode %{
14411 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14412 emit_cmpfp3(masm, $dst$$Register);
14413 %}
14414 ins_pipe(pipe_slow);
14415 %}
14416
14417 // Compare into -1,0,1
14418 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14419 %{
14420 match(Set dst (CmpD3 src1 (LoadD src2)));
14421 effect(KILL cr);
14422
14423 ins_cost(275);
14424 format %{ "ucomisd $src1, $src2\n\t"
14425 "movl $dst, #-1\n\t"
14426 "jp,s done\n\t"
14427 "jb,s done\n\t"
14428 "setne $dst\n\t"
14429 "movzbl $dst, $dst\n"
14430 "done:" %}
14431 ins_encode %{
14432 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14433 emit_cmpfp3(masm, $dst$$Register);
14434 %}
14435 ins_pipe(pipe_slow);
14436 %}
14437
14438 // Compare into -1,0,1
14439 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14440 match(Set dst (CmpD3 src con));
14441 effect(KILL cr);
14442
14443 ins_cost(275);
14444 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14445 "movl $dst, #-1\n\t"
14446 "jp,s done\n\t"
14447 "jb,s done\n\t"
14448 "setne $dst\n\t"
14449 "movzbl $dst, $dst\n"
14450 "done:" %}
14451 ins_encode %{
14452 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14453 emit_cmpfp3(masm, $dst$$Register);
14454 %}
14455 ins_pipe(pipe_slow);
14456 %}
14457
14458 //----------Arithmetic Conversion Instructions---------------------------------
14459
14460 instruct convF2D_reg_reg(regD dst, regF src)
14461 %{
14462 match(Set dst (ConvF2D src));
14463
14464 format %{ "cvtss2sd $dst, $src" %}
14465 ins_encode %{
14466 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14467 %}
14468 ins_pipe(pipe_slow); // XXX
14469 %}
14470
14471 instruct convF2D_reg_mem(regD dst, memory src)
14472 %{
14473 predicate(UseAVX == 0);
14474 match(Set dst (ConvF2D (LoadF src)));
14475
14476 format %{ "cvtss2sd $dst, $src" %}
14477 ins_encode %{
14478 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14479 %}
14480 ins_pipe(pipe_slow); // XXX
14481 %}
14482
14483 instruct convD2F_reg_reg(regF dst, regD src)
14484 %{
14485 match(Set dst (ConvD2F src));
14486
14487 format %{ "cvtsd2ss $dst, $src" %}
14488 ins_encode %{
14489 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14490 %}
14491 ins_pipe(pipe_slow); // XXX
14492 %}
14493
14494 instruct convD2F_reg_mem(regF dst, memory src)
14495 %{
14496 predicate(UseAVX == 0);
14497 match(Set dst (ConvD2F (LoadD src)));
14498
14499 format %{ "cvtsd2ss $dst, $src" %}
14500 ins_encode %{
14501 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14502 %}
14503 ins_pipe(pipe_slow); // XXX
14504 %}
14505
14506 // XXX do mem variants
14507 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14508 %{
14509 predicate(!VM_Version::supports_avx10_2());
14510 match(Set dst (ConvF2I src));
14511 effect(KILL cr);
14512 format %{ "convert_f2i $dst, $src" %}
14513 ins_encode %{
14514 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14515 %}
14516 ins_pipe(pipe_slow);
14517 %}
14518
14519 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14520 %{
14521 predicate(VM_Version::supports_avx10_2());
14522 match(Set dst (ConvF2I src));
14523 format %{ "evcvttss2sisl $dst, $src" %}
14524 ins_encode %{
14525 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14526 %}
14527 ins_pipe(pipe_slow);
14528 %}
14529
14530 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14531 %{
14532 predicate(VM_Version::supports_avx10_2());
14533 match(Set dst (ConvF2I (LoadF src)));
14534 format %{ "evcvttss2sisl $dst, $src" %}
14535 ins_encode %{
14536 __ evcvttss2sisl($dst$$Register, $src$$Address);
14537 %}
14538 ins_pipe(pipe_slow);
14539 %}
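
// A note on the two conversion flavors: Java f2i/f2l/d2i/d2l require
// saturating semantics -- a sketch for the int case:
//
//   NaN                     -> 0
//   below Integer.MIN_VALUE -> Integer.MIN_VALUE
//   above Integer.MAX_VALUE -> Integer.MAX_VALUE
//
// Legacy cvttss2si instead returns the 0x80000000 "integer indefinite" on
// NaN/overflow, so the non-AVX10 rules call convertF2I for a compare-and-fixup
// (hence KILL cr), while the AVX10.2 saturating evcvtt*2sis forms need
// neither the fixup nor the flags.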
14540
14541 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14542 %{
14543 predicate(!VM_Version::supports_avx10_2());
14544 match(Set dst (ConvF2L src));
14545 effect(KILL cr);
14546 format %{ "convert_f2l $dst, $src"%}
14547 ins_encode %{
14548 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14549 %}
14550 ins_pipe(pipe_slow);
14551 %}
14552
14553 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14554 %{
14555 predicate(VM_Version::supports_avx10_2());
14556 match(Set dst (ConvF2L src));
14557 format %{ "evcvttss2sisq $dst, $src" %}
14558 ins_encode %{
14559 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14560 %}
14561 ins_pipe(pipe_slow);
14562 %}
14563
14564 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14565 %{
14566 predicate(VM_Version::supports_avx10_2());
14567 match(Set dst (ConvF2L (LoadF src)));
14568 format %{ "evcvttss2sisq $dst, $src" %}
14569 ins_encode %{
14570 __ evcvttss2sisq($dst$$Register, $src$$Address);
14571 %}
14572 ins_pipe(pipe_slow);
14573 %}
14574
14575 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14576 %{
14577 predicate(!VM_Version::supports_avx10_2());
14578 match(Set dst (ConvD2I src));
14579 effect(KILL cr);
14580 format %{ "convert_d2i $dst, $src"%}
14581 ins_encode %{
14582 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14583 %}
14584 ins_pipe(pipe_slow);
14585 %}
14586
14587 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14588 %{
14589 predicate(VM_Version::supports_avx10_2());
14590 match(Set dst (ConvD2I src));
14591 format %{ "evcvttsd2sisl $dst, $src" %}
14592 ins_encode %{
14593 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14594 %}
14595 ins_pipe(pipe_slow);
14596 %}
14597
14598 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14599 %{
14600 predicate(VM_Version::supports_avx10_2());
14601 match(Set dst (ConvD2I (LoadD src)));
14602 format %{ "evcvttsd2sisl $dst, $src" %}
14603 ins_encode %{
14604 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14605 %}
14606 ins_pipe(pipe_slow);
14607 %}
14608
14609 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14610 %{
14611 predicate(!VM_Version::supports_avx10_2());
14612 match(Set dst (ConvD2L src));
14613 effect(KILL cr);
14614 format %{ "convert_d2l $dst, $src"%}
14615 ins_encode %{
14616 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14617 %}
14618 ins_pipe(pipe_slow);
14619 %}
14620
14621 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14622 %{
14623 predicate(VM_Version::supports_avx10_2());
14624 match(Set dst (ConvD2L src));
14625 format %{ "evcvttsd2sisq $dst, $src" %}
14626 ins_encode %{
14627 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14628 %}
14629 ins_pipe(pipe_slow);
14630 %}
14631
14632 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14633 %{
14634 predicate(VM_Version::supports_avx10_2());
14635 match(Set dst (ConvD2L (LoadD src)));
14636 format %{ "evcvttsd2sisq $dst, $src" %}
14637 ins_encode %{
14638 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14639 %}
14640 ins_pipe(pipe_slow);
14641 %}
14642
14643 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14644 %{
14645 match(Set dst (RoundD src));
14646 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst, $src\t# using $rtmp and $rcx as TEMP" %}
14648 ins_encode %{
14649 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14650 %}
14651 ins_pipe(pipe_slow);
14652 %}
14653
14654 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14655 %{
14656 match(Set dst (RoundF src));
14657 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14658 format %{ "round_float $dst,$src" %}
14659 ins_encode %{
14660 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14661 %}
14662 ins_pipe(pipe_slow);
14663 %}
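
// For reference, RoundD/RoundF implement Math.round: round to nearest with
// ties toward positive infinity, NaN mapping to 0, and saturation at the
// long/int range. The rtmp/rcx TEMPs serve the bit-twiddling fixup paths in
// the round_double/round_float macro-assembler helpers.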
14664
14665 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14666 %{
14667 predicate(!UseXmmI2F);
14668 match(Set dst (ConvI2F src));
14669
14670 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14671 ins_encode %{
14672 if (UseAVX > 0) {
14673 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14674 }
14675 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14676 %}
14677 ins_pipe(pipe_slow); // XXX
14678 %}
14679
14680 instruct convI2F_reg_mem(regF dst, memory src)
14681 %{
14682 predicate(UseAVX == 0);
14683 match(Set dst (ConvI2F (LoadI src)));
14684
14685 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14686 ins_encode %{
14687 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14688 %}
14689 ins_pipe(pipe_slow); // XXX
14690 %}
14691
14692 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14693 %{
14694 predicate(!UseXmmI2D);
14695 match(Set dst (ConvI2D src));
14696
14697 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14698 ins_encode %{
14699 if (UseAVX > 0) {
14700 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14701 }
14702 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14703 %}
14704 ins_pipe(pipe_slow); // XXX
14705 %}
14706
14707 instruct convI2D_reg_mem(regD dst, memory src)
14708 %{
14709 predicate(UseAVX == 0);
14710 match(Set dst (ConvI2D (LoadI src)));
14711
14712 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14713 ins_encode %{
14714 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14715 %}
14716 ins_pipe(pipe_slow); // XXX
14717 %}
14718
14719 instruct convXI2F_reg(regF dst, rRegI src)
14720 %{
14721 predicate(UseXmmI2F);
14722 match(Set dst (ConvI2F src));
14723
14724 format %{ "movdl $dst, $src\n\t"
            "cvtdq2ps $dst, $dst\t# i2f" %}
14726 ins_encode %{
14727 __ movdl($dst$$XMMRegister, $src$$Register);
14728 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14729 %}
14730 ins_pipe(pipe_slow); // XXX
14731 %}
14732
14733 instruct convXI2D_reg(regD dst, rRegI src)
14734 %{
14735 predicate(UseXmmI2D);
14736 match(Set dst (ConvI2D src));
14737
14738 format %{ "movdl $dst, $src\n\t"
            "cvtdq2pd $dst, $dst\t# i2d" %}
14740 ins_encode %{
14741 __ movdl($dst$$XMMRegister, $src$$Register);
14742 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14743 %}
14744 ins_pipe(pipe_slow); // XXX
14745 %}
14746
14747 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14748 %{
14749 match(Set dst (ConvL2F src));
14750
14751 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14752 ins_encode %{
14753 if (UseAVX > 0) {
14754 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14755 }
14756 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14757 %}
14758 ins_pipe(pipe_slow); // XXX
14759 %}
14760
14761 instruct convL2F_reg_mem(regF dst, memory src)
14762 %{
14763 predicate(UseAVX == 0);
14764 match(Set dst (ConvL2F (LoadL src)));
14765
14766 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14767 ins_encode %{
14768 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14769 %}
14770 ins_pipe(pipe_slow); // XXX
14771 %}
14772
14773 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14774 %{
14775 match(Set dst (ConvL2D src));
14776
14777 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14778 ins_encode %{
14779 if (UseAVX > 0) {
14780 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14781 }
14782 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14783 %}
14784 ins_pipe(pipe_slow); // XXX
14785 %}
14786
14787 instruct convL2D_reg_mem(regD dst, memory src)
14788 %{
14789 predicate(UseAVX == 0);
14790 match(Set dst (ConvL2D (LoadL src)));
14791
14792 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14793 ins_encode %{
14794 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14795 %}
14796 ins_pipe(pipe_slow); // XXX
14797 %}
14798
14799 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14800 %{
14801 match(Set dst (ConvI2L src));
14802
14803 ins_cost(125);
14804 format %{ "movslq $dst, $src\t# i2l" %}
14805 ins_encode %{
14806 __ movslq($dst$$Register, $src$$Register);
14807 %}
14808 ins_pipe(ialu_reg_reg);
14809 %}
14810
14811 // Zero-extend convert int to long
14812 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14813 %{
14814 match(Set dst (AndL (ConvI2L src) mask));
14815
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
14817 ins_encode %{
14818 if ($dst$$reg != $src$$reg) {
14819 __ movl($dst$$Register, $src$$Register);
14820 }
14821 %}
14822 ins_pipe(ialu_reg_reg);
14823 %}
14824
14825 // Zero-extend convert int to long
14826 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14827 %{
14828 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14829
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
14831 ins_encode %{
14832 __ movl($dst$$Register, $src$$Address);
14833 %}
14834 ins_pipe(ialu_reg_mem);
14835 %}
14836
14837 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14838 %{
14839 match(Set dst (AndL src mask));
14840
14841 format %{ "movl $dst, $src\t# zero-extend long" %}
14842 ins_encode %{
14843 __ movl($dst$$Register, $src$$Register);
14844 %}
14845 ins_pipe(ialu_reg_reg);
14846 %}
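
// For reference, the three zero-extension rules above all reduce to a plain
// 32-bit move because x86-64 clears bits 63..32 on every 32-bit register
// write. A Java-level sketch:
//
//   static long zext(int i) { return i & 0xFFFF_FFFFL; }  // Integer.toUnsignedLong
//
// compiles to a single movl, or to no code at all when dst == src.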
14847
14848 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14849 %{
14850 match(Set dst (ConvL2I src));
14851
14852 format %{ "movl $dst, $src\t# l2i" %}
14853 ins_encode %{
14854 __ movl($dst$$Register, $src$$Register);
14855 %}
14856 ins_pipe(ialu_reg_reg);
14857 %}
14858
14859
14860 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14861 match(Set dst (MoveF2I src));
14862 effect(DEF dst, USE src);
14863
14864 ins_cost(125);
14865 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
14866 ins_encode %{
14867 __ movl($dst$$Register, Address(rsp, $src$$disp));
14868 %}
14869 ins_pipe(ialu_reg_mem);
14870 %}
14871
14872 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14873 match(Set dst (MoveI2F src));
14874 effect(DEF dst, USE src);
14875
14876 ins_cost(125);
14877 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
14878 ins_encode %{
14879 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14880 %}
14881 ins_pipe(pipe_slow);
14882 %}
14883
14884 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14885 match(Set dst (MoveD2L src));
14886 effect(DEF dst, USE src);
14887
14888 ins_cost(125);
14889 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
14890 ins_encode %{
14891 __ movq($dst$$Register, Address(rsp, $src$$disp));
14892 %}
14893 ins_pipe(ialu_reg_mem);
14894 %}
14895
14896 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14897 predicate(!UseXmmLoadAndClearUpper);
14898 match(Set dst (MoveL2D src));
14899 effect(DEF dst, USE src);
14900
14901 ins_cost(125);
14902 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
14903 ins_encode %{
14904 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14905 %}
14906 ins_pipe(pipe_slow);
14907 %}
14908
14909 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14910 predicate(UseXmmLoadAndClearUpper);
14911 match(Set dst (MoveL2D src));
14912 effect(DEF dst, USE src);
14913
14914 ins_cost(125);
14915 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
14916 ins_encode %{
14917 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14918 %}
14919 ins_pipe(pipe_slow);
14920 %}
14921
14922
14923 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14924 match(Set dst (MoveF2I src));
14925 effect(DEF dst, USE src);
14926
14927 ins_cost(95); // XXX
14928 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
14929 ins_encode %{
14930 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
14931 %}
14932 ins_pipe(pipe_slow);
14933 %}
14934
14935 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
14936 match(Set dst (MoveI2F src));
14937 effect(DEF dst, USE src);
14938
14939 ins_cost(100);
14940 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
14941 ins_encode %{
14942 __ movl(Address(rsp, $dst$$disp), $src$$Register);
14943 %}
14944 ins_pipe( ialu_mem_reg );
14945 %}
14946
14947 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
14948 match(Set dst (MoveD2L src));
14949 effect(DEF dst, USE src);
14950
14951 ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
14953 ins_encode %{
14954 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
14955 %}
14956 ins_pipe(pipe_slow);
14957 %}
14958
14959 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
14960 match(Set dst (MoveL2D src));
14961 effect(DEF dst, USE src);
14962
14963 ins_cost(100);
14964 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
14965 ins_encode %{
14966 __ movq(Address(rsp, $dst$$disp), $src$$Register);
14967 %}
14968 ins_pipe(ialu_mem_reg);
14969 %}
14970
14971 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
14972 match(Set dst (MoveF2I src));
14973 effect(DEF dst, USE src);
14974 ins_cost(85);
14975 format %{ "movd $dst,$src\t# MoveF2I" %}
14976 ins_encode %{
14977 __ movdl($dst$$Register, $src$$XMMRegister);
14978 %}
14979 ins_pipe( pipe_slow );
14980 %}
14981
14982 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
14983 match(Set dst (MoveD2L src));
14984 effect(DEF dst, USE src);
14985 ins_cost(85);
14986 format %{ "movd $dst,$src\t# MoveD2L" %}
14987 ins_encode %{
14988 __ movdq($dst$$Register, $src$$XMMRegister);
14989 %}
14990 ins_pipe( pipe_slow );
14991 %}
14992
14993 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
14994 match(Set dst (MoveI2F src));
14995 effect(DEF dst, USE src);
14996 ins_cost(100);
14997 format %{ "movd $dst,$src\t# MoveI2F" %}
14998 ins_encode %{
14999 __ movdl($dst$$XMMRegister, $src$$Register);
15000 %}
15001 ins_pipe( pipe_slow );
15002 %}
15003
15004 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15005 match(Set dst (MoveL2D src));
15006 effect(DEF dst, USE src);
15007 ins_cost(100);
15008 format %{ "movd $dst,$src\t# MoveL2D" %}
15009 ins_encode %{
15010 __ movdq($dst$$XMMRegister, $src$$Register);
15011 %}
15012 ins_pipe( pipe_slow );
15013 %}
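
// For reference, the MoveF2I/MoveI2F/MoveD2L/MoveL2D nodes are raw
// bit-pattern moves -- the shapes behind, e.g. (a Java-level sketch):
//
//   int  bits  = Float.floatToRawIntBits(f);      // MoveF2I => movd
//   long lbits = Double.doubleToRawLongBits(d);   // MoveD2L => movq
//
// The reg-reg forms move directly between GPR and XMM registers; the stack
// forms cover operands the register allocator left in spill slots.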
15014
15015 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15017 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15018 Universe dummy, rFlagsReg cr)
15019 %{
15020 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15021 match(Set dummy (ClearArray cnt base));
15022 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15023
15024 format %{ $$template
15025 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15026 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15027 $$emit$$"jg LARGE\n\t"
15028 $$emit$$"dec rcx\n\t"
15029 $$emit$$"js DONE\t# Zero length\n\t"
15030 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15031 $$emit$$"dec rcx\n\t"
15032 $$emit$$"jge LOOP\n\t"
15033 $$emit$$"jmp DONE\n\t"
15034 $$emit$$"# LARGE:\n\t"
15035 if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15037 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15038 } else if (UseXMMForObjInit) {
15039 $$emit$$"mov rdi,rax\n\t"
15040 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15041 $$emit$$"jmpq L_zero_64_bytes\n\t"
15042 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15043 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15044 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15045 $$emit$$"add 0x40,rax\n\t"
15046 $$emit$$"# L_zero_64_bytes:\n\t"
15047 $$emit$$"sub 0x8,rcx\n\t"
15048 $$emit$$"jge L_loop\n\t"
15049 $$emit$$"add 0x4,rcx\n\t"
15050 $$emit$$"jl L_tail\n\t"
15051 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15052 $$emit$$"add 0x20,rax\n\t"
15053 $$emit$$"sub 0x4,rcx\n\t"
15054 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15055 $$emit$$"add 0x4,rcx\n\t"
15056 $$emit$$"jle L_end\n\t"
15057 $$emit$$"dec rcx\n\t"
15058 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15059 $$emit$$"vmovq xmm0,(rax)\n\t"
15060 $$emit$$"add 0x8,rax\n\t"
15061 $$emit$$"dec rcx\n\t"
15062 $$emit$$"jge L_sloop\n\t"
15063 $$emit$$"# L_end:\n\t"
15064 } else {
15065 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15066 }
15067 $$emit$$"# DONE"
15068 %}
15069 ins_encode %{
15070 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15071 $tmp$$XMMRegister, false, knoreg);
15072 %}
15073 ins_pipe(pipe_slow);
15074 %}
15075
15076 // Small non-constant length ClearArray for AVX512 targets.
15077 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15078 Universe dummy, rFlagsReg cr)
15079 %{
15080 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15081 match(Set dummy (ClearArray cnt base));
15082 ins_cost(125);
15083 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15084
15085 format %{ $$template
15086 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15087 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15088 $$emit$$"jg LARGE\n\t"
15089 $$emit$$"dec rcx\n\t"
15090 $$emit$$"js DONE\t# Zero length\n\t"
15091 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15092 $$emit$$"dec rcx\n\t"
15093 $$emit$$"jge LOOP\n\t"
15094 $$emit$$"jmp DONE\n\t"
15095 $$emit$$"# LARGE:\n\t"
15096 if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15098 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15099 } else if (UseXMMForObjInit) {
15100 $$emit$$"mov rdi,rax\n\t"
15101 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15102 $$emit$$"jmpq L_zero_64_bytes\n\t"
15103 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15104 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15105 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15106 $$emit$$"add 0x40,rax\n\t"
15107 $$emit$$"# L_zero_64_bytes:\n\t"
15108 $$emit$$"sub 0x8,rcx\n\t"
15109 $$emit$$"jge L_loop\n\t"
15110 $$emit$$"add 0x4,rcx\n\t"
15111 $$emit$$"jl L_tail\n\t"
15112 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15113 $$emit$$"add 0x20,rax\n\t"
15114 $$emit$$"sub 0x4,rcx\n\t"
15115 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15116 $$emit$$"add 0x4,rcx\n\t"
15117 $$emit$$"jle L_end\n\t"
15118 $$emit$$"dec rcx\n\t"
15119 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15120 $$emit$$"vmovq xmm0,(rax)\n\t"
15121 $$emit$$"add 0x8,rax\n\t"
15122 $$emit$$"dec rcx\n\t"
15123 $$emit$$"jge L_sloop\n\t"
15124 $$emit$$"# L_end:\n\t"
15125 } else {
15126 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15127 }
15128 $$emit$$"# DONE"
15129 %}
15130 ins_encode %{
15131 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15132 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15133 %}
15134 ins_pipe(pipe_slow);
15135 %}
15136
15137 // Large non-constant length ClearArray for non-AVX512 targets.
15138 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15139 Universe dummy, rFlagsReg cr)
15140 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15142 match(Set dummy (ClearArray cnt base));
15143 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15144
15145 format %{ $$template
15146 if (UseFastStosb) {
15147 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15149 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15150 } else if (UseXMMForObjInit) {
15151 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15152 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15153 $$emit$$"jmpq L_zero_64_bytes\n\t"
15154 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15155 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15156 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15157 $$emit$$"add 0x40,rax\n\t"
15158 $$emit$$"# L_zero_64_bytes:\n\t"
15159 $$emit$$"sub 0x8,rcx\n\t"
15160 $$emit$$"jge L_loop\n\t"
15161 $$emit$$"add 0x4,rcx\n\t"
15162 $$emit$$"jl L_tail\n\t"
15163 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15164 $$emit$$"add 0x20,rax\n\t"
15165 $$emit$$"sub 0x4,rcx\n\t"
15166 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15167 $$emit$$"add 0x4,rcx\n\t"
15168 $$emit$$"jle L_end\n\t"
15169 $$emit$$"dec rcx\n\t"
15170 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15171 $$emit$$"vmovq xmm0,(rax)\n\t"
15172 $$emit$$"add 0x8,rax\n\t"
15173 $$emit$$"dec rcx\n\t"
15174 $$emit$$"jge L_sloop\n\t"
15175 $$emit$$"# L_end:\n\t"
15176 } else {
15177 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15178 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15179 }
15180 %}
15181 ins_encode %{
15182 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15183 $tmp$$XMMRegister, true, knoreg);
15184 %}
15185 ins_pipe(pipe_slow);
15186 %}
15187
15188 // Large non-constant length ClearArray for AVX512 targets.
15189 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15190 Universe dummy, rFlagsReg cr)
15191 %{
15192 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15193 match(Set dummy (ClearArray cnt base));
15194 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15195
15196 format %{ $$template
15197 if (UseFastStosb) {
15198 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15200 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15201 } else if (UseXMMForObjInit) {
15202 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15203 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15204 $$emit$$"jmpq L_zero_64_bytes\n\t"
15205 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15206 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15207 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15208 $$emit$$"add 0x40,rax\n\t"
15209 $$emit$$"# L_zero_64_bytes:\n\t"
15210 $$emit$$"sub 0x8,rcx\n\t"
15211 $$emit$$"jge L_loop\n\t"
15212 $$emit$$"add 0x4,rcx\n\t"
15213 $$emit$$"jl L_tail\n\t"
15214 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15215 $$emit$$"add 0x20,rax\n\t"
15216 $$emit$$"sub 0x4,rcx\n\t"
15217 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15218 $$emit$$"add 0x4,rcx\n\t"
15219 $$emit$$"jle L_end\n\t"
15220 $$emit$$"dec rcx\n\t"
15221 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15222 $$emit$$"vmovq xmm0,(rax)\n\t"
15223 $$emit$$"add 0x8,rax\n\t"
15224 $$emit$$"dec rcx\n\t"
15225 $$emit$$"jge L_sloop\n\t"
15226 $$emit$$"# L_end:\n\t"
15227 } else {
15228 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15229 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15230 }
15231 %}
15232 ins_encode %{
15233 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15234 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15235 %}
15236 ins_pipe(pipe_slow);
15237 %}
15238
15239 // Small constant length ClearArray for AVX512 targets.
15240 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15241 %{
15242 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15243 match(Set dummy (ClearArray cnt base));
15244 ins_cost(100);
15245 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt" %}
15247 ins_encode %{
15248 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15249 %}
15250 ins_pipe(pipe_slow);
15251 %}
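
// A note on ClearArray: the count is in 8-byte heap words (hence the
// "shlq rcx,3" conversion to bytes in the templates above), and the node is
// typically emitted to zero freshly allocated storage, e.g. (a sketch):
//
//   long[] a = new long[n];   // body zeroed via a ClearArray of n words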
15252
15253 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15254 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15255 %{
15256 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15257 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15258 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15259
15260 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15261 ins_encode %{
15262 __ string_compare($str1$$Register, $str2$$Register,
15263 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15264 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15265 %}
15266 ins_pipe( pipe_slow );
15267 %}
15268
15269 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15270 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15271 %{
15272 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15273 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15274 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15275
15276 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15277 ins_encode %{
15278 __ string_compare($str1$$Register, $str2$$Register,
15279 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15280 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15281 %}
15282 ins_pipe( pipe_slow );
15283 %}
15284
15285 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15286 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15287 %{
15288 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15289 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15290 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15291
15292 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15293 ins_encode %{
15294 __ string_compare($str1$$Register, $str2$$Register,
15295 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15296 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15297 %}
15298 ins_pipe( pipe_slow );
15299 %}
15300
15301 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15302 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15303 %{
15304 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15305 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15306 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15307
15308 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15309 ins_encode %{
15310 __ string_compare($str1$$Register, $str2$$Register,
15311 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15312 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15313 %}
15314 ins_pipe( pipe_slow );
15315 %}
15316
15317 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15318 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15319 %{
15320 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15321 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15322 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15323
15324 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15325 ins_encode %{
15326 __ string_compare($str1$$Register, $str2$$Register,
15327 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15328 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15329 %}
15330 ins_pipe( pipe_slow );
15331 %}
15332
15333 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15334 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15335 %{
15336 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15337 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15338 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15339
15340 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15341 ins_encode %{
15342 __ string_compare($str1$$Register, $str2$$Register,
15343 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15344 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15345 %}
15346 ins_pipe( pipe_slow );
15347 %}
15348
15349 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15350 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15351 %{
15352 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15353 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15354 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15355
15356 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15357 ins_encode %{
15358 __ string_compare($str2$$Register, $str1$$Register,
15359 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15360 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15361 %}
15362 ins_pipe( pipe_slow );
15363 %}
15364
15365 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15366 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15367 %{
15368 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15369 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15370 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15371
15372 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15373 ins_encode %{
15374 __ string_compare($str2$$Register, $str1$$Register,
15375 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15376 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15377 %}
15378 ins_pipe( pipe_slow );
15379 %}
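
// A note on the LL/UU/LU/UL suffixes: they name the encodings of the two
// operands under compact strings (L = Latin-1 bytes, U = UTF-16 chars). The
// UL rules simply swap the operands -- note str2/cnt2 are passed first in
// their encodings above -- so the LU comparison code serves both mixed cases.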
15380
// Fast search of substring with known (constant) size.
15382 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15383 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15384 %{
15385 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15386 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15387 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15388
15389 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15390 ins_encode %{
15391 int icnt2 = (int)$int_cnt2$$constant;
15392 if (icnt2 >= 16) {
15393 // IndexOf for constant substrings with size >= 16 elements
15394 // which don't need to be loaded through stack.
15395 __ string_indexofC8($str1$$Register, $str2$$Register,
15396 $cnt1$$Register, $cnt2$$Register,
15397 icnt2, $result$$Register,
15398 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15399 } else {
15400 // Small strings are loaded through stack if they cross page boundary.
15401 __ string_indexof($str1$$Register, $str2$$Register,
15402 $cnt1$$Register, $cnt2$$Register,
15403 icnt2, $result$$Register,
15404 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15405 }
15406 %}
15407 ins_pipe( pipe_slow );
15408 %}
15409
// Fast search of substring with known (constant) size.
15411 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15412 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15413 %{
15414 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15415 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15416 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15417
15418 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15419 ins_encode %{
15420 int icnt2 = (int)$int_cnt2$$constant;
15421 if (icnt2 >= 8) {
15422 // IndexOf for constant substrings with size >= 8 elements
15423 // which don't need to be loaded through stack.
15424 __ string_indexofC8($str1$$Register, $str2$$Register,
15425 $cnt1$$Register, $cnt2$$Register,
15426 icnt2, $result$$Register,
15427 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15428 } else {
15429 // Small strings are loaded through stack if they cross page boundary.
15430 __ string_indexof($str1$$Register, $str2$$Register,
15431 $cnt1$$Register, $cnt2$$Register,
15432 icnt2, $result$$Register,
15433 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15434 }
15435 %}
15436 ins_pipe( pipe_slow );
15437 %}
15438
// Fast search of substring with known (constant) size.
15440 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15441 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15442 %{
15443 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15444 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15445 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15446
15447 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15448 ins_encode %{
15449 int icnt2 = (int)$int_cnt2$$constant;
15450 if (icnt2 >= 8) {
15451 // IndexOf for constant substrings with size >= 8 elements
15452 // which don't need to be loaded through stack.
15453 __ string_indexofC8($str1$$Register, $str2$$Register,
15454 $cnt1$$Register, $cnt2$$Register,
15455 icnt2, $result$$Register,
15456 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15457 } else {
15458 // Small strings are loaded through stack if they cross page boundary.
15459 __ string_indexof($str1$$Register, $str2$$Register,
15460 $cnt1$$Register, $cnt2$$Register,
15461 icnt2, $result$$Register,
15462 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15463 }
15464 %}
15465 ins_pipe( pipe_slow );
15466 %}
15467
15468 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15469 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15470 %{
15471 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15472 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15473 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15474
15475 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15476 ins_encode %{
15477 __ string_indexof($str1$$Register, $str2$$Register,
15478 $cnt1$$Register, $cnt2$$Register,
15479 (-1), $result$$Register,
15480 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15481 %}
15482 ins_pipe( pipe_slow );
15483 %}
15484
15485 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15486 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15487 %{
15488 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15489 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15490 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15491
15492 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15493 ins_encode %{
15494 __ string_indexof($str1$$Register, $str2$$Register,
15495 $cnt1$$Register, $cnt2$$Register,
15496 (-1), $result$$Register,
15497 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15498 %}
15499 ins_pipe( pipe_slow );
15500 %}
15501
15502 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15503 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15504 %{
15505 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15506 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15507 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15508
15509 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15510 ins_encode %{
15511 __ string_indexof($str1$$Register, $str2$$Register,
15512 $cnt1$$Register, $cnt2$$Register,
15513 (-1), $result$$Register,
15514 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15515 %}
15516 ins_pipe( pipe_slow );
15517 %}
15518
15519 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15520 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15521 %{
15522 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15523 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15524 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15525 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15526 ins_encode %{
15527 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15528 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15529 %}
15530 ins_pipe( pipe_slow );
15531 %}
15532
15533 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15534 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15535 %{
15536 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15537 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15538 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15539 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15540 ins_encode %{
15541 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15542 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15543 %}
15544 ins_pipe( pipe_slow );
15545 %}
15546
15547 // fast string equals
15548 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15549 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15550 %{
15551 predicate(!VM_Version::supports_avx512vlbw());
15552 match(Set result (StrEquals (Binary str1 str2) cnt));
15553 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15554
15555 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15556 ins_encode %{
15557 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15558 $cnt$$Register, $result$$Register, $tmp3$$Register,
15559 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15560 %}
15561 ins_pipe( pipe_slow );
15562 %}
15563
15564 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15565 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15566 %{
15567 predicate(VM_Version::supports_avx512vlbw());
15568 match(Set result (StrEquals (Binary str1 str2) cnt));
15569 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15570
15571 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15572 ins_encode %{
15573 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15574 $cnt$$Register, $result$$Register, $tmp3$$Register,
15575 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15576 %}
15577 ins_pipe( pipe_slow );
15578 %}
15579
15580 // fast array equals
15581 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15582 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15583 %{
15584 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15585 match(Set result (AryEq ary1 ary2));
15586 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15587
15588 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15589 ins_encode %{
15590 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15591 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15592 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15593 %}
15594 ins_pipe( pipe_slow );
15595 %}
15596
15597 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15598 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15599 %{
15600 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15601 match(Set result (AryEq ary1 ary2));
15602 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15603
15604 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15605 ins_encode %{
15606 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15607 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15608 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15609 %}
15610 ins_pipe( pipe_slow );
15611 %}
15612
15613 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15614 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15615 %{
15616 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15617 match(Set result (AryEq ary1 ary2));
15618 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15619
15620 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15621 ins_encode %{
15622 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15623 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15624 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15625 %}
15626 ins_pipe( pipe_slow );
15627 %}
15628
15629 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15630 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15631 %{
15632 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15633 match(Set result (AryEq ary1 ary2));
15634 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15635
15636 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15637 ins_encode %{
15638 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15639 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15640 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15641 %}
15642 ins_pipe( pipe_slow );
15643 %}
15644
15645 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15646 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15647 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15648 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15649 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15650 %{
15651 predicate(UseAVX >= 2);
15652 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15653 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15654 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15655 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15656 USE basic_type, KILL cr);
15657
15658 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15659 ins_encode %{
15660 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15661 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15662 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15663 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15664 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15665 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15666 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15667 %}
15668 ins_pipe( pipe_slow );
15669 %}
15670
15671 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15673 %{
15674 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15675 match(Set result (CountPositives ary1 len));
15676 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15677
15678 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15679 ins_encode %{
15680 __ count_positives($ary1$$Register, $len$$Register,
15681 $result$$Register, $tmp3$$Register,
15682 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15683 %}
15684 ins_pipe( pipe_slow );
15685 %}
15686
15687 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15689 %{
15690 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15691 match(Set result (CountPositives ary1 len));
15692 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15693
15694 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15695 ins_encode %{
15696 __ count_positives($ary1$$Register, $len$$Register,
15697 $result$$Register, $tmp3$$Register,
15698 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15699 %}
15700 ins_pipe( pipe_slow );
15701 %}
15702
15703 // fast char[] to byte[] compression
15704 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15705 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15706 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15707 match(Set result (StrCompressedCopy src (Binary dst len)));
15708 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15709 USE_KILL len, KILL tmp5, KILL cr);
15710
15711 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15712 ins_encode %{
15713 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15714 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15715 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15716 knoreg, knoreg);
15717 %}
15718 ins_pipe( pipe_slow );
15719 %}
15720
15721 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15722 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15723 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15724 match(Set result (StrCompressedCopy src (Binary dst len)));
15725 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15726 USE_KILL len, KILL tmp5, KILL cr);
15727
15728 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15729 ins_encode %{
15730 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15731 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15732 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15733 $ktmp1$$KRegister, $ktmp2$$KRegister);
15734 %}
15735 ins_pipe( pipe_slow );
15736 %}
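
// Illustrative note: StrCompressedCopy backs char[]-to-byte[] compression
// (e.g. the java.lang.StringUTF16.compress intrinsic); the _evex rule is
// selected on AVX-512 VL/BW (plus BMI2) capable CPUs, where masked vector
// copies are available, and the plain rule is used otherwise.
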
15737 // fast byte[] to char[] inflation
15738 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15739 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15740 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15741 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15742 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15743
15744 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15745 ins_encode %{
15746 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15747 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15748 %}
15749 ins_pipe( pipe_slow );
15750 %}
15751
15752 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15753 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15754 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15755 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15756 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15757
15758 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15759 ins_encode %{
15760 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15761 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15762 %}
15763 ins_pipe( pipe_slow );
15764 %}
15765
15766 // encode char[] to byte[] in ISO_8859_1
15767 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15768 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15769 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15770 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15771 match(Set result (EncodeISOArray src (Binary dst len)));
15772 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15773
15774 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15775 ins_encode %{
15776 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15777 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15778 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15779 %}
15780 ins_pipe( pipe_slow );
15781 %}
15782
15783 // encode char[] to byte[] in ASCII
15784 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15785 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15786 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15787 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15788 match(Set result (EncodeISOArray src (Binary dst len)));
15789 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15790
15791 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15792 ins_encode %{
15793 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15794 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15795 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15796 %}
15797 ins_pipe( pipe_slow );
15798 %}
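
// Illustrative note: both encode rules above share
// MacroAssembler::encode_iso_array; the trailing bool selects the validity
// limit (chars up to 0xFF for ISO-8859-1, up to 0x7F for ASCII), and
// $result reports how many leading chars were successfully encoded.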
15799
15800 //----------Overflow Math Instructions-----------------------------------------
15801
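// Illustrative note: these rules do the arithmetic solely to produce a flags
// result that a following branch consumes, e.g. a hypothetical
// Math.addExact(a, b) lowers roughly to
//   addl rax, rbx     # overflowAddI_rReg: sets OF on signed overflow
//   jo   slow_path    # uncommon trap / ArithmeticException path
// The USE_KILL on op1 records that the add destroys its first input.
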
15802 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15803 %{
15804 match(Set cr (OverflowAddI op1 op2));
15805 effect(DEF cr, USE_KILL op1, USE op2);
15806
15807 format %{ "addl $op1, $op2\t# overflow check int" %}
15808
15809 ins_encode %{
15810 __ addl($op1$$Register, $op2$$Register);
15811 %}
15812 ins_pipe(ialu_reg_reg);
15813 %}
15814
15815 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15816 %{
15817 match(Set cr (OverflowAddI op1 op2));
15818 effect(DEF cr, USE_KILL op1, USE op2);
15819
15820 format %{ "addl $op1, $op2\t# overflow check int" %}
15821
15822 ins_encode %{
15823 __ addl($op1$$Register, $op2$$constant);
15824 %}
15825 ins_pipe(ialu_reg_reg);
15826 %}
15827
15828 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15829 %{
15830 match(Set cr (OverflowAddL op1 op2));
15831 effect(DEF cr, USE_KILL op1, USE op2);
15832
15833 format %{ "addq $op1, $op2\t# overflow check long" %}
15834 ins_encode %{
15835 __ addq($op1$$Register, $op2$$Register);
15836 %}
15837 ins_pipe(ialu_reg_reg);
15838 %}
15839
15840 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15841 %{
15842 match(Set cr (OverflowAddL op1 op2));
15843 effect(DEF cr, USE_KILL op1, USE op2);
15844
15845 format %{ "addq $op1, $op2\t# overflow check long" %}
15846 ins_encode %{
15847 __ addq($op1$$Register, $op2$$constant);
15848 %}
15849 ins_pipe(ialu_reg_reg);
15850 %}
15851
15852 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15853 %{
15854 match(Set cr (OverflowSubI op1 op2));
15855
15856 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15857 ins_encode %{
15858 __ cmpl($op1$$Register, $op2$$Register);
15859 %}
15860 ins_pipe(ialu_reg_reg);
15861 %}
15862
15863 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15864 %{
15865 match(Set cr (OverflowSubI op1 op2));
15866
15867 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15868 ins_encode %{
15869 __ cmpl($op1$$Register, $op2$$constant);
15870 %}
15871 ins_pipe(ialu_reg_reg);
15872 %}
15873
15874 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15875 %{
15876 match(Set cr (OverflowSubL op1 op2));
15877
15878 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15879 ins_encode %{
15880 __ cmpq($op1$$Register, $op2$$Register);
15881 %}
15882 ins_pipe(ialu_reg_reg);
15883 %}
15884
15885 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15886 %{
15887 match(Set cr (OverflowSubL op1 op2));
15888
15889 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15890 ins_encode %{
15891 __ cmpq($op1$$Register, $op2$$constant);
15892 %}
15893 ins_pipe(ialu_reg_reg);
15894 %}
15895
15896 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15897 %{
15898 match(Set cr (OverflowSubI zero op2));
15899 effect(DEF cr, USE_KILL op2);
15900
15901 format %{ "negl $op2\t# overflow check int" %}
15902 ins_encode %{
15903 __ negl($op2$$Register);
15904 %}
15905 ins_pipe(ialu_reg_reg);
15906 %}
15907
15908 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15909 %{
15910 match(Set cr (OverflowSubL zero op2));
15911 effect(DEF cr, USE_KILL op2);
15912
15913 format %{ "negq $op2\t# overflow check long" %}
15914 ins_encode %{
15915 __ negq($op2$$Register);
15916 %}
15917 ins_pipe(ialu_reg_reg);
15918 %}
15919
15920 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15921 %{
15922 match(Set cr (OverflowMulI op1 op2));
15923 effect(DEF cr, USE_KILL op1, USE op2);
15924
15925 format %{ "imull $op1, $op2\t# overflow check int" %}
15926 ins_encode %{
15927 __ imull($op1$$Register, $op2$$Register);
15928 %}
15929 ins_pipe(ialu_reg_reg_alu0);
15930 %}
15931
15932 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15933 %{
15934 match(Set cr (OverflowMulI op1 op2));
15935 effect(DEF cr, TEMP tmp, USE op1, USE op2);
15936
15937 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
15938 ins_encode %{
15939 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15940 %}
15941 ins_pipe(ialu_reg_reg_alu0);
15942 %}
15943
15944 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15945 %{
15946 match(Set cr (OverflowMulL op1 op2));
15947 effect(DEF cr, USE_KILL op1, USE op2);
15948
15949 format %{ "imulq $op1, $op2\t# overflow check long" %}
15950 ins_encode %{
15951 __ imulq($op1$$Register, $op2$$Register);
15952 %}
15953 ins_pipe(ialu_reg_reg_alu0);
15954 %}
15955
15956 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15957 %{
15958 match(Set cr (OverflowMulL op1 op2));
15959 effect(DEF cr, TEMP tmp, USE op1, USE op2);
15960
15961 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
15962 ins_encode %{
15963 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15964 %}
15965 ins_pipe(ialu_reg_reg_alu0);
15966 %}
15967
15968
15969 //----------Control Flow Instructions------------------------------------------
15970 // Signed compare Instructions
15971
15972 // XXX more variants!!
15973 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15974 %{
15975 match(Set cr (CmpI op1 op2));
15976 effect(DEF cr, USE op1, USE op2);
15977
15978 format %{ "cmpl $op1, $op2" %}
15979 ins_encode %{
15980 __ cmpl($op1$$Register, $op2$$Register);
15981 %}
15982 ins_pipe(ialu_cr_reg_reg);
15983 %}
15984
15985 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15986 %{
15987 match(Set cr (CmpI op1 op2));
15988
15989 format %{ "cmpl $op1, $op2" %}
15990 ins_encode %{
15991 __ cmpl($op1$$Register, $op2$$constant);
15992 %}
15993 ins_pipe(ialu_cr_reg_imm);
15994 %}
15995
15996 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
15997 %{
15998 match(Set cr (CmpI op1 (LoadI op2)));
15999
16000 ins_cost(500); // XXX
16001 format %{ "cmpl $op1, $op2" %}
16002 ins_encode %{
16003 __ cmpl($op1$$Register, $op2$$Address);
16004 %}
16005 ins_pipe(ialu_cr_reg_mem);
16006 %}
16007
16008 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16009 %{
16010 match(Set cr (CmpI src zero));
16011
16012 format %{ "testl $src, $src" %}
16013 ins_encode %{
16014 __ testl($src$$Register, $src$$Register);
16015 %}
16016 ins_pipe(ialu_cr_reg_imm);
16017 %}
16018
16019 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16020 %{
16021 match(Set cr (CmpI (AndI src con) zero));
16022
16023 format %{ "testl $src, $con" %}
16024 ins_encode %{
16025 __ testl($src$$Register, $con$$constant);
16026 %}
16027 ins_pipe(ialu_cr_reg_imm);
16028 %}
16029
16030 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16031 %{
16032 match(Set cr (CmpI (AndI src1 src2) zero));
16033
16034 format %{ "testl $src1, $src2" %}
16035 ins_encode %{
16036 __ testl($src1$$Register, $src2$$Register);
16037 %}
16038 ins_pipe(ialu_cr_reg_imm);
16039 %}
16040
16041 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16042 %{
16043 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16044
16045 format %{ "testl $src, $mem" %}
16046 ins_encode %{
16047 __ testl($src$$Register, $mem$$Address);
16048 %}
16049 ins_pipe(ialu_cr_reg_mem);
16050 %}
16051
16052 // Unsigned compare Instructions; really, same as signed except they
16053 // produce an rFlagsRegU instead of rFlagsReg.
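// Illustrative note: the unsigned flags class only changes which condition
// codes consumers may use -- an unsigned branch tests jb/ja (below/above)
// instead of jl/jg -- the cmpl itself is identical to the signed case.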
16054 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16055 %{
16056 match(Set cr (CmpU op1 op2));
16057
16058 format %{ "cmpl $op1, $op2\t# unsigned" %}
16059 ins_encode %{
16060 __ cmpl($op1$$Register, $op2$$Register);
16061 %}
16062 ins_pipe(ialu_cr_reg_reg);
16063 %}
16064
16065 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16066 %{
16067 match(Set cr (CmpU op1 op2));
16068
16069 format %{ "cmpl $op1, $op2\t# unsigned" %}
16070 ins_encode %{
16071 __ cmpl($op1$$Register, $op2$$constant);
16072 %}
16073 ins_pipe(ialu_cr_reg_imm);
16074 %}
16075
16076 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16077 %{
16078 match(Set cr (CmpU op1 (LoadI op2)));
16079
16080 ins_cost(500); // XXX
16081 format %{ "cmpl $op1, $op2\t# unsigned" %}
16082 ins_encode %{
16083 __ cmpl($op1$$Register, $op2$$Address);
16084 %}
16085 ins_pipe(ialu_cr_reg_mem);
16086 %}
16087
16088 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16089 %{
16090 match(Set cr (CmpU src zero));
16091
16092 format %{ "testl $src, $src\t# unsigned" %}
16093 ins_encode %{
16094 __ testl($src$$Register, $src$$Register);
16095 %}
16096 ins_pipe(ialu_cr_reg_imm);
16097 %}
16098
16099 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16100 %{
16101 match(Set cr (CmpP op1 op2));
16102
16103 format %{ "cmpq $op1, $op2\t# ptr" %}
16104 ins_encode %{
16105 __ cmpq($op1$$Register, $op2$$Register);
16106 %}
16107 ins_pipe(ialu_cr_reg_reg);
16108 %}
16109
16110 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16111 %{
16112 match(Set cr (CmpP op1 (LoadP op2)));
16113 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16114
16115 ins_cost(500); // XXX
16116 format %{ "cmpq $op1, $op2\t# ptr" %}
16117 ins_encode %{
16118 __ cmpq($op1$$Register, $op2$$Address);
16119 %}
16120 ins_pipe(ialu_cr_reg_mem);
16121 %}
16122
16123 // XXX this is generalized by compP_rReg_mem???
16124 // Compare raw pointer (used in out-of-heap check).
16125 // Only works because non-oop pointers must be raw pointers
16126 // and raw pointers have no anti-dependencies.
16127 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16128 %{
16129 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16130 n->in(2)->as_Load()->barrier_data() == 0);
16131 match(Set cr (CmpP op1 (LoadP op2)));
16132
16133 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16134 ins_encode %{
16135 __ cmpq($op1$$Register, $op2$$Address);
16136 %}
16137 ins_pipe(ialu_cr_reg_mem);
16138 %}
16139
// This will generate a signed flags result. This should be OK since
// any compare against zero will only be tested for eq/ne.
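// Illustrative sketch: a null check such as "if (p == null)" lowers to
//   testq rsi, rsi    # testP_reg
//   je    is_null
// and only ZF is consumed, so signed vs. unsigned flags make no difference.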
16142 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16143 %{
16144 match(Set cr (CmpP src zero));
16145
16146 format %{ "testq $src, $src\t# ptr" %}
16147 ins_encode %{
16148 __ testq($src$$Register, $src$$Register);
16149 %}
16150 ins_pipe(ialu_cr_reg_imm);
16151 %}
16152
// This will generate a signed flags result. This should be OK since
// any compare against zero will only be tested for eq/ne.
16155 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16156 %{
16157 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16158 n->in(1)->as_Load()->barrier_data() == 0);
16159 match(Set cr (CmpP (LoadP op) zero));
16160
16161 ins_cost(500); // XXX
16162 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16163 ins_encode %{
16164 __ testq($op$$Address, 0xFFFFFFFF);
16165 %}
16166 ins_pipe(ialu_cr_reg_imm);
16167 %}
16168
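// Illustrative note: with zero-based compressed oops the heapbase register
// r12 permanently holds zero, so "cmpq r12, $mem" tests the loaded word for
// null with a shorter encoding than a test against an immediate would need.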
16169 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16170 %{
16171 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16172 n->in(1)->as_Load()->barrier_data() == 0);
16173 match(Set cr (CmpP (LoadP mem) zero));
16174
16175 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16176 ins_encode %{
16177 __ cmpq(r12, $mem$$Address);
16178 %}
16179 ins_pipe(ialu_cr_reg_mem);
16180 %}
16181
16182 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16183 %{
16184 match(Set cr (CmpN op1 op2));
16185
16186 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16187 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16188 ins_pipe(ialu_cr_reg_reg);
16189 %}
16190
16191 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16192 %{
16193 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16194 match(Set cr (CmpN src (LoadN mem)));
16195
16196 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16197 ins_encode %{
16198 __ cmpl($src$$Register, $mem$$Address);
16199 %}
16200 ins_pipe(ialu_cr_reg_mem);
16201 %}
16202
16203 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16204 match(Set cr (CmpN op1 op2));
16205
16206 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16207 ins_encode %{
16208 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16209 %}
16210 ins_pipe(ialu_cr_reg_imm);
16211 %}
16212
16213 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16214 %{
16215 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16216 match(Set cr (CmpN src (LoadN mem)));
16217
16218 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16219 ins_encode %{
16220 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16221 %}
16222 ins_pipe(ialu_cr_reg_mem);
16223 %}
16224
16225 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16226 match(Set cr (CmpN op1 op2));
16227
16228 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16229 ins_encode %{
16230 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16231 %}
16232 ins_pipe(ialu_cr_reg_imm);
16233 %}
16234
16235 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16236 %{
16237 predicate(!UseCompactObjectHeaders);
16238 match(Set cr (CmpN src (LoadNKlass mem)));
16239
16240 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16241 ins_encode %{
16242 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16243 %}
16244 ins_pipe(ialu_cr_reg_mem);
16245 %}
16246
16247 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16248 match(Set cr (CmpN src zero));
16249
16250 format %{ "testl $src, $src\t# compressed ptr" %}
16251 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16252 ins_pipe(ialu_cr_reg_imm);
16253 %}
16254
16255 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16256 %{
16257 predicate(CompressedOops::base() != nullptr &&
16258 n->in(1)->as_Load()->barrier_data() == 0);
16259 match(Set cr (CmpN (LoadN mem) zero));
16260
16261 ins_cost(500); // XXX
16262 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16263 ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
16265 %}
16266 ins_pipe(ialu_cr_reg_mem);
16267 %}
16268
16269 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16270 %{
16271 predicate(CompressedOops::base() == nullptr &&
16272 n->in(1)->as_Load()->barrier_data() == 0);
16273 match(Set cr (CmpN (LoadN mem) zero));
16274
16275 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16276 ins_encode %{
16277 __ cmpl(r12, $mem$$Address);
16278 %}
16279 ins_pipe(ialu_cr_reg_mem);
16280 %}
16281
16282 // Yanked all unsigned pointer compare operations.
16283 // Pointer compares are done with CmpP which is already unsigned.
16284
16285 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16286 %{
16287 match(Set cr (CmpL op1 op2));
16288
16289 format %{ "cmpq $op1, $op2" %}
16290 ins_encode %{
16291 __ cmpq($op1$$Register, $op2$$Register);
16292 %}
16293 ins_pipe(ialu_cr_reg_reg);
16294 %}
16295
16296 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16297 %{
16298 match(Set cr (CmpL op1 op2));
16299
16300 format %{ "cmpq $op1, $op2" %}
16301 ins_encode %{
16302 __ cmpq($op1$$Register, $op2$$constant);
16303 %}
16304 ins_pipe(ialu_cr_reg_imm);
16305 %}
16306
16307 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16308 %{
16309 match(Set cr (CmpL op1 (LoadL op2)));
16310
16311 format %{ "cmpq $op1, $op2" %}
16312 ins_encode %{
16313 __ cmpq($op1$$Register, $op2$$Address);
16314 %}
16315 ins_pipe(ialu_cr_reg_mem);
16316 %}
16317
16318 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16319 %{
16320 match(Set cr (CmpL src zero));
16321
16322 format %{ "testq $src, $src" %}
16323 ins_encode %{
16324 __ testq($src$$Register, $src$$Register);
16325 %}
16326 ins_pipe(ialu_cr_reg_imm);
16327 %}
16328
16329 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16330 %{
16331 match(Set cr (CmpL (AndL src con) zero));
16332
16333 format %{ "testq $src, $con\t# long" %}
16334 ins_encode %{
16335 __ testq($src$$Register, $con$$constant);
16336 %}
16337 ins_pipe(ialu_cr_reg_imm);
16338 %}
16339
16340 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16341 %{
16342 match(Set cr (CmpL (AndL src1 src2) zero));
16343
16344 format %{ "testq $src1, $src2\t# long" %}
16345 ins_encode %{
16346 __ testq($src1$$Register, $src2$$Register);
16347 %}
16348 ins_pipe(ialu_cr_reg_imm);
16349 %}
16350
16351 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16352 %{
16353 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16354
16355 format %{ "testq $src, $mem" %}
16356 ins_encode %{
16357 __ testq($src$$Register, $mem$$Address);
16358 %}
16359 ins_pipe(ialu_cr_reg_mem);
16360 %}
16361
16362 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16363 %{
16364 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16365
16366 format %{ "testq $src, $mem" %}
16367 ins_encode %{
16368 __ testq($src$$Register, $mem$$Address);
16369 %}
16370 ins_pipe(ialu_cr_reg_mem);
16371 %}
16372
16373 // Manifest a CmpU result in an integer register. Very painful.
16374 // This is the test to avoid.
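// Illustrative walk-through of the sequence below: dst starts at -1; if
// src1 is below src2 the jb exits with -1; otherwise setcc(notZero)
// materializes 0 for equal and 1 for above, giving the -1/0/1 result of an
// Integer.compareUnsigned-style three-way comparison.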
16375 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16376 %{
16377 match(Set dst (CmpU3 src1 src2));
16378 effect(KILL flags);
16379
16380 ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16386 ins_encode %{
16387 Label done;
16388 __ cmpl($src1$$Register, $src2$$Register);
16389 __ movl($dst$$Register, -1);
16390 __ jccb(Assembler::below, done);
16391 __ setcc(Assembler::notZero, $dst$$Register);
16392 __ bind(done);
16393 %}
16394 ins_pipe(pipe_slow);
16395 %}
16396
16397 // Manifest a CmpL result in an integer register. Very painful.
16398 // This is the test to avoid.
16399 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16400 %{
16401 match(Set dst (CmpL3 src1 src2));
16402 effect(KILL flags);
16403
16404 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16410 ins_encode %{
16411 Label done;
16412 __ cmpq($src1$$Register, $src2$$Register);
16413 __ movl($dst$$Register, -1);
16414 __ jccb(Assembler::less, done);
16415 __ setcc(Assembler::notZero, $dst$$Register);
16416 __ bind(done);
16417 %}
16418 ins_pipe(pipe_slow);
16419 %}
16420
16421 // Manifest a CmpUL result in an integer register. Very painful.
16422 // This is the test to avoid.
16423 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16424 %{
16425 match(Set dst (CmpUL3 src1 src2));
16426 effect(KILL flags);
16427
16428 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16434 ins_encode %{
16435 Label done;
16436 __ cmpq($src1$$Register, $src2$$Register);
16437 __ movl($dst$$Register, -1);
16438 __ jccb(Assembler::below, done);
16439 __ setcc(Assembler::notZero, $dst$$Register);
16440 __ bind(done);
16441 %}
16442 ins_pipe(pipe_slow);
16443 %}
16444
16445 // Unsigned long compare Instructions; really, same as signed long except they
16446 // produce an rFlagsRegU instead of rFlagsReg.
16447 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16448 %{
16449 match(Set cr (CmpUL op1 op2));
16450
16451 format %{ "cmpq $op1, $op2\t# unsigned" %}
16452 ins_encode %{
16453 __ cmpq($op1$$Register, $op2$$Register);
16454 %}
16455 ins_pipe(ialu_cr_reg_reg);
16456 %}
16457
16458 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16459 %{
16460 match(Set cr (CmpUL op1 op2));
16461
16462 format %{ "cmpq $op1, $op2\t# unsigned" %}
16463 ins_encode %{
16464 __ cmpq($op1$$Register, $op2$$constant);
16465 %}
16466 ins_pipe(ialu_cr_reg_imm);
16467 %}
16468
16469 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16470 %{
16471 match(Set cr (CmpUL op1 (LoadL op2)));
16472
16473 format %{ "cmpq $op1, $op2\t# unsigned" %}
16474 ins_encode %{
16475 __ cmpq($op1$$Register, $op2$$Address);
16476 %}
16477 ins_pipe(ialu_cr_reg_mem);
16478 %}
16479
16480 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16481 %{
16482 match(Set cr (CmpUL src zero));
16483
16484 format %{ "testq $src, $src\t# unsigned" %}
16485 ins_encode %{
16486 __ testq($src$$Register, $src$$Register);
16487 %}
16488 ins_pipe(ialu_cr_reg_imm);
16489 %}
16490
16491 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16492 %{
16493 match(Set cr (CmpI (LoadB mem) imm));
16494
16495 ins_cost(125);
16496 format %{ "cmpb $mem, $imm" %}
16497 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16498 ins_pipe(ialu_cr_reg_mem);
16499 %}
16500
16501 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16502 %{
16503 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16504
16505 ins_cost(125);
16506 format %{ "testb $mem, $imm\t# ubyte" %}
16507 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16508 ins_pipe(ialu_cr_reg_mem);
16509 %}
16510
16511 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16512 %{
16513 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16514
16515 ins_cost(125);
16516 format %{ "testb $mem, $imm\t# byte" %}
16517 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16518 ins_pipe(ialu_cr_reg_mem);
16519 %}
16520
16521 //----------Max and Min--------------------------------------------------------
16522 // Min Instructions
16523
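// Illustrative sketch: minI_rReg below expands into a compare plus a
// conditional move, so e.g. Math.min(a, b) can lower branch-free to roughly
//   cmpl    rax, rbx      # compI_rReg
//   cmovlgt rax, rbx      # cmovI_reg_g: if (a > b) a = b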
16524 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16525 %{
16526 predicate(!UseAPX);
16527 effect(USE_DEF dst, USE src, USE cr);
16528
16529 format %{ "cmovlgt $dst, $src\t# min" %}
16530 ins_encode %{
16531 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16532 %}
16533 ins_pipe(pipe_cmov_reg);
16534 %}
16535
16536 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16537 %{
16538 predicate(UseAPX);
16539 effect(DEF dst, USE src1, USE src2, USE cr);
16540
16541 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16542 ins_encode %{
16543 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16544 %}
16545 ins_pipe(pipe_cmov_reg);
16546 %}
16547
16548 instruct minI_rReg(rRegI dst, rRegI src)
16549 %{
16550 predicate(!UseAPX);
16551 match(Set dst (MinI dst src));
16552
16553 ins_cost(200);
16554 expand %{
16555 rFlagsReg cr;
16556 compI_rReg(cr, dst, src);
16557 cmovI_reg_g(dst, src, cr);
16558 %}
16559 %}
16560
16561 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16562 %{
16563 predicate(UseAPX);
16564 match(Set dst (MinI src1 src2));
16565 effect(DEF dst, USE src1, USE src2);
16566
16567 ins_cost(200);
16568 expand %{
16569 rFlagsReg cr;
16570 compI_rReg(cr, src1, src2);
16571 cmovI_reg_g_ndd(dst, src1, src2, cr);
16572 %}
16573 %}
16574
16575 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16576 %{
16577 predicate(!UseAPX);
16578 effect(USE_DEF dst, USE src, USE cr);
16579
16580 format %{ "cmovllt $dst, $src\t# max" %}
16581 ins_encode %{
16582 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16583 %}
16584 ins_pipe(pipe_cmov_reg);
16585 %}
16586
16587 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16588 %{
16589 predicate(UseAPX);
16590 effect(DEF dst, USE src1, USE src2, USE cr);
16591
16592 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16593 ins_encode %{
16594 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16595 %}
16596 ins_pipe(pipe_cmov_reg);
16597 %}
16598
16599 instruct maxI_rReg(rRegI dst, rRegI src)
16600 %{
16601 predicate(!UseAPX);
16602 match(Set dst (MaxI dst src));
16603
16604 ins_cost(200);
16605 expand %{
16606 rFlagsReg cr;
16607 compI_rReg(cr, dst, src);
16608 cmovI_reg_l(dst, src, cr);
16609 %}
16610 %}
16611
16612 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16613 %{
16614 predicate(UseAPX);
16615 match(Set dst (MaxI src1 src2));
16616 effect(DEF dst, USE src1, USE src2);
16617
16618 ins_cost(200);
16619 expand %{
16620 rFlagsReg cr;
16621 compI_rReg(cr, src1, src2);
16622 cmovI_reg_l_ndd(dst, src1, src2, cr);
16623 %}
16624 %}
16625
16626 // ============================================================================
16627 // Branch Instructions
16628
16629 // Jump Direct - Label defines a relative address from JMP+1
16630 instruct jmpDir(label labl)
16631 %{
16632 match(Goto);
16633 effect(USE labl);
16634
16635 ins_cost(300);
16636 format %{ "jmp $labl" %}
16637 size(5);
16638 ins_encode %{
16639 Label* L = $labl$$label;
16640 __ jmp(*L, false); // Always long jump
16641 %}
16642 ins_pipe(pipe_jmp);
16643 %}
16644
16645 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16646 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16647 %{
16648 match(If cop cr);
16649 effect(USE labl);
16650
16651 ins_cost(300);
16652 format %{ "j$cop $labl" %}
16653 size(6);
16654 ins_encode %{
16655 Label* L = $labl$$label;
16656 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16657 %}
16658 ins_pipe(pipe_jcc);
16659 %}
16660
16661 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16662 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16663 %{
16664 match(CountedLoopEnd cop cr);
16665 effect(USE labl);
16666
16667 ins_cost(300);
16668 format %{ "j$cop $labl\t# loop end" %}
16669 size(6);
16670 ins_encode %{
16671 Label* L = $labl$$label;
16672 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16673 %}
16674 ins_pipe(pipe_jcc);
16675 %}
16676
16677 // Jump Direct Conditional - using unsigned comparison
16678 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16679 match(If cop cmp);
16680 effect(USE labl);
16681
16682 ins_cost(300);
16683 format %{ "j$cop,u $labl" %}
16684 size(6);
16685 ins_encode %{
16686 Label* L = $labl$$label;
16687 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16688 %}
16689 ins_pipe(pipe_jcc);
16690 %}
16691
16692 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16693 match(If cop cmp);
16694 effect(USE labl);
16695
16696 ins_cost(200);
16697 format %{ "j$cop,u $labl" %}
16698 size(6);
16699 ins_encode %{
16700 Label* L = $labl$$label;
16701 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16702 %}
16703 ins_pipe(pipe_jcc);
16704 %}
16705
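// jmpConUCF2 covers float/double eq/ne branches: an unordered compare (NaN
// input) sets ZF, PF and CF, so an illustrative "u == v" must fall through
// on parity before testing equality, while "u != v" must branch on parity
// as well.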
16706 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16707 match(If cop cmp);
16708 effect(USE labl);
16709
16710 ins_cost(200);
16711 format %{ $$template
16712 if ($cop$$cmpcode == Assembler::notEqual) {
16713 $$emit$$"jp,u $labl\n\t"
16714 $$emit$$"j$cop,u $labl"
16715 } else {
16716 $$emit$$"jp,u done\n\t"
16717 $$emit$$"j$cop,u $labl\n\t"
16718 $$emit$$"done:"
16719 }
16720 %}
16721 ins_encode %{
16722 Label* l = $labl$$label;
16723 if ($cop$$cmpcode == Assembler::notEqual) {
16724 __ jcc(Assembler::parity, *l, false);
16725 __ jcc(Assembler::notEqual, *l, false);
16726 } else if ($cop$$cmpcode == Assembler::equal) {
16727 Label done;
16728 __ jccb(Assembler::parity, done);
16729 __ jcc(Assembler::equal, *l, false);
16730 __ bind(done);
16731 } else {
16732 ShouldNotReachHere();
16733 }
16734 %}
16735 ins_pipe(pipe_jcc);
16736 %}
16737
16738 // ============================================================================
16739 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16740 // superklass array for an instance of the superklass. Set a hidden
16741 // internal cache on a hit (cache is checked with exposed code in
16742 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16743 // encoding ALSO sets flags.
16744
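// Conceptually (illustrative pseudo-code, not the emitted sequence):
//   for (i = 0; i < sub->secondary_supers->length(); i++)
//     if (sub->secondary_supers->at(i) == super) {
//       sub->secondary_super_cache = super;  // hit: cache it, return zero
//       return 0;
//     }
//   return nonzero;                          // miss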
16745 instruct partialSubtypeCheck(rdi_RegP result,
16746 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16747 rFlagsReg cr)
16748 %{
16749 match(Set result (PartialSubtypeCheck sub super));
16750 predicate(!UseSecondarySupersTable);
16751 effect(KILL rcx, KILL cr);
16752
16753 ins_cost(1100); // slightly larger than the next version
16754 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16755 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16756 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16757 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16758 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16759 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16760 "xorq $result, $result\t\t Hit: rdi zero\n\t"
16761 "miss:\t" %}
16762
16763 ins_encode %{
16764 Label miss;
16765 // NB: Callers may assume that, when $result is a valid register,
16766 // check_klass_subtype_slow_path_linear sets it to a nonzero
16767 // value.
16768 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16769 $rcx$$Register, $result$$Register,
16770 nullptr, &miss,
16771 /*set_cond_codes:*/ true);
16772 __ xorptr($result$$Register, $result$$Register);
16773 __ bind(miss);
16774 %}
16775
16776 ins_pipe(pipe_slow);
16777 %}
16778
16779 // ============================================================================
16780 // Two versions of hashtable-based partialSubtypeCheck, both used when
16781 // we need to search for a super class in the secondary supers array.
16782 // The first is used when we don't know _a priori_ the class being
16783 // searched for. The second, far more common, is used when we do know:
16784 // this is used for instanceof, checkcast, and any case where C2 can
16785 // determine it by constant propagation.
16786
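// Illustrative examples: "x instanceof SomeClass" names the super class as
// a constant, so C2 can select partialSubtypeCheckConstSuper below, while a
// reflective someClass.isInstance(x) leaves it in a register and uses
// partialSubtypeCheckVarSuper.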
16787 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16788 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16789 rFlagsReg cr)
16790 %{
16791 match(Set result (PartialSubtypeCheck sub super));
16792 predicate(UseSecondarySupersTable);
16793 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16794
16795 ins_cost(1000);
16796 format %{ "partialSubtypeCheck $result, $sub, $super" %}
16797
16798 ins_encode %{
16799 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16800 $temp3$$Register, $temp4$$Register, $result$$Register);
16801 %}
16802
16803 ins_pipe(pipe_slow);
16804 %}
16805
16806 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16807 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16808 rFlagsReg cr)
16809 %{
16810 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16811 predicate(UseSecondarySupersTable);
16812 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16813
16814 ins_cost(700); // smaller than the next version
16815 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16816
16817 ins_encode %{
16818 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16819 if (InlineSecondarySupersTest) {
16820 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16821 $temp3$$Register, $temp4$$Register, $result$$Register,
16822 super_klass_slot);
16823 } else {
16824 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16825 }
16826 %}
16827
16828 ins_pipe(pipe_slow);
16829 %}
16830
16831 // ============================================================================
16832 // Branch Instructions -- short offset versions
16833 //
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler decides whether a
// branch is close enough for the short form using the
// is_short_branch_offset() predicate in the machine-specific code section of
// the file.
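//
// Illustrative size note: the long forms above are jmp rel32 / jcc rel32 at
// 5 / 6 bytes (hence size(5) / size(6)), while the short forms below emit
// jmpb / jccb with a rel8 offset at 2 bytes (size(2)).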
16842
16843 // Jump Direct - Label defines a relative address from JMP+1
16844 instruct jmpDir_short(label labl) %{
16845 match(Goto);
16846 effect(USE labl);
16847
16848 ins_cost(300);
16849 format %{ "jmp,s $labl" %}
16850 size(2);
16851 ins_encode %{
16852 Label* L = $labl$$label;
16853 __ jmpb(*L);
16854 %}
16855 ins_pipe(pipe_jmp);
16856 ins_short_branch(1);
16857 %}
16858
16859 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16860 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16861 match(If cop cr);
16862 effect(USE labl);
16863
16864 ins_cost(300);
16865 format %{ "j$cop,s $labl" %}
16866 size(2);
16867 ins_encode %{
16868 Label* L = $labl$$label;
16869 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16870 %}
16871 ins_pipe(pipe_jcc);
16872 ins_short_branch(1);
16873 %}
16874
16875 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16876 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16877 match(CountedLoopEnd cop cr);
16878 effect(USE labl);
16879
16880 ins_cost(300);
16881 format %{ "j$cop,s $labl\t# loop end" %}
16882 size(2);
16883 ins_encode %{
16884 Label* L = $labl$$label;
16885 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16886 %}
16887 ins_pipe(pipe_jcc);
16888 ins_short_branch(1);
16889 %}
16890
16891 // Jump Direct Conditional - using unsigned comparison
16892 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16893 match(If cop cmp);
16894 effect(USE labl);
16895
16896 ins_cost(300);
16897 format %{ "j$cop,us $labl" %}
16898 size(2);
16899 ins_encode %{
16900 Label* L = $labl$$label;
16901 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16902 %}
16903 ins_pipe(pipe_jcc);
16904 ins_short_branch(1);
16905 %}
16906
16907 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16908 match(If cop cmp);
16909 effect(USE labl);
16910
16911 ins_cost(300);
16912 format %{ "j$cop,us $labl" %}
16913 size(2);
16914 ins_encode %{
16915 Label* L = $labl$$label;
16916 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16917 %}
16918 ins_pipe(pipe_jcc);
16919 ins_short_branch(1);
16920 %}
16921
16922 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16923 match(If cop cmp);
16924 effect(USE labl);
16925
16926 ins_cost(300);
16927 format %{ $$template
16928 if ($cop$$cmpcode == Assembler::notEqual) {
16929 $$emit$$"jp,u,s $labl\n\t"
16930 $$emit$$"j$cop,u,s $labl"
16931 } else {
16932 $$emit$$"jp,u,s done\n\t"
16933 $$emit$$"j$cop,u,s $labl\n\t"
16934 $$emit$$"done:"
16935 }
16936 %}
16937 size(4);
16938 ins_encode %{
16939 Label* l = $labl$$label;
16940 if ($cop$$cmpcode == Assembler::notEqual) {
16941 __ jccb(Assembler::parity, *l);
16942 __ jccb(Assembler::notEqual, *l);
16943 } else if ($cop$$cmpcode == Assembler::equal) {
16944 Label done;
16945 __ jccb(Assembler::parity, done);
16946 __ jccb(Assembler::equal, *l);
16947 __ bind(done);
16948 } else {
16949 ShouldNotReachHere();
16950 }
16951 %}
16952 ins_pipe(pipe_jcc);
16953 ins_short_branch(1);
16954 %}
16955
16956 // ============================================================================
16957 // inlined locking and unlocking
16958
16959 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
16960 match(Set cr (FastLock object box));
16961 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
16962 ins_cost(300);
16963 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
16964 ins_encode %{
16965 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16966 %}
16967 ins_pipe(pipe_slow);
16968 %}
16969
16970 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
16971 match(Set cr (FastUnlock object rax_reg));
16972 effect(TEMP tmp, USE_KILL rax_reg);
16973 ins_cost(300);
16974 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
16975 ins_encode %{
16976 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16977 %}
16978 ins_pipe(pipe_slow);
16979 %}
16980
16981
16982 // ============================================================================
16983 // Safepoint Instructions
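// Illustrative note: the poll below is just a readable-page probe -- testl
// rax, [$poll] loads from the thread's polling page. To bring threads to a
// safepoint the VM arms that page so the probe faults and the signal handler
// parks the thread; the relocation and assert in the encode block keep the
// instruction recognizable as a poll.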
16984 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
16985 %{
16986 match(SafePoint poll);
16987 effect(KILL cr, USE poll);
16988
16989 format %{ "testl rax, [$poll]\t"
16990 "# Safepoint: poll for GC" %}
16991 ins_cost(125);
16992 ins_encode %{
16993 __ relocate(relocInfo::poll_type);
16994 address pre_pc = __ pc();
16995 __ testl(rax, Address($poll$$Register, 0));
16996 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
16997 %}
16998 ins_pipe(ialu_reg_mem);
16999 %}
17000
17001 instruct mask_all_evexL(kReg dst, rRegL src) %{
17002 match(Set dst (MaskAll src));
17003 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17004 ins_encode %{
17005 int mask_len = Matcher::vector_length(this);
17006 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17007 %}
17008 ins_pipe( pipe_slow );
17009 %}
17010
17011 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17012 predicate(Matcher::vector_length(n) > 32);
17013 match(Set dst (MaskAll src));
17014 effect(TEMP tmp);
17015 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17016 ins_encode %{
17017 int mask_len = Matcher::vector_length(this);
17018 __ movslq($tmp$$Register, $src$$Register);
17019 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17020 %}
17021 ins_pipe( pipe_slow );
17022 %}
17023
17024 // ============================================================================
17025 // Procedure Call/Return Instructions
17026 // Call Java Static Instruction
17027 // Note: If this code changes, the corresponding ret_addr_offset() and
17028 // compute_padding() functions will have to be adjusted.
17029 instruct CallStaticJavaDirect(method meth) %{
17030 match(CallStaticJava);
17031 effect(USE meth);
17032
17033 ins_cost(300);
17034 format %{ "call,static " %}
17035 opcode(0xE8); /* E8 cd */
17036 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17037 ins_pipe(pipe_slow);
17038 ins_alignment(4);
17039 %}
17040
17041 // Call Java Dynamic Instruction
17042 // Note: If this code changes, the corresponding ret_addr_offset() and
17043 // compute_padding() functions will have to be adjusted.
17044 instruct CallDynamicJavaDirect(method meth)
17045 %{
17046 match(CallDynamicJava);
17047 effect(USE meth);
17048
17049 ins_cost(300);
17050 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17051 "call,dynamic " %}
17052 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17053 ins_pipe(pipe_slow);
17054 ins_alignment(4);
17055 %}
17056
17057 // Call Runtime Instruction
17058 instruct CallRuntimeDirect(method meth)
17059 %{
17060 match(CallRuntime);
17061 effect(USE meth);
17062
17063 ins_cost(300);
17064 format %{ "call,runtime " %}
17065 ins_encode(clear_avx, Java_To_Runtime(meth));
17066 ins_pipe(pipe_slow);
17067 %}
17068
17069 // Call runtime without safepoint
17070 instruct CallLeafDirect(method meth)
17071 %{
17072 match(CallLeaf);
17073 effect(USE meth);
17074
17075 ins_cost(300);
17076 format %{ "call_leaf,runtime " %}
17077 ins_encode(clear_avx, Java_To_Runtime(meth));
17078 ins_pipe(pipe_slow);
17079 %}
17080
17081 // Call runtime without safepoint and with vector arguments
17082 instruct CallLeafDirectVector(method meth)
17083 %{
17084 match(CallLeafVector);
17085 effect(USE meth);
17086
17087 ins_cost(300);
17088 format %{ "call_leaf,vector " %}
17089 ins_encode(Java_To_Runtime(meth));
17090 ins_pipe(pipe_slow);
17091 %}
17092
17093 // Call runtime without safepoint
17094 instruct CallLeafNoFPDirect(method meth)
17095 %{
17096 match(CallLeafNoFP);
17097 effect(USE meth);
17098
17099 ins_cost(300);
17100 format %{ "call_leaf_nofp,runtime " %}
17101 ins_encode(clear_avx, Java_To_Runtime(meth));
17102 ins_pipe(pipe_slow);
17103 %}
17104
17105 // Return Instruction
17106 // Remove the return address & jump to it.
17107 // Notice: We always emit a nop after a ret to make sure there is room
17108 // for safepoint patching
17109 instruct Ret()
17110 %{
17111 match(Return);
17112
17113 format %{ "ret" %}
17114 ins_encode %{
17115 __ ret(0);
17116 %}
17117 ins_pipe(pipe_jmp);
17118 %}
17119
17120 // Tail Call; Jump from runtime stub to Java code.
17121 // Also known as an 'interprocedural jump'.
17122 // Target of jump will eventually return to caller.
17123 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above this TailCall, and it has reset rbp to the caller's state.
17126 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17127 %{
17128 match(TailCall jump_target method_ptr);
17129
17130 ins_cost(300);
17131 format %{ "jmp $jump_target\t# rbx holds method" %}
17132 ins_encode %{
17133 __ jmp($jump_target$$Register);
17134 %}
17135 ins_pipe(pipe_jmp);
17136 %}
17137
17138 // Tail Jump; remove the return address; jump to target.
17139 // TailCall above leaves the return address around.
17140 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17141 %{
17142 match(TailJump jump_target ex_oop);
17143
17144 ins_cost(300);
17145 format %{ "popq rdx\t# pop return address\n\t"
17146 "jmp $jump_target" %}
17147 ins_encode %{
17148 __ popq(as_Register(RDX_enc));
17149 __ jmp($jump_target$$Register);
17150 %}
17151 ins_pipe(pipe_jmp);
17152 %}
17153
17154 // Forward exception.
17155 instruct ForwardExceptionjmp()
17156 %{
17157 match(ForwardException);
17158
17159 format %{ "jmp forward_exception_stub" %}
17160 ins_encode %{
17161 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17162 %}
17163 ins_pipe(pipe_jmp);
17164 %}
17165
// Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler and is set up
// just prior to jumping to this handler. No code emitted.
17169 instruct CreateException(rax_RegP ex_oop)
17170 %{
17171 match(Set ex_oop (CreateEx));
17172
17173 size(0);
17174 // use the following format syntax
17175 format %{ "# exception oop is in rax; no code emitted" %}
17176 ins_encode();
17177 ins_pipe(empty);
17178 %}
17179
17180 // Rethrow exception:
17181 // The exception oop will come in the first argument position.
17182 // Then JUMP (not call) to the rethrow stub code.
17183 instruct RethrowException()
17184 %{
17185 match(Rethrow);
17186
17187 // use the following format syntax
17188 format %{ "jmp rethrow_stub" %}
17189 ins_encode %{
17190 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17191 %}
17192 ins_pipe(pipe_jmp);
17193 %}
17194
17195 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17199 instruct tlsLoadP(r15_RegP dst) %{
17200 match(Set dst (ThreadLocal));
17201 effect(DEF dst);
17202
17203 size(0);
17204 format %{ "# TLS is in R15" %}
17205 ins_encode( /*empty encoding*/ );
17206 ins_pipe(ialu_reg_reg);
17207 %}
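
// r15 is permanently reserved for the current JavaThread in this register
// convention, which is why the rule above can be size(0): a consumer of
// ThreadLocal simply reads r15 and no instruction needs to be emitted.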
17208
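// The scalar float/double add/sub/mul/div rules below come in pairs: the
// UseAVX == 0 forms use the destructive two-operand SSE encodings
// (dst = dst OP src), while the UseAVX > 0 forms use the non-destructive
// three-operand VEX encodings (dst = src1 OP src2), which frees the register
// allocator from having to place the result in the first input register.
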
17209 instruct addF_reg(regF dst, regF src) %{
17210 predicate(UseAVX == 0);
17211 match(Set dst (AddF dst src));
17212
17213 format %{ "addss $dst, $src" %}
17214 ins_cost(150);
17215 ins_encode %{
17216 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17217 %}
17218 ins_pipe(pipe_slow);
17219 %}
17220
17221 instruct addF_mem(regF dst, memory src) %{
17222 predicate(UseAVX == 0);
17223 match(Set dst (AddF dst (LoadF src)));
17224
17225 format %{ "addss $dst, $src" %}
17226 ins_cost(150);
17227 ins_encode %{
17228 __ addss($dst$$XMMRegister, $src$$Address);
17229 %}
17230 ins_pipe(pipe_slow);
17231 %}
17232
17233 instruct addF_imm(regF dst, immF con) %{
17234 predicate(UseAVX == 0);
17235 match(Set dst (AddF dst con));
17236 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17237 ins_cost(150);
17238 ins_encode %{
17239 __ addss($dst$$XMMRegister, $constantaddress($con));
17240 %}
17241 ins_pipe(pipe_slow);
17242 %}
17243
17244 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17245 predicate(UseAVX > 0);
17246 match(Set dst (AddF src1 src2));
17247
17248 format %{ "vaddss $dst, $src1, $src2" %}
17249 ins_cost(150);
17250 ins_encode %{
17251 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17252 %}
17253 ins_pipe(pipe_slow);
17254 %}
17255
17256 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17257 predicate(UseAVX > 0);
17258 match(Set dst (AddF src1 (LoadF src2)));
17259
17260 format %{ "vaddss $dst, $src1, $src2" %}
17261 ins_cost(150);
17262 ins_encode %{
17263 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17264 %}
17265 ins_pipe(pipe_slow);
17266 %}
17267
17268 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17269 predicate(UseAVX > 0);
17270 match(Set dst (AddF src con));
17271
17272 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17273 ins_cost(150);
17274 ins_encode %{
17275 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17276 %}
17277 ins_pipe(pipe_slow);
17278 %}
17279
17280 instruct addD_reg(regD dst, regD src) %{
17281 predicate(UseAVX == 0);
17282 match(Set dst (AddD dst src));
17283
17284 format %{ "addsd $dst, $src" %}
17285 ins_cost(150);
17286 ins_encode %{
17287 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17288 %}
17289 ins_pipe(pipe_slow);
17290 %}
17291
17292 instruct addD_mem(regD dst, memory src) %{
17293 predicate(UseAVX == 0);
17294 match(Set dst (AddD dst (LoadD src)));
17295
17296 format %{ "addsd $dst, $src" %}
17297 ins_cost(150);
17298 ins_encode %{
17299 __ addsd($dst$$XMMRegister, $src$$Address);
17300 %}
17301 ins_pipe(pipe_slow);
17302 %}
17303
17304 instruct addD_imm(regD dst, immD con) %{
17305 predicate(UseAVX == 0);
17306 match(Set dst (AddD dst con));
17307 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17308 ins_cost(150);
17309 ins_encode %{
17310 __ addsd($dst$$XMMRegister, $constantaddress($con));
17311 %}
17312 ins_pipe(pipe_slow);
17313 %}
17314
17315 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17316 predicate(UseAVX > 0);
17317 match(Set dst (AddD src1 src2));
17318
17319 format %{ "vaddsd $dst, $src1, $src2" %}
17320 ins_cost(150);
17321 ins_encode %{
17322 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17323 %}
17324 ins_pipe(pipe_slow);
17325 %}
17326
17327 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17328 predicate(UseAVX > 0);
17329 match(Set dst (AddD src1 (LoadD src2)));
17330
17331 format %{ "vaddsd $dst, $src1, $src2" %}
17332 ins_cost(150);
17333 ins_encode %{
17334 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17335 %}
17336 ins_pipe(pipe_slow);
17337 %}
17338
17339 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17340 predicate(UseAVX > 0);
17341 match(Set dst (AddD src con));
17342
17343 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17344 ins_cost(150);
17345 ins_encode %{
17346 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17347 %}
17348 ins_pipe(pipe_slow);
17349 %}
17350
17351 instruct subF_reg(regF dst, regF src) %{
17352 predicate(UseAVX == 0);
17353 match(Set dst (SubF dst src));
17354
17355 format %{ "subss $dst, $src" %}
17356 ins_cost(150);
17357 ins_encode %{
17358 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17359 %}
17360 ins_pipe(pipe_slow);
17361 %}
17362
17363 instruct subF_mem(regF dst, memory src) %{
17364 predicate(UseAVX == 0);
17365 match(Set dst (SubF dst (LoadF src)));
17366
17367 format %{ "subss $dst, $src" %}
17368 ins_cost(150);
17369 ins_encode %{
17370 __ subss($dst$$XMMRegister, $src$$Address);
17371 %}
17372 ins_pipe(pipe_slow);
17373 %}
17374
17375 instruct subF_imm(regF dst, immF con) %{
17376 predicate(UseAVX == 0);
17377 match(Set dst (SubF dst con));
17378 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17379 ins_cost(150);
17380 ins_encode %{
17381 __ subss($dst$$XMMRegister, $constantaddress($con));
17382 %}
17383 ins_pipe(pipe_slow);
17384 %}
17385
17386 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17387 predicate(UseAVX > 0);
17388 match(Set dst (SubF src1 src2));
17389
17390 format %{ "vsubss $dst, $src1, $src2" %}
17391 ins_cost(150);
17392 ins_encode %{
17393 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17394 %}
17395 ins_pipe(pipe_slow);
17396 %}
17397
17398 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17399 predicate(UseAVX > 0);
17400 match(Set dst (SubF src1 (LoadF src2)));
17401
17402 format %{ "vsubss $dst, $src1, $src2" %}
17403 ins_cost(150);
17404 ins_encode %{
17405 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17406 %}
17407 ins_pipe(pipe_slow);
17408 %}
17409
17410 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17411 predicate(UseAVX > 0);
17412 match(Set dst (SubF src con));
17413
17414 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17415 ins_cost(150);
17416 ins_encode %{
17417 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17418 %}
17419 ins_pipe(pipe_slow);
17420 %}
17421
17422 instruct subD_reg(regD dst, regD src) %{
17423 predicate(UseAVX == 0);
17424 match(Set dst (SubD dst src));
17425
17426 format %{ "subsd $dst, $src" %}
17427 ins_cost(150);
17428 ins_encode %{
17429 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17430 %}
17431 ins_pipe(pipe_slow);
17432 %}
17433
17434 instruct subD_mem(regD dst, memory src) %{
17435 predicate(UseAVX == 0);
17436 match(Set dst (SubD dst (LoadD src)));
17437
17438 format %{ "subsd $dst, $src" %}
17439 ins_cost(150);
17440 ins_encode %{
17441 __ subsd($dst$$XMMRegister, $src$$Address);
17442 %}
17443 ins_pipe(pipe_slow);
17444 %}
17445
17446 instruct subD_imm(regD dst, immD con) %{
17447 predicate(UseAVX == 0);
17448 match(Set dst (SubD dst con));
17449 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17450 ins_cost(150);
17451 ins_encode %{
17452 __ subsd($dst$$XMMRegister, $constantaddress($con));
17453 %}
17454 ins_pipe(pipe_slow);
17455 %}
17456
17457 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17458 predicate(UseAVX > 0);
17459 match(Set dst (SubD src1 src2));
17460
17461 format %{ "vsubsd $dst, $src1, $src2" %}
17462 ins_cost(150);
17463 ins_encode %{
17464 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17465 %}
17466 ins_pipe(pipe_slow);
17467 %}
17468
17469 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17470 predicate(UseAVX > 0);
17471 match(Set dst (SubD src1 (LoadD src2)));
17472
17473 format %{ "vsubsd $dst, $src1, $src2" %}
17474 ins_cost(150);
17475 ins_encode %{
17476 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17477 %}
17478 ins_pipe(pipe_slow);
17479 %}
17480
17481 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17482 predicate(UseAVX > 0);
17483 match(Set dst (SubD src con));
17484
17485 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17486 ins_cost(150);
17487 ins_encode %{
17488 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17489 %}
17490 ins_pipe(pipe_slow);
17491 %}
17492
17493 instruct mulF_reg(regF dst, regF src) %{
17494 predicate(UseAVX == 0);
17495 match(Set dst (MulF dst src));
17496
17497 format %{ "mulss $dst, $src" %}
17498 ins_cost(150);
17499 ins_encode %{
17500 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17501 %}
17502 ins_pipe(pipe_slow);
17503 %}
17504
17505 instruct mulF_mem(regF dst, memory src) %{
17506 predicate(UseAVX == 0);
17507 match(Set dst (MulF dst (LoadF src)));
17508
17509 format %{ "mulss $dst, $src" %}
17510 ins_cost(150);
17511 ins_encode %{
17512 __ mulss($dst$$XMMRegister, $src$$Address);
17513 %}
17514 ins_pipe(pipe_slow);
17515 %}
17516
17517 instruct mulF_imm(regF dst, immF con) %{
17518 predicate(UseAVX == 0);
17519 match(Set dst (MulF dst con));
17520 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17521 ins_cost(150);
17522 ins_encode %{
17523 __ mulss($dst$$XMMRegister, $constantaddress($con));
17524 %}
17525 ins_pipe(pipe_slow);
17526 %}
17527
17528 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17529 predicate(UseAVX > 0);
17530 match(Set dst (MulF src1 src2));
17531
17532 format %{ "vmulss $dst, $src1, $src2" %}
17533 ins_cost(150);
17534 ins_encode %{
17535 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17536 %}
17537 ins_pipe(pipe_slow);
17538 %}
17539
17540 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17541 predicate(UseAVX > 0);
17542 match(Set dst (MulF src1 (LoadF src2)));
17543
17544 format %{ "vmulss $dst, $src1, $src2" %}
17545 ins_cost(150);
17546 ins_encode %{
17547 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17548 %}
17549 ins_pipe(pipe_slow);
17550 %}
17551
17552 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17553 predicate(UseAVX > 0);
17554 match(Set dst (MulF src con));
17555
17556 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17557 ins_cost(150);
17558 ins_encode %{
17559 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17560 %}
17561 ins_pipe(pipe_slow);
17562 %}
17563
17564 instruct mulD_reg(regD dst, regD src) %{
17565 predicate(UseAVX == 0);
17566 match(Set dst (MulD dst src));
17567
17568 format %{ "mulsd $dst, $src" %}
17569 ins_cost(150);
17570 ins_encode %{
17571 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17572 %}
17573 ins_pipe(pipe_slow);
17574 %}
17575
17576 instruct mulD_mem(regD dst, memory src) %{
17577 predicate(UseAVX == 0);
17578 match(Set dst (MulD dst (LoadD src)));
17579
17580 format %{ "mulsd $dst, $src" %}
17581 ins_cost(150);
17582 ins_encode %{
17583 __ mulsd($dst$$XMMRegister, $src$$Address);
17584 %}
17585 ins_pipe(pipe_slow);
17586 %}
17587
17588 instruct mulD_imm(regD dst, immD con) %{
17589 predicate(UseAVX == 0);
17590 match(Set dst (MulD dst con));
17591 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17592 ins_cost(150);
17593 ins_encode %{
17594 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17595 %}
17596 ins_pipe(pipe_slow);
17597 %}
17598
17599 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17600 predicate(UseAVX > 0);
17601 match(Set dst (MulD src1 src2));
17602
17603 format %{ "vmulsd $dst, $src1, $src2" %}
17604 ins_cost(150);
17605 ins_encode %{
17606 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17607 %}
17608 ins_pipe(pipe_slow);
17609 %}
17610
17611 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17612 predicate(UseAVX > 0);
17613 match(Set dst (MulD src1 (LoadD src2)));
17614
17615 format %{ "vmulsd $dst, $src1, $src2" %}
17616 ins_cost(150);
17617 ins_encode %{
17618 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17619 %}
17620 ins_pipe(pipe_slow);
17621 %}
17622
17623 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17624 predicate(UseAVX > 0);
17625 match(Set dst (MulD src con));
17626
17627 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17628 ins_cost(150);
17629 ins_encode %{
17630 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17631 %}
17632 ins_pipe(pipe_slow);
17633 %}
17634
17635 instruct divF_reg(regF dst, regF src) %{
17636 predicate(UseAVX == 0);
17637 match(Set dst (DivF dst src));
17638
17639 format %{ "divss $dst, $src" %}
17640 ins_cost(150);
17641 ins_encode %{
17642 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17643 %}
17644 ins_pipe(pipe_slow);
17645 %}
17646
17647 instruct divF_mem(regF dst, memory src) %{
17648 predicate(UseAVX == 0);
17649 match(Set dst (DivF dst (LoadF src)));
17650
17651 format %{ "divss $dst, $src" %}
17652 ins_cost(150);
17653 ins_encode %{
17654 __ divss($dst$$XMMRegister, $src$$Address);
17655 %}
17656 ins_pipe(pipe_slow);
17657 %}
17658
17659 instruct divF_imm(regF dst, immF con) %{
17660 predicate(UseAVX == 0);
17661 match(Set dst (DivF dst con));
17662 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17663 ins_cost(150);
17664 ins_encode %{
17665 __ divss($dst$$XMMRegister, $constantaddress($con));
17666 %}
17667 ins_pipe(pipe_slow);
17668 %}
17669
17670 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17671 predicate(UseAVX > 0);
17672 match(Set dst (DivF src1 src2));
17673
17674 format %{ "vdivss $dst, $src1, $src2" %}
17675 ins_cost(150);
17676 ins_encode %{
17677 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17678 %}
17679 ins_pipe(pipe_slow);
17680 %}
17681
17682 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17683 predicate(UseAVX > 0);
17684 match(Set dst (DivF src1 (LoadF src2)));
17685
17686 format %{ "vdivss $dst, $src1, $src2" %}
17687 ins_cost(150);
17688 ins_encode %{
17689 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17690 %}
17691 ins_pipe(pipe_slow);
17692 %}
17693
17694 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17695 predicate(UseAVX > 0);
17696 match(Set dst (DivF src con));
17697
17698 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17699 ins_cost(150);
17700 ins_encode %{
17701 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17702 %}
17703 ins_pipe(pipe_slow);
17704 %}
17705
17706 instruct divD_reg(regD dst, regD src) %{
17707 predicate(UseAVX == 0);
17708 match(Set dst (DivD dst src));
17709
17710 format %{ "divsd $dst, $src" %}
17711 ins_cost(150);
17712 ins_encode %{
17713 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17714 %}
17715 ins_pipe(pipe_slow);
17716 %}
17717
17718 instruct divD_mem(regD dst, memory src) %{
17719 predicate(UseAVX == 0);
17720 match(Set dst (DivD dst (LoadD src)));
17721
17722 format %{ "divsd $dst, $src" %}
17723 ins_cost(150);
17724 ins_encode %{
17725 __ divsd($dst$$XMMRegister, $src$$Address);
17726 %}
17727 ins_pipe(pipe_slow);
17728 %}
17729
17730 instruct divD_imm(regD dst, immD con) %{
17731 predicate(UseAVX == 0);
17732 match(Set dst (DivD dst con));
17733 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17734 ins_cost(150);
17735 ins_encode %{
17736 __ divsd($dst$$XMMRegister, $constantaddress($con));
17737 %}
17738 ins_pipe(pipe_slow);
17739 %}
17740
17741 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17742 predicate(UseAVX > 0);
17743 match(Set dst (DivD src1 src2));
17744
17745 format %{ "vdivsd $dst, $src1, $src2" %}
17746 ins_cost(150);
17747 ins_encode %{
17748 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17749 %}
17750 ins_pipe(pipe_slow);
17751 %}
17752
17753 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17754 predicate(UseAVX > 0);
17755 match(Set dst (DivD src1 (LoadD src2)));
17756
17757 format %{ "vdivsd $dst, $src1, $src2" %}
17758 ins_cost(150);
17759 ins_encode %{
17760 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17761 %}
17762 ins_pipe(pipe_slow);
17763 %}
17764
17765 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17766 predicate(UseAVX > 0);
17767 match(Set dst (DivD src con));
17768
17769 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17770 ins_cost(150);
17771 ins_encode %{
17772 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17773 %}
17774 ins_pipe(pipe_slow);
17775 %}
17776
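// The abs/neg rules below implement IEEE-754 sign manipulation with bitwise
// constants: abs clears the sign bit (AND with 0x7fffffff / 0x7fff...f) and
// neg flips it (XOR with 0x80000000 / 0x8000...0), so neither form can raise
// an FP exception or touch anything but the sign bit of a NaN.
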
17777 instruct absF_reg(regF dst) %{
17778 predicate(UseAVX == 0);
17779 match(Set dst (AbsF dst));
17780 ins_cost(150);
17781 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
17782 ins_encode %{
17783 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17784 %}
17785 ins_pipe(pipe_slow);
17786 %}
17787
17788 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17789 predicate(UseAVX > 0);
17790 match(Set dst (AbsF src));
17791 ins_cost(150);
17792 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17793 ins_encode %{
17794 int vlen_enc = Assembler::AVX_128bit;
17795 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17796 ExternalAddress(float_signmask()), vlen_enc);
17797 %}
17798 ins_pipe(pipe_slow);
17799 %}
17800
17801 instruct absD_reg(regD dst) %{
17802 predicate(UseAVX == 0);
17803 match(Set dst (AbsD dst));
17804 ins_cost(150);
17805 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
17806 "# abs double by sign masking" %}
17807 ins_encode %{
17808 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17809 %}
17810 ins_pipe(pipe_slow);
17811 %}
17812
17813 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17814 predicate(UseAVX > 0);
17815 match(Set dst (AbsD src));
17816 ins_cost(150);
17817 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
17818 "# abs double by sign masking" %}
17819 ins_encode %{
17820 int vlen_enc = Assembler::AVX_128bit;
17821 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17822 ExternalAddress(double_signmask()), vlen_enc);
17823 %}
17824 ins_pipe(pipe_slow);
17825 %}
17826
17827 instruct negF_reg(regF dst) %{
17828 predicate(UseAVX == 0);
17829 match(Set dst (NegF dst));
17830 ins_cost(150);
17831 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
17832 ins_encode %{
17833 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17834 %}
17835 ins_pipe(pipe_slow);
17836 %}
17837
17838 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17839 predicate(UseAVX > 0);
17840 match(Set dst (NegF src));
17841 ins_cost(150);
17842 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17843 ins_encode %{
17844 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17845 ExternalAddress(float_signflip()));
17846 %}
17847 ins_pipe(pipe_slow);
17848 %}
17849
17850 instruct negD_reg(regD dst) %{
17851 predicate(UseAVX == 0);
17852 match(Set dst (NegD dst));
17853 ins_cost(150);
17854 format %{ "xorpd $dst, [0x8000000000000000]\t"
17855 "# neg double by sign flipping" %}
17856 ins_encode %{
17857 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17858 %}
17859 ins_pipe(pipe_slow);
17860 %}
17861
17862 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17863 predicate(UseAVX > 0);
17864 match(Set dst (NegD src));
17865 ins_cost(150);
17866 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
17867 "# neg double by sign flipping" %}
17868 ins_encode %{
17869 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17870 ExternalAddress(double_signflip()));
17871 %}
17872 ins_pipe(pipe_slow);
17873 %}
17874
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance, so only the rule where the input is pre-loaded into
// the dst register is defined below.
17877 instruct sqrtF_reg(regF dst) %{
17878 match(Set dst (SqrtF dst));
17879 format %{ "sqrtss $dst, $dst" %}
17880 ins_encode %{
17881 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17882 %}
17883 ins_pipe(pipe_slow);
17884 %}
17885
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance, so only the rule where the input is pre-loaded into
// the dst register is defined below.
17888 instruct sqrtD_reg(regD dst) %{
17889 match(Set dst (SqrtD dst));
17890 format %{ "sqrtsd $dst, $dst" %}
17891 ins_encode %{
17892 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17893 %}
17894 ins_pipe(pipe_slow);
17895 %}
17896
17897 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17898 effect(TEMP tmp);
17899 match(Set dst (ConvF2HF src));
17900 ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
17902 ins_encode %{
17903 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17904 %}
17905 ins_pipe( pipe_slow );
17906 %}
17907
17908 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17909 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17910 effect(TEMP ktmp, TEMP rtmp);
17911 match(Set mem (StoreC mem (ConvF2HF src)));
17912 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
17913 ins_encode %{
17914 __ movl($rtmp$$Register, 0x1);
17915 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17916 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17917 %}
17918 ins_pipe( pipe_slow );
17919 %}
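
// The k-mask setup above stores exactly one half-float: a masked evcvtps2ph
// with a memory destination writes one 16-bit lane per set mask bit, so
// loading $ktmp with 0x1 restricts the store to the lowest element instead
// of spilling a full 128-bit row to $mem.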
17920
17921 instruct vconvF2HF(vec dst, vec src) %{
17922 match(Set dst (VectorCastF2HF src));
17923 format %{ "vector_conv_F2HF $dst $src" %}
17924 ins_encode %{
17925 int vlen_enc = vector_length_encoding(this, $src);
17926 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17927 %}
17928 ins_pipe( pipe_slow );
17929 %}
17930
17931 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
17932 predicate(n->as_StoreVector()->memory_size() >= 16);
17933 match(Set mem (StoreVector mem (VectorCastF2HF src)));
17934 format %{ "vcvtps2ph $mem,$src" %}
17935 ins_encode %{
17936 int vlen_enc = vector_length_encoding(this, $src);
17937 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
17938 %}
17939 ins_pipe( pipe_slow );
17940 %}
17941
17942 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
17943 match(Set dst (ConvHF2F src));
17944 format %{ "vcvtph2ps $dst,$src" %}
17945 ins_encode %{
17946 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
17947 %}
17948 ins_pipe( pipe_slow );
17949 %}
17950
17951 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
17952 match(Set dst (VectorCastHF2F (LoadVector mem)));
17953 format %{ "vcvtph2ps $dst,$mem" %}
17954 ins_encode %{
17955 int vlen_enc = vector_length_encoding(this);
17956 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
17957 %}
17958 ins_pipe( pipe_slow );
17959 %}
17960
17961 instruct vconvHF2F(vec dst, vec src) %{
17962 match(Set dst (VectorCastHF2F src));
17963 ins_cost(125);
17964 format %{ "vector_conv_HF2F $dst,$src" %}
17965 ins_encode %{
17966 int vlen_enc = vector_length_encoding(this);
17967 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
17968 %}
17969 ins_pipe( pipe_slow );
17970 %}
17971
17972 // ---------------------------------------- VectorReinterpret ------------------------------------
17973 instruct reinterpret_mask(kReg dst) %{
17974 predicate(n->bottom_type()->isa_vectmask() &&
17975 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
17976 match(Set dst (VectorReinterpret dst));
17977 ins_cost(125);
17978 format %{ "vector_reinterpret $dst\t!" %}
17979 ins_encode %{
17980 // empty
17981 %}
17982 ins_pipe( pipe_slow );
17983 %}
17984
17985 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
17986 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17987 n->bottom_type()->isa_vectmask() &&
17988 n->in(1)->bottom_type()->isa_vectmask() &&
17989 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same mask size in bits
17991 match(Set dst (VectorReinterpret src));
17992 effect(TEMP xtmp);
17993 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
17994 ins_encode %{
17995 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
17996 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
17998 int vlen_enc = vector_length_encoding(src_sz);
17999 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18000 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18001 %}
18002 ins_pipe( pipe_slow );
18003 %}
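
// This rule and the two that follow share one idea: there is no direct way to
// change the element granularity of an opmask, so the mask is first expanded
// into a vector of all-ones lanes (evpmovm2w/d/q) and then re-compressed at
// byte granularity (evpmovb2m), with $xtmp serving as the bridge register.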
18004
18005 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18006 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18007 n->bottom_type()->isa_vectmask() &&
18008 n->in(1)->bottom_type()->isa_vectmask() &&
18009 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18010 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same mask size in bits
18012 match(Set dst (VectorReinterpret src));
18013 effect(TEMP xtmp);
18014 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18015 ins_encode %{
18016 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18017 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18019 int vlen_enc = vector_length_encoding(src_sz);
18020 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18021 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18022 %}
18023 ins_pipe( pipe_slow );
18024 %}
18025
18026 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18027 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18028 n->bottom_type()->isa_vectmask() &&
18029 n->in(1)->bottom_type()->isa_vectmask() &&
18030 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18031 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same mask size in bits
18033 match(Set dst (VectorReinterpret src));
18034 effect(TEMP xtmp);
18035 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18036 ins_encode %{
18037 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18038 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18040 int vlen_enc = vector_length_encoding(src_sz);
18041 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18042 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18043 %}
18044 ins_pipe( pipe_slow );
18045 %}
18046
18047 instruct reinterpret(vec dst) %{
18048 predicate(!n->bottom_type()->isa_vectmask() &&
18049 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18050 match(Set dst (VectorReinterpret dst));
18051 ins_cost(125);
18052 format %{ "vector_reinterpret $dst\t!" %}
18053 ins_encode %{
18054 // empty
18055 %}
18056 ins_pipe( pipe_slow );
18057 %}
18058
18059 instruct reinterpret_expand(vec dst, vec src) %{
18060 predicate(UseAVX == 0 &&
18061 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18062 match(Set dst (VectorReinterpret src));
18063 ins_cost(125);
18064 effect(TEMP dst);
18065 format %{ "vector_reinterpret_expand $dst,$src" %}
18066 ins_encode %{
18067 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18068 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18069
18070 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18071 if (src_vlen_in_bytes == 4) {
18072 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18073 } else {
18074 assert(src_vlen_in_bytes == 8, "");
18075 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18076 }
18077 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18078 %}
18079 ins_pipe( pipe_slow );
18080 %}
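
// A sketch of the pure-SSE expand above for a 4-byte source: the constant is
// (presumably) a pattern with ones in its low 32 bits, so the AND leaves the
// source lanes intact and guarantees zeros beyond the source length:
//
//   movdqu dst, [vector_32_bit_mask]   ; ones in the low 4 bytes
//   pand   dst, src                    ; keep low 4 bytes of src, zero the rest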
18081
18082 instruct vreinterpret_expand4(legVec dst, vec src) %{
18083 predicate(UseAVX > 0 &&
18084 !n->bottom_type()->isa_vectmask() &&
18085 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18086 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18087 match(Set dst (VectorReinterpret src));
18088 ins_cost(125);
18089 format %{ "vector_reinterpret_expand $dst,$src" %}
18090 ins_encode %{
18091 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18092 %}
18093 ins_pipe( pipe_slow );
18094 %}
18095
18096
18097 instruct vreinterpret_expand(legVec dst, vec src) %{
18098 predicate(UseAVX > 0 &&
18099 !n->bottom_type()->isa_vectmask() &&
18100 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18101 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18102 match(Set dst (VectorReinterpret src));
18103 ins_cost(125);
18104 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18105 ins_encode %{
18106 switch (Matcher::vector_length_in_bytes(this, $src)) {
18107 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18108 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18109 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18110 default: ShouldNotReachHere();
18111 }
18112 %}
18113 ins_pipe( pipe_slow );
18114 %}
18115
18116 instruct reinterpret_shrink(vec dst, legVec src) %{
18117 predicate(!n->bottom_type()->isa_vectmask() &&
18118 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18119 match(Set dst (VectorReinterpret src));
18120 ins_cost(125);
18121 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18122 ins_encode %{
18123 switch (Matcher::vector_length_in_bytes(this)) {
18124 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18125 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18126 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18127 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18128 default: ShouldNotReachHere();
18129 }
18130 %}
18131 ins_pipe( pipe_slow );
18132 %}
18133
18134 // ----------------------------------------------------------------------------------------------------
18135
18136 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18137 match(Set dst (RoundDoubleMode src rmode));
18138 format %{ "roundsd $dst,$src" %}
18139 ins_cost(150);
18140 ins_encode %{
18141 assert(UseSSE >= 4, "required");
18142 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18143 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18144 }
18145 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18146 %}
18147 ins_pipe(pipe_slow);
18148 %}
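
// The pxor above is a false-dependence breaker rather than a semantic
// requirement: the legacy-encoded roundsd writes only the low 64 bits of
// $dst, so a stale upper half would make the instruction depend on whatever
// last wrote $dst; clearing the register first severs that chain. With AVX
// the assembler can use a three-operand form that does not read the old
// $dst, hence the UseAVX == 0 guard.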
18149
18150 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18151 match(Set dst (RoundDoubleMode con rmode));
18152 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18153 ins_cost(150);
18154 ins_encode %{
18155 assert(UseSSE >= 4, "required");
18156 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18157 %}
18158 ins_pipe(pipe_slow);
18159 %}
18160
18161 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18162 predicate(Matcher::vector_length(n) < 8);
18163 match(Set dst (RoundDoubleModeV src rmode));
18164 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18165 ins_encode %{
18166 assert(UseAVX > 0, "required");
18167 int vlen_enc = vector_length_encoding(this);
18168 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18169 %}
18170 ins_pipe( pipe_slow );
18171 %}
18172
18173 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18174 predicate(Matcher::vector_length(n) == 8);
18175 match(Set dst (RoundDoubleModeV src rmode));
18176 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18177 ins_encode %{
18178 assert(UseAVX > 2, "required");
18179 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18180 %}
18181 ins_pipe( pipe_slow );
18182 %}
18183
18184 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18185 predicate(Matcher::vector_length(n) < 8);
18186 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18187 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18188 ins_encode %{
18189 assert(UseAVX > 0, "required");
18190 int vlen_enc = vector_length_encoding(this);
18191 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18192 %}
18193 ins_pipe( pipe_slow );
18194 %}
18195
18196 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18197 predicate(Matcher::vector_length(n) == 8);
18198 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18199 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18200 ins_encode %{
18201 assert(UseAVX > 2, "required");
18202 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18203 %}
18204 ins_pipe( pipe_slow );
18205 %}
18206
18207 instruct onspinwait() %{
18208 match(OnSpinWait);
18209 ins_cost(200);
18210
18211 format %{
18212 $$template
18213 $$emit$$"pause\t! membar_onspinwait"
18214 %}
18215 ins_encode %{
18216 __ pause();
18217 %}
18218 ins_pipe(pipe_slow);
18219 %}
18220
18221 // a * b + c
18222 instruct fmaD_reg(regD a, regD b, regD c) %{
18223 match(Set c (FmaD c (Binary a b)));
18224 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18225 ins_cost(150);
18226 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18228 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18229 %}
18230 ins_pipe( pipe_slow );
18231 %}
18232
18233 // a * b + c
18234 instruct fmaF_reg(regF a, regF b, regF c) %{
18235 match(Set c (FmaF c (Binary a b)));
18236 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18237 ins_cost(150);
18238 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18240 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18241 %}
18242 ins_pipe( pipe_slow );
18243 %}
18244
18245 // ====================VECTOR INSTRUCTIONS=====================================
18246
18247 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18248 instruct MoveVec2Leg(legVec dst, vec src) %{
18249 match(Set dst src);
18250 format %{ "" %}
18251 ins_encode %{
18252 ShouldNotReachHere();
18253 %}
18254 ins_pipe( fpu_reg_reg );
18255 %}
18256
18257 instruct MoveLeg2Vec(vec dst, legVec src) %{
18258 match(Set dst src);
18259 format %{ "" %}
18260 ins_encode %{
18261 ShouldNotReachHere();
18262 %}
18263 ins_pipe( fpu_reg_reg );
18264 %}
18265
18266 // ============================================================================
18267
18268 // Load vectors generic operand pattern
18269 instruct loadV(vec dst, memory mem) %{
18270 match(Set dst (LoadVector mem));
18271 ins_cost(125);
18272 format %{ "load_vector $dst,$mem" %}
18273 ins_encode %{
18274 BasicType bt = Matcher::vector_element_basic_type(this);
18275 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18276 %}
18277 ins_pipe( pipe_slow );
18278 %}
18279
18280 // Store vectors generic operand pattern.
18281 instruct storeV(memory mem, vec src) %{
18282 match(Set mem (StoreVector mem src));
18283 ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18285 ins_encode %{
18286 switch (Matcher::vector_length_in_bytes(this, $src)) {
18287 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18288 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18289 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18290 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18291 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18292 default: ShouldNotReachHere();
18293 }
18294 %}
18295 ins_pipe( pipe_slow );
18296 %}
18297
18298 // ---------------------------------------- Gather ------------------------------------
18299
18300 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18301
18302 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18303 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18304 Matcher::vector_length_in_bytes(n) <= 32);
18305 match(Set dst (LoadVectorGather mem idx));
18306 effect(TEMP dst, TEMP tmp, TEMP mask);
18307 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18308 ins_encode %{
18309 int vlen_enc = vector_length_encoding(this);
18310 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18311 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18312 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18313 __ lea($tmp$$Register, $mem$$Address);
18314 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18315 %}
18316 ins_pipe( pipe_slow );
18317 %}
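
// vpcmpeqd of a register with itself always compares equal, so it is the
// idiomatic way to materialize an all-ones vector; here it builds a
// "gather every lane" mask. The mask has to be rebuilt for each gather
// because the AVX2 gather instructions clear mask lanes as elements
// complete, which is also why $mask is declared TEMP.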
18318
18319
18320 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18321 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18322 !is_subword_type(Matcher::vector_element_basic_type(n)));
18323 match(Set dst (LoadVectorGather mem idx));
18324 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18326 ins_encode %{
18327 int vlen_enc = vector_length_encoding(this);
18328 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18329 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18330 __ lea($tmp$$Register, $mem$$Address);
18331 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18332 %}
18333 ins_pipe( pipe_slow );
18334 %}
18335
18336 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18337 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18338 !is_subword_type(Matcher::vector_element_basic_type(n)));
18339 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18340 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18342 ins_encode %{
18343 assert(UseAVX > 2, "sanity");
18344 int vlen_enc = vector_length_encoding(this);
18345 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18346 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is moved to a temporary first.
18349 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18350 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18351 __ lea($tmp$$Register, $mem$$Address);
18352 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18353 %}
18354 ins_pipe( pipe_slow );
18355 %}
18356
18357 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18358 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18359 match(Set dst (LoadVectorGather mem idx_base));
18360 effect(TEMP tmp, TEMP rtmp);
18361 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18362 ins_encode %{
18363 int vlen_enc = vector_length_encoding(this);
18364 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18365 __ lea($tmp$$Register, $mem$$Address);
18366 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18367 %}
18368 ins_pipe( pipe_slow );
18369 %}
18370
18371 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18372 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18373 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18374 match(Set dst (LoadVectorGather mem idx_base));
18375 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18376 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18377 ins_encode %{
18378 int vlen_enc = vector_length_encoding(this);
18379 int vector_len = Matcher::vector_length(this);
18380 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18381 __ lea($tmp$$Register, $mem$$Address);
18382 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18383 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18384 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18385 %}
18386 ins_pipe( pipe_slow );
18387 %}
18388
18389 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18390 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18391 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18392 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18393 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18394 ins_encode %{
18395 int vlen_enc = vector_length_encoding(this);
18396 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18397 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18398 __ lea($tmp$$Register, $mem$$Address);
18399 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18400 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18401 %}
18402 ins_pipe( pipe_slow );
18403 %}
18404
18405 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18406 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18407 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18408 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18409 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18410 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18411 ins_encode %{
18412 int vlen_enc = vector_length_encoding(this);
18413 int vector_len = Matcher::vector_length(this);
18414 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18415 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18416 __ lea($tmp$$Register, $mem$$Address);
18417 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18418 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18419 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18420 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18421 %}
18422 ins_pipe( pipe_slow );
18423 %}
18424
18425 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18426 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18427 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18428 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18429 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18430 ins_encode %{
18431 int vlen_enc = vector_length_encoding(this);
18432 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18433 __ lea($tmp$$Register, $mem$$Address);
18434 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18435 if (elem_bt == T_SHORT) {
18436 __ movl($mask_idx$$Register, 0x55555555);
18437 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18438 }
18439 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18440 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18441 %}
18442 ins_pipe( pipe_slow );
18443 %}
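
// Mask-compression detail for the rule above: vpmovmskb yields one bit per
// byte, but for T_SHORT the downstream helper wants one bit per 16-bit
// element, so pextl with the 0x55555555 pattern selects every other bit and
// packs the per-short bits into the low end of $rtmp2 (presumably the layout
// vgather8b_masked expects).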
18444
18445 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18446 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18447 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18448 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18449 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18450 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18451 ins_encode %{
18452 int vlen_enc = vector_length_encoding(this);
18453 int vector_len = Matcher::vector_length(this);
18454 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18455 __ lea($tmp$$Register, $mem$$Address);
18456 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18457 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18458 if (elem_bt == T_SHORT) {
18459 __ movl($mask_idx$$Register, 0x55555555);
18460 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18461 }
18462 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18463 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18464 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18465 %}
18466 ins_pipe( pipe_slow );
18467 %}
18468
18469 // ====================Scatter=======================================
18470
18471 // Scatter INT, LONG, FLOAT, DOUBLE
18472
18473 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18474 predicate(UseAVX > 2);
18475 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18476 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18478 ins_encode %{
18479 int vlen_enc = vector_length_encoding(this, $src);
18480 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18481
18482 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18483 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18484
18485 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18486 __ lea($tmp$$Register, $mem$$Address);
18487 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18488 %}
18489 ins_pipe( pipe_slow );
18490 %}
18491
18492 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18493 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18494 effect(TEMP tmp, TEMP ktmp);
18495 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18496 ins_encode %{
18497 int vlen_enc = vector_length_encoding(this, $src);
18498 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18499 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18500 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is moved to a temporary first.
18503 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18504 __ lea($tmp$$Register, $mem$$Address);
18505 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18506 %}
18507 ins_pipe( pipe_slow );
18508 %}
18509
18510 // ====================REPLICATE=======================================
18511
18512 // Replicate byte scalar to be vector
18513 instruct vReplB_reg(vec dst, rRegI src) %{
18514 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18515 match(Set dst (Replicate src));
18516 format %{ "replicateB $dst,$src" %}
18517 ins_encode %{
18518 uint vlen = Matcher::vector_length(this);
18519 if (UseAVX >= 2) {
18520 int vlen_enc = vector_length_encoding(this);
18521 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18522 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18523 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18524 } else {
18525 __ movdl($dst$$XMMRegister, $src$$Register);
18526 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18527 }
18528 } else {
18529 assert(UseAVX < 2, "");
18530 __ movdl($dst$$XMMRegister, $src$$Register);
18531 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18532 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18533 if (vlen >= 16) {
18534 assert(vlen == 16, "");
18535 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18536 }
18537 }
18538 %}
18539 ins_pipe( pipe_slow );
18540 %}
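
// The pre-AVX2 fallback above builds the byte broadcast stepwise:
//
//   movdl      dst, src      ; byte of interest lands in lane 0
//   punpcklbw  dst, dst      ; duplicate it into a 16-bit pair
//   pshuflw    dst, dst, 0   ; splat that pair across the low 64 bits
//   punpcklqdq dst, dst      ; 16-byte vectors only: copy low qword up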
18541
18542 instruct ReplB_mem(vec dst, memory mem) %{
18543 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18544 match(Set dst (Replicate (LoadB mem)));
18545 format %{ "replicateB $dst,$mem" %}
18546 ins_encode %{
18547 int vlen_enc = vector_length_encoding(this);
18548 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18549 %}
18550 ins_pipe( pipe_slow );
18551 %}
18552
18553 // ====================ReplicateS=======================================
18554
18555 instruct vReplS_reg(vec dst, rRegI src) %{
18556 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18557 match(Set dst (Replicate src));
18558 format %{ "replicateS $dst,$src" %}
18559 ins_encode %{
18560 uint vlen = Matcher::vector_length(this);
18561 int vlen_enc = vector_length_encoding(this);
18562 if (UseAVX >= 2) {
18563 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18564 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18565 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18566 } else {
18567 __ movdl($dst$$XMMRegister, $src$$Register);
18568 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18569 }
18570 } else {
18571 assert(UseAVX < 2, "");
18572 __ movdl($dst$$XMMRegister, $src$$Register);
18573 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18574 if (vlen >= 8) {
18575 assert(vlen == 8, "");
18576 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18577 }
18578 }
18579 %}
18580 ins_pipe( pipe_slow );
18581 %}
18582
18583 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18584 match(Set dst (Replicate con));
18585 effect(TEMP rtmp);
18586 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18587 ins_encode %{
18588 int vlen_enc = vector_length_encoding(this);
18589 BasicType bt = Matcher::vector_element_basic_type(this);
18590 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18591 __ movl($rtmp$$Register, $con$$constant);
18592 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18593 %}
18594 ins_pipe( pipe_slow );
18595 %}
18596
18597 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18598 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18599 match(Set dst (Replicate src));
18600 effect(TEMP rtmp);
18601 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18602 ins_encode %{
18603 int vlen_enc = vector_length_encoding(this);
18604 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18605 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18606 %}
18607 ins_pipe( pipe_slow );
18608 %}
18609
18610 instruct ReplS_mem(vec dst, memory mem) %{
18611 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18612 match(Set dst (Replicate (LoadS mem)));
18613 format %{ "replicateS $dst,$mem" %}
18614 ins_encode %{
18615 int vlen_enc = vector_length_encoding(this);
18616 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18617 %}
18618 ins_pipe( pipe_slow );
18619 %}
18620
18621 // ====================ReplicateI=======================================
18622
18623 instruct ReplI_reg(vec dst, rRegI src) %{
18624 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18625 match(Set dst (Replicate src));
18626 format %{ "replicateI $dst,$src" %}
18627 ins_encode %{
18628 uint vlen = Matcher::vector_length(this);
18629 int vlen_enc = vector_length_encoding(this);
18630 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18631 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18632 } else if (VM_Version::supports_avx2()) {
18633 __ movdl($dst$$XMMRegister, $src$$Register);
18634 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18635 } else {
18636 __ movdl($dst$$XMMRegister, $src$$Register);
18637 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18638 }
18639 %}
18640 ins_pipe( pipe_slow );
18641 %}
18642
18643 instruct ReplI_mem(vec dst, memory mem) %{
18644 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18645 match(Set dst (Replicate (LoadI mem)));
18646 format %{ "replicateI $dst,$mem" %}
18647 ins_encode %{
18648 int vlen_enc = vector_length_encoding(this);
18649 if (VM_Version::supports_avx2()) {
18650 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18651 } else if (VM_Version::supports_avx()) {
18652 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18653 } else {
18654 __ movdl($dst$$XMMRegister, $mem$$Address);
18655 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18656 }
18657 %}
18658 ins_pipe( pipe_slow );
18659 %}
18660
18661 instruct ReplI_imm(vec dst, immI con) %{
18662 predicate(Matcher::is_non_long_integral_vector(n));
18663 match(Set dst (Replicate con));
18664 format %{ "replicateI $dst,$con" %}
18665 ins_encode %{
18666 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18667 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18668 type2aelembytes(Matcher::vector_element_basic_type(this))));
18669 BasicType bt = Matcher::vector_element_basic_type(this);
18670 int vlen = Matcher::vector_length_in_bytes(this);
18671 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18672 %}
18673 ins_pipe( pipe_slow );
18674 %}
18675
// Replicate scalar zero into a vector
18677 instruct ReplI_zero(vec dst, immI_0 zero) %{
18678 predicate(Matcher::is_non_long_integral_vector(n));
18679 match(Set dst (Replicate zero));
18680 format %{ "replicateI $dst,$zero" %}
18681 ins_encode %{
18682 int vlen_enc = vector_length_encoding(this);
18683 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18684 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18685 } else {
18686 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18687 }
18688 %}
18689 ins_pipe( fpu_reg_reg );
18690 %}
18691
18692 instruct ReplI_M1(vec dst, immI_M1 con) %{
18693 predicate(Matcher::is_non_long_integral_vector(n));
18694 match(Set dst (Replicate con));
18695 format %{ "vallones $dst" %}
18696 ins_encode %{
18697 int vector_len = vector_length_encoding(this);
18698 __ vallones($dst$$XMMRegister, vector_len);
18699 %}
18700 ins_pipe( pipe_slow );
18701 %}
18702
18703 // ====================ReplicateL=======================================
18704
// Replicate a long (8-byte) scalar into a vector
18706 instruct ReplL_reg(vec dst, rRegL src) %{
18707 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18708 match(Set dst (Replicate src));
18709 format %{ "replicateL $dst,$src" %}
18710 ins_encode %{
18711 int vlen = Matcher::vector_length(this);
18712 int vlen_enc = vector_length_encoding(this);
18713 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18714 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18715 } else if (VM_Version::supports_avx2()) {
18716 __ movdq($dst$$XMMRegister, $src$$Register);
18717 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18718 } else {
18719 __ movdq($dst$$XMMRegister, $src$$Register);
18720 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18721 }
18722 %}
18723 ins_pipe( pipe_slow );
18724 %}
18725
18726 instruct ReplL_mem(vec dst, memory mem) %{
18727 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18728 match(Set dst (Replicate (LoadL mem)));
18729 format %{ "replicateL $dst,$mem" %}
18730 ins_encode %{
18731 int vlen_enc = vector_length_encoding(this);
18732 if (VM_Version::supports_avx2()) {
18733 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18734 } else if (VM_Version::supports_sse3()) {
18735 __ movddup($dst$$XMMRegister, $mem$$Address);
18736 } else {
18737 __ movq($dst$$XMMRegister, $mem$$Address);
18738 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18739 }
18740 %}
18741 ins_pipe( pipe_slow );
18742 %}
18743
// Replicate a long (8-byte) scalar immediate into a vector by loading it from the constant table.
18745 instruct ReplL_imm(vec dst, immL con) %{
18746 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18747 match(Set dst (Replicate con));
18748 format %{ "replicateL $dst,$con" %}
18749 ins_encode %{
18750 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18751 int vlen = Matcher::vector_length_in_bytes(this);
18752 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18753 %}
18754 ins_pipe( pipe_slow );
18755 %}
18756
18757 instruct ReplL_zero(vec dst, immL0 zero) %{
18758 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18759 match(Set dst (Replicate zero));
18760 format %{ "replicateL $dst,$zero" %}
18761 ins_encode %{
18762 int vlen_enc = vector_length_encoding(this);
18763 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18764 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18765 } else {
18766 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18767 }
18768 %}
18769 ins_pipe( fpu_reg_reg );
18770 %}
18771
18772 instruct ReplL_M1(vec dst, immL_M1 con) %{
18773 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18774 match(Set dst (Replicate con));
18775 format %{ "vallones $dst" %}
18776 ins_encode %{
18777 int vector_len = vector_length_encoding(this);
18778 __ vallones($dst$$XMMRegister, vector_len);
18779 %}
18780 ins_pipe( pipe_slow );
18781 %}
18782
18783 // ====================ReplicateF=======================================
18784
18785 instruct vReplF_reg(vec dst, vlRegF src) %{
18786 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18787 match(Set dst (Replicate src));
18788 format %{ "replicateF $dst,$src" %}
18789 ins_encode %{
18790 uint vlen = Matcher::vector_length(this);
18791 int vlen_enc = vector_length_encoding(this);
18792 if (vlen <= 4) {
18793 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18794 } else if (VM_Version::supports_avx2()) {
18795 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18796 } else {
18797 assert(vlen == 8, "sanity");
18798 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18799 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18800 }
18801 %}
18802 ins_pipe( pipe_slow );
18803 %}
18804
18805 instruct ReplF_reg(vec dst, vlRegF src) %{
18806 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18807 match(Set dst (Replicate src));
18808 format %{ "replicateF $dst,$src" %}
18809 ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); // splat lane 0 of $src into all four lanes
18811 %}
18812 ins_pipe( pipe_slow );
18813 %}
18814
18815 instruct ReplF_mem(vec dst, memory mem) %{
18816 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18817 match(Set dst (Replicate (LoadF mem)));
18818 format %{ "replicateF $dst,$mem" %}
18819 ins_encode %{
18820 int vlen_enc = vector_length_encoding(this);
18821 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18822 %}
18823 ins_pipe( pipe_slow );
18824 %}
18825
// Replicate a float scalar immediate into a vector by loading it from the constant table.
18827 instruct ReplF_imm(vec dst, immF con) %{
18828 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18829 match(Set dst (Replicate con));
18830 format %{ "replicateF $dst,$con" %}
18831 ins_encode %{
18832 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18833 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18834 int vlen = Matcher::vector_length_in_bytes(this);
18835 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18836 %}
18837 ins_pipe( pipe_slow );
18838 %}
18839
18840 instruct ReplF_zero(vec dst, immF0 zero) %{
18841 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18842 match(Set dst (Replicate zero));
18843 format %{ "replicateF $dst,$zero" %}
18844 ins_encode %{
18845 int vlen_enc = vector_length_encoding(this);
18846 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18847 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18848 } else {
18849 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18850 }
18851 %}
18852 ins_pipe( fpu_reg_reg );
18853 %}
18854
18855 // ====================ReplicateD=======================================
18856
// Replicate a double (8-byte) scalar into a vector
18858 instruct vReplD_reg(vec dst, vlRegD src) %{
18859 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18860 match(Set dst (Replicate src));
18861 format %{ "replicateD $dst,$src" %}
18862 ins_encode %{
18863 uint vlen = Matcher::vector_length(this);
18864 int vlen_enc = vector_length_encoding(this);
18865 if (vlen <= 2) {
18866 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18867 } else if (VM_Version::supports_avx2()) {
18868 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18869 } else {
18870 assert(vlen == 4, "sanity");
18871 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18872 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18873 }
18874 %}
18875 ins_pipe( pipe_slow );
18876 %}
18877
18878 instruct ReplD_reg(vec dst, vlRegD src) %{
18879 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18880 match(Set dst (Replicate src));
18881 format %{ "replicateD $dst,$src" %}
18882 ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); // 0x44 selects dwords {0,1,0,1}: duplicate the low 64-bit double
18884 %}
18885 ins_pipe( pipe_slow );
18886 %}
18887
18888 instruct ReplD_mem(vec dst, memory mem) %{
18889 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18890 match(Set dst (Replicate (LoadD mem)));
18891 format %{ "replicateD $dst,$mem" %}
18892 ins_encode %{
18893 if (Matcher::vector_length(this) >= 4) {
18894 int vlen_enc = vector_length_encoding(this);
18895 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18896 } else {
18897 __ movddup($dst$$XMMRegister, $mem$$Address);
18898 }
18899 %}
18900 ins_pipe( pipe_slow );
18901 %}
18902
// Replicate a double (8-byte) scalar immediate into a vector by loading it from the constant table.
18904 instruct ReplD_imm(vec dst, immD con) %{
18905 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18906 match(Set dst (Replicate con));
18907 format %{ "replicateD $dst,$con" %}
18908 ins_encode %{
18909 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18910 int vlen = Matcher::vector_length_in_bytes(this);
18911 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18912 %}
18913 ins_pipe( pipe_slow );
18914 %}
18915
18916 instruct ReplD_zero(vec dst, immD0 zero) %{
18917 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18918 match(Set dst (Replicate zero));
18919 format %{ "replicateD $dst,$zero" %}
18920 ins_encode %{
18921 int vlen_enc = vector_length_encoding(this);
18922 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18923 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18924 } else {
18925 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18926 }
18927 %}
18928 ins_pipe( fpu_reg_reg );
18929 %}
18930
18931 // ====================VECTOR INSERT=======================================
18932
18933 instruct insert(vec dst, rRegI val, immU8 idx) %{
18934 predicate(Matcher::vector_length_in_bytes(n) < 32);
18935 match(Set dst (VectorInsert (Binary dst val) idx));
18936 format %{ "vector_insert $dst,$val,$idx" %}
18937 ins_encode %{
18938 assert(UseSSE >= 4, "required");
18939 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
18940
18941 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18942
18943 assert(is_integral_type(elem_bt), "");
18944 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18945
18946 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
18947 %}
18948 ins_pipe( pipe_slow );
18949 %}
18950
18951 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
18952 predicate(Matcher::vector_length_in_bytes(n) == 32);
18953 match(Set dst (VectorInsert (Binary src val) idx));
18954 effect(TEMP vtmp);
18955 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18956 ins_encode %{
18958 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18959 int elem_per_lane = 16/type2aelembytes(elem_bt);
18960 int log2epr = log2(elem_per_lane);
18961
18962 assert(is_integral_type(elem_bt), "sanity");
18963 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18964
18965 uint x_idx = $idx$$constant & right_n_bits(log2epr);
18966 uint y_idx = ($idx$$constant >> log2epr) & 1;
18967 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18968 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18969 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18970 %}
18971 ins_pipe( pipe_slow );
18972 %}
18973
18974 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
18975 predicate(Matcher::vector_length_in_bytes(n) == 64);
18976 match(Set dst (VectorInsert (Binary src val) idx));
18977 effect(TEMP vtmp);
18978 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18979 ins_encode %{
18980 assert(UseAVX > 2, "sanity");
18981
18982 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18983 int elem_per_lane = 16/type2aelembytes(elem_bt);
18984 int log2epr = log2(elem_per_lane);
18985
18986 assert(is_integral_type(elem_bt), "");
18987 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18988
18989 uint x_idx = $idx$$constant & right_n_bits(log2epr);
18990 uint y_idx = ($idx$$constant >> log2epr) & 3;
18991 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18992 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18993 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18994 %}
18995 ins_pipe( pipe_slow );
18996 %}
18997
18998 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
18999 predicate(Matcher::vector_length(n) == 2);
19000 match(Set dst (VectorInsert (Binary dst val) idx));
19001 format %{ "vector_insert $dst,$val,$idx" %}
19002 ins_encode %{
19003 assert(UseSSE >= 4, "required");
19004 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19005 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19006
19007 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19008 %}
19009 ins_pipe( pipe_slow );
19010 %}
19011
19012 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19013 predicate(Matcher::vector_length(n) == 4);
19014 match(Set dst (VectorInsert (Binary src val) idx));
19015 effect(TEMP vtmp);
19016 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19017 ins_encode %{
19018 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19019 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19020
19021 uint x_idx = $idx$$constant & right_n_bits(1);
19022 uint y_idx = ($idx$$constant >> 1) & 1;
19024 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19025 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19026 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19027 %}
19028 ins_pipe( pipe_slow );
19029 %}
19030
19031 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19032 predicate(Matcher::vector_length(n) == 8);
19033 match(Set dst (VectorInsert (Binary src val) idx));
19034 effect(TEMP vtmp);
19035 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19036 ins_encode %{
19037 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19038 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19039
19040 uint x_idx = $idx$$constant & right_n_bits(1);
19041 uint y_idx = ($idx$$constant >> 1) & 3;
19042 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19043 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19044 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19045 %}
19046 ins_pipe( pipe_slow );
19047 %}
19048
19049 instruct insertF(vec dst, regF val, immU8 idx) %{
19050 predicate(Matcher::vector_length(n) < 8);
19051 match(Set dst (VectorInsert (Binary dst val) idx));
19052 format %{ "vector_insert $dst,$val,$idx" %}
19053 ins_encode %{
19054 assert(UseSSE >= 4, "sanity");
19055
19056 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19057 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19058
19059 uint x_idx = $idx$$constant & right_n_bits(2);
19060 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19061 %}
19062 ins_pipe( pipe_slow );
19063 %}
19064
19065 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19066 predicate(Matcher::vector_length(n) >= 8);
19067 match(Set dst (VectorInsert (Binary src val) idx));
19068 effect(TEMP vtmp);
19069 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19070 ins_encode %{
19071 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19072 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19073
19074 int vlen = Matcher::vector_length(this);
19075 uint x_idx = $idx$$constant & right_n_bits(2);
19076 if (vlen == 8) {
19077 uint y_idx = ($idx$$constant >> 2) & 1;
19079 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19080 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19081 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19082 } else {
19083 assert(vlen == 16, "sanity");
19084 uint y_idx = ($idx$$constant >> 2) & 3;
19085 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19086 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19087 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19088 }
19089 %}
19090 ins_pipe( pipe_slow );
19091 %}
19092
19093 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19094 predicate(Matcher::vector_length(n) == 2);
19095 match(Set dst (VectorInsert (Binary dst val) idx));
19096 effect(TEMP tmp);
19097 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19098 ins_encode %{
19099 assert(UseSSE >= 4, "sanity");
19100 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19101 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19102
19103 __ movq($tmp$$Register, $val$$XMMRegister);
19104 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19105 %}
19106 ins_pipe( pipe_slow );
19107 %}
19108
19109 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19110 predicate(Matcher::vector_length(n) == 4);
19111 match(Set dst (VectorInsert (Binary src val) idx));
19112 effect(TEMP vtmp, TEMP tmp);
19113 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19114 ins_encode %{
19115 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19116 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19117
19118 uint x_idx = $idx$$constant & right_n_bits(1);
19119 uint y_idx = ($idx$$constant >> 1) & 1;
19121 __ movq($tmp$$Register, $val$$XMMRegister);
19122 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19123 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19124 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19125 %}
19126 ins_pipe( pipe_slow );
19127 %}
19128
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19130 predicate(Matcher::vector_length(n) == 8);
19131 match(Set dst (VectorInsert (Binary src val) idx));
19132 effect(TEMP tmp, TEMP vtmp);
19133 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19134 ins_encode %{
19135 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19136 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19137
19138 uint x_idx = $idx$$constant & right_n_bits(1);
19139 uint y_idx = ($idx$$constant >> 1) & 3;
19140 __ movq($tmp$$Register, $val$$XMMRegister);
19141 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19142 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19143 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19144 %}
19145 ins_pipe( pipe_slow );
19146 %}
19147
19148 // ====================REDUCTION ARITHMETIC=======================================
19149
19150 // =======================Int Reduction==========================================
19151
19152 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19153 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19154 match(Set dst (AddReductionVI src1 src2));
19155 match(Set dst (MulReductionVI src1 src2));
19156 match(Set dst (AndReductionV src1 src2));
19157 match(Set dst ( OrReductionV src1 src2));
19158 match(Set dst (XorReductionV src1 src2));
19159 match(Set dst (MinReductionV src1 src2));
19160 match(Set dst (MaxReductionV src1 src2));
19161 effect(TEMP vtmp1, TEMP vtmp2);
19162 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19163 ins_encode %{
19164 int opcode = this->ideal_Opcode();
19165 int vlen = Matcher::vector_length(this, $src2);
19166 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19167 %}
19168 ins_pipe( pipe_slow );
19169 %}
19170
19171 // =======================Long Reduction==========================================
19172
19173 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19174 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19175 match(Set dst (AddReductionVL src1 src2));
19176 match(Set dst (MulReductionVL src1 src2));
19177 match(Set dst (AndReductionV src1 src2));
19178 match(Set dst ( OrReductionV src1 src2));
19179 match(Set dst (XorReductionV src1 src2));
19180 match(Set dst (MinReductionV src1 src2));
19181 match(Set dst (MaxReductionV src1 src2));
19182 effect(TEMP vtmp1, TEMP vtmp2);
19183 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19184 ins_encode %{
19185 int opcode = this->ideal_Opcode();
19186 int vlen = Matcher::vector_length(this, $src2);
19187 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19188 %}
19189 ins_pipe( pipe_slow );
19190 %}
19191
19192 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19193 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19194 match(Set dst (AddReductionVL src1 src2));
19195 match(Set dst (MulReductionVL src1 src2));
19196 match(Set dst (AndReductionV src1 src2));
19197 match(Set dst ( OrReductionV src1 src2));
19198 match(Set dst (XorReductionV src1 src2));
19199 match(Set dst (MinReductionV src1 src2));
19200 match(Set dst (MaxReductionV src1 src2));
19201 effect(TEMP vtmp1, TEMP vtmp2);
19202 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19203 ins_encode %{
19204 int opcode = this->ideal_Opcode();
19205 int vlen = Matcher::vector_length(this, $src2);
19206 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19207 %}
19208 ins_pipe( pipe_slow );
19209 %}
19210
19211 // =======================Float Reduction==========================================
19212
19213 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19214 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19215 match(Set dst (AddReductionVF dst src));
19216 match(Set dst (MulReductionVF dst src));
19217 effect(TEMP dst, TEMP vtmp);
19218 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19219 ins_encode %{
19220 int opcode = this->ideal_Opcode();
19221 int vlen = Matcher::vector_length(this, $src);
19222 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19223 %}
19224 ins_pipe( pipe_slow );
19225 %}
19226
19227 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19228 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19229 match(Set dst (AddReductionVF dst src));
19230 match(Set dst (MulReductionVF dst src));
19231 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19232 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19233 ins_encode %{
19234 int opcode = this->ideal_Opcode();
19235 int vlen = Matcher::vector_length(this, $src);
19236 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19237 %}
19238 ins_pipe( pipe_slow );
19239 %}
19240
19241 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19242 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19243 match(Set dst (AddReductionVF dst src));
19244 match(Set dst (MulReductionVF dst src));
19245 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19246 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19247 ins_encode %{
19248 int opcode = this->ideal_Opcode();
19249 int vlen = Matcher::vector_length(this, $src);
19250 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19251 %}
19252 ins_pipe( pipe_slow );
19253 %}
19254
19255
19256 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19257 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19258 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19259 // src1 contains reduction identity
19260 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19261 match(Set dst (AddReductionVF src1 src2));
19262 match(Set dst (MulReductionVF src1 src2));
19263 effect(TEMP dst);
19264 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19265 ins_encode %{
19266 int opcode = this->ideal_Opcode();
19267 int vlen = Matcher::vector_length(this, $src2);
19268 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19269 %}
19270 ins_pipe( pipe_slow );
19271 %}
19272
19273 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19274 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19275 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19276 // src1 contains reduction identity
19277 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19278 match(Set dst (AddReductionVF src1 src2));
19279 match(Set dst (MulReductionVF src1 src2));
19280 effect(TEMP dst, TEMP vtmp);
19281 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19282 ins_encode %{
19283 int opcode = this->ideal_Opcode();
19284 int vlen = Matcher::vector_length(this, $src2);
19285 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19286 %}
19287 ins_pipe( pipe_slow );
19288 %}
19289
19290 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19291 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19292 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19293 // src1 contains reduction identity
19294 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19295 match(Set dst (AddReductionVF src1 src2));
19296 match(Set dst (MulReductionVF src1 src2));
19297 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19298 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19299 ins_encode %{
19300 int opcode = this->ideal_Opcode();
19301 int vlen = Matcher::vector_length(this, $src2);
19302 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19303 %}
19304 ins_pipe( pipe_slow );
19305 %}
19306
19307 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19308 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19309 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19310 // src1 contains reduction identity
19311 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19312 match(Set dst (AddReductionVF src1 src2));
19313 match(Set dst (MulReductionVF src1 src2));
19314 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19315 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19316 ins_encode %{
19317 int opcode = this->ideal_Opcode();
19318 int vlen = Matcher::vector_length(this, $src2);
19319 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19320 %}
19321 ins_pipe( pipe_slow );
19322 %}
19323
19324 // =======================Double Reduction==========================================
19325
19326 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19327 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19328 match(Set dst (AddReductionVD dst src));
19329 match(Set dst (MulReductionVD dst src));
19330 effect(TEMP dst, TEMP vtmp);
19331 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19332 ins_encode %{
19333 int opcode = this->ideal_Opcode();
19334 int vlen = Matcher::vector_length(this, $src);
19335 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19336 %}
19337 ins_pipe( pipe_slow );
19338 %}
19339
19340 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19341 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19342 match(Set dst (AddReductionVD dst src));
19343 match(Set dst (MulReductionVD dst src));
19344 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19345 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19346 ins_encode %{
19347 int opcode = this->ideal_Opcode();
19348 int vlen = Matcher::vector_length(this, $src);
19349 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19350 %}
19351 ins_pipe( pipe_slow );
19352 %}
19353
19354 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19355 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19356 match(Set dst (AddReductionVD dst src));
19357 match(Set dst (MulReductionVD dst src));
19358 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19359 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19360 ins_encode %{
19361 int opcode = this->ideal_Opcode();
19362 int vlen = Matcher::vector_length(this, $src);
19363 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19364 %}
19365 ins_pipe( pipe_slow );
19366 %}
19367
19368 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19369 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19370 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19371 // src1 contains reduction identity
19372 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19373 match(Set dst (AddReductionVD src1 src2));
19374 match(Set dst (MulReductionVD src1 src2));
19375 effect(TEMP dst);
19376 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19377 ins_encode %{
19378 int opcode = this->ideal_Opcode();
19379 int vlen = Matcher::vector_length(this, $src2);
19380 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19381 %}
19382 ins_pipe( pipe_slow );
19383 %}
19384
19385 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19386 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19387 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19388 // src1 contains reduction identity
19389 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19390 match(Set dst (AddReductionVD src1 src2));
19391 match(Set dst (MulReductionVD src1 src2));
19392 effect(TEMP dst, TEMP vtmp);
19393 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19394 ins_encode %{
19395 int opcode = this->ideal_Opcode();
19396 int vlen = Matcher::vector_length(this, $src2);
19397 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19398 %}
19399 ins_pipe( pipe_slow );
19400 %}
19401
19402 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19403 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19404 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19405 // src1 contains reduction identity
19406 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19407 match(Set dst (AddReductionVD src1 src2));
19408 match(Set dst (MulReductionVD src1 src2));
19409 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19410 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19411 ins_encode %{
19412 int opcode = this->ideal_Opcode();
19413 int vlen = Matcher::vector_length(this, $src2);
19414 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19415 %}
19416 ins_pipe( pipe_slow );
19417 %}
19418
19419 // =======================Byte Reduction==========================================
19420
19421 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19422 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19423 match(Set dst (AddReductionVI src1 src2));
19424 match(Set dst (AndReductionV src1 src2));
19425 match(Set dst ( OrReductionV src1 src2));
19426 match(Set dst (XorReductionV src1 src2));
19427 match(Set dst (MinReductionV src1 src2));
19428 match(Set dst (MaxReductionV src1 src2));
19429 effect(TEMP vtmp1, TEMP vtmp2);
19430 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19431 ins_encode %{
19432 int opcode = this->ideal_Opcode();
19433 int vlen = Matcher::vector_length(this, $src2);
19434 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19435 %}
19436 ins_pipe( pipe_slow );
19437 %}
19438
19439 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19440 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19441 match(Set dst (AddReductionVI src1 src2));
19442 match(Set dst (AndReductionV src1 src2));
19443 match(Set dst ( OrReductionV src1 src2));
19444 match(Set dst (XorReductionV src1 src2));
19445 match(Set dst (MinReductionV src1 src2));
19446 match(Set dst (MaxReductionV src1 src2));
19447 effect(TEMP vtmp1, TEMP vtmp2);
19448 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19449 ins_encode %{
19450 int opcode = this->ideal_Opcode();
19451 int vlen = Matcher::vector_length(this, $src2);
19452 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19453 %}
19454 ins_pipe( pipe_slow );
19455 %}
19456
19457 // =======================Short Reduction==========================================
19458
19459 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19460 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19461 match(Set dst (AddReductionVI src1 src2));
19462 match(Set dst (MulReductionVI src1 src2));
19463 match(Set dst (AndReductionV src1 src2));
19464 match(Set dst ( OrReductionV src1 src2));
19465 match(Set dst (XorReductionV src1 src2));
19466 match(Set dst (MinReductionV src1 src2));
19467 match(Set dst (MaxReductionV src1 src2));
19468 effect(TEMP vtmp1, TEMP vtmp2);
19469 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19470 ins_encode %{
19471 int opcode = this->ideal_Opcode();
19472 int vlen = Matcher::vector_length(this, $src2);
19473 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19474 %}
19475 ins_pipe( pipe_slow );
19476 %}
19477
19478 // =======================Mul Reduction==========================================
19479
19480 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19481 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19482 Matcher::vector_length(n->in(2)) <= 32); // src2
19483 match(Set dst (MulReductionVI src1 src2));
19484 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19485 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19486 ins_encode %{
19487 int opcode = this->ideal_Opcode();
19488 int vlen = Matcher::vector_length(this, $src2);
19489 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19490 %}
19491 ins_pipe( pipe_slow );
19492 %}
19493
19494 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19495 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19496 Matcher::vector_length(n->in(2)) == 64); // src2
19497 match(Set dst (MulReductionVI src1 src2));
19498 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19499 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19500 ins_encode %{
19501 int opcode = this->ideal_Opcode();
19502 int vlen = Matcher::vector_length(this, $src2);
19503 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19504 %}
19505 ins_pipe( pipe_slow );
19506 %}
19507
19508 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
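// The immF src1 operand of the rules below must be the reduction identity:
// +Infinity for min and -Infinity for max (enforced by the bottom_type()
// checks in the predicates), since min(+Inf, x) == x and max(-Inf, x) == x
// for every float x, with NaN simply propagating.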
19510 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19511 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19512 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19513 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19514 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19515 Matcher::vector_length(n->in(2)) == 2);
19516 match(Set dst (MinReductionV src1 src2));
19517 match(Set dst (MaxReductionV src1 src2));
19518 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19519 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19520 ins_encode %{
19521 assert(UseAVX > 0, "sanity");
19522
19523 int opcode = this->ideal_Opcode();
19524 int vlen = Matcher::vector_length(this, $src2);
19525 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19526 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19527 %}
19528 ins_pipe( pipe_slow );
19529 %}
19530
19531 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19532 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19533 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19534 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19535 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19536 Matcher::vector_length(n->in(2)) >= 4);
19537 match(Set dst (MinReductionV src1 src2));
19538 match(Set dst (MaxReductionV src1 src2));
19539 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19540 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19541 ins_encode %{
19542 assert(UseAVX > 0, "sanity");
19543
19544 int opcode = this->ideal_Opcode();
19545 int vlen = Matcher::vector_length(this, $src2);
19546 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19547 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19548 %}
19549 ins_pipe( pipe_slow );
19550 %}
19551
19552 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19553 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19554 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19555 Matcher::vector_length(n->in(2)) == 2);
19556 match(Set dst (MinReductionV dst src));
19557 match(Set dst (MaxReductionV dst src));
19558 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19559 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19560 ins_encode %{
19561 assert(UseAVX > 0, "sanity");
19562
19563 int opcode = this->ideal_Opcode();
19564 int vlen = Matcher::vector_length(this, $src);
19565 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19566 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19567 %}
19568 ins_pipe( pipe_slow );
19569 %}
19570
19571
19572 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19573 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19574 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19575 Matcher::vector_length(n->in(2)) >= 4);
19576 match(Set dst (MinReductionV dst src));
19577 match(Set dst (MaxReductionV dst src));
19578 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19579 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19580 ins_encode %{
19581 assert(UseAVX > 0, "sanity");
19582
19583 int opcode = this->ideal_Opcode();
19584 int vlen = Matcher::vector_length(this, $src);
19585 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19586 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19587 %}
19588 ins_pipe( pipe_slow );
19589 %}
19590
19591 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
19592 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19593 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19594 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19595 Matcher::vector_length(n->in(2)) == 2);
19596 match(Set dst (MinReductionV src1 src2));
19597 match(Set dst (MaxReductionV src1 src2));
19598 effect(TEMP dst, TEMP xtmp1);
19599 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19600 ins_encode %{
19601 int opcode = this->ideal_Opcode();
19602 int vlen = Matcher::vector_length(this, $src2);
19603 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19604 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19605 %}
19606 ins_pipe( pipe_slow );
19607 %}
19608
19609 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19610 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19611 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19612 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19613 Matcher::vector_length(n->in(2)) >= 4);
19614 match(Set dst (MinReductionV src1 src2));
19615 match(Set dst (MaxReductionV src1 src2));
19616 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19617 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19618 ins_encode %{
19619 int opcode = this->ideal_Opcode();
19620 int vlen = Matcher::vector_length(this, $src2);
19621 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19622 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19623 %}
19624 ins_pipe( pipe_slow );
19625 %}
19626
19627 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
19628 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19629 Matcher::vector_length(n->in(2)) == 2);
19630 match(Set dst (MinReductionV dst src));
19631 match(Set dst (MaxReductionV dst src));
19632 effect(TEMP dst, TEMP xtmp1);
19633 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19634 ins_encode %{
19635 int opcode = this->ideal_Opcode();
19636 int vlen = Matcher::vector_length(this, $src);
19637 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19638 $xtmp1$$XMMRegister);
19639 %}
19640 ins_pipe( pipe_slow );
19641 %}
19642
19643 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19644 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19645 Matcher::vector_length(n->in(2)) >= 4);
19646 match(Set dst (MinReductionV dst src));
19647 match(Set dst (MaxReductionV dst src));
19648 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19649 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19650 ins_encode %{
19651 int opcode = this->ideal_Opcode();
19652 int vlen = Matcher::vector_length(this, $src);
19653 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19654 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19655 %}
19656 ins_pipe( pipe_slow );
19657 %}
19658
//--------------------Min/Max Double Reduction --------------------
19660 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19661 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19662 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19663 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19664 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19665 Matcher::vector_length(n->in(2)) == 2);
19666 match(Set dst (MinReductionV src1 src2));
19667 match(Set dst (MaxReductionV src1 src2));
19668 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19669 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19670 ins_encode %{
19671 assert(UseAVX > 0, "sanity");
19672
19673 int opcode = this->ideal_Opcode();
19674 int vlen = Matcher::vector_length(this, $src2);
19675 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19676 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19677 %}
19678 ins_pipe( pipe_slow );
19679 %}
19680
19681 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19682 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19683 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19684 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19685 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19686 Matcher::vector_length(n->in(2)) >= 4);
19687 match(Set dst (MinReductionV src1 src2));
19688 match(Set dst (MaxReductionV src1 src2));
19689 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19690 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19691 ins_encode %{
19692 assert(UseAVX > 0, "sanity");
19693
19694 int opcode = this->ideal_Opcode();
19695 int vlen = Matcher::vector_length(this, $src2);
19696 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19697 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19698 %}
19699 ins_pipe( pipe_slow );
19700 %}
19701
19702
19703 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19704 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19705 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19706 Matcher::vector_length(n->in(2)) == 2);
19707 match(Set dst (MinReductionV dst src));
19708 match(Set dst (MaxReductionV dst src));
19709 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19710 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19711 ins_encode %{
19712 assert(UseAVX > 0, "sanity");
19713
19714 int opcode = this->ideal_Opcode();
19715 int vlen = Matcher::vector_length(this, $src);
19716 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19717 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19718 %}
19719 ins_pipe( pipe_slow );
19720 %}
19721
19722 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19723 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19724 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19725 Matcher::vector_length(n->in(2)) >= 4);
19726 match(Set dst (MinReductionV dst src));
19727 match(Set dst (MaxReductionV dst src));
19728 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19729 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19730 ins_encode %{
19731 assert(UseAVX > 0, "sanity");
19732
19733 int opcode = this->ideal_Opcode();
19734 int vlen = Matcher::vector_length(this, $src);
19735 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19736 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19737 %}
19738 ins_pipe( pipe_slow );
19739 %}
19740
19741 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
19742 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19743 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19744 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19745 Matcher::vector_length(n->in(2)) == 2);
19746 match(Set dst (MinReductionV src1 src2));
19747 match(Set dst (MaxReductionV src1 src2));
19748 effect(TEMP dst, TEMP xtmp1);
19749 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19750 ins_encode %{
19751 int opcode = this->ideal_Opcode();
19752 int vlen = Matcher::vector_length(this, $src2);
19753 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19754 xnoreg, xnoreg, $xtmp1$$XMMRegister);
19755 %}
19756 ins_pipe( pipe_slow );
19757 %}
19758
19759 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19760 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19761 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19762 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19763 Matcher::vector_length(n->in(2)) >= 4);
19764 match(Set dst (MinReductionV src1 src2));
19765 match(Set dst (MaxReductionV src1 src2));
19766 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19767 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19768 ins_encode %{
19769 int opcode = this->ideal_Opcode();
19770 int vlen = Matcher::vector_length(this, $src2);
19771 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19772 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19773 %}
19774 ins_pipe( pipe_slow );
19775 %}
19776
19777
19778 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
19779 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19780 Matcher::vector_length(n->in(2)) == 2);
19781 match(Set dst (MinReductionV dst src));
19782 match(Set dst (MaxReductionV dst src));
19783 effect(TEMP dst, TEMP xtmp1);
19784 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19785 ins_encode %{
19786 int opcode = this->ideal_Opcode();
19787 int vlen = Matcher::vector_length(this, $src);
19788 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19789 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19790 %}
19791 ins_pipe( pipe_slow );
19792 %}
19793
19794 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19795 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19796 Matcher::vector_length(n->in(2)) >= 4);
19797 match(Set dst (MinReductionV dst src));
19798 match(Set dst (MaxReductionV dst src));
19799 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19800 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19801 ins_encode %{
19802 int opcode = this->ideal_Opcode();
19803 int vlen = Matcher::vector_length(this, $src);
19804 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19805 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19806 %}
19807 ins_pipe( pipe_slow );
19808 %}
19809
19810 // ====================VECTOR ARITHMETIC=======================================
19811
19812 // --------------------------------- ADD --------------------------------------
19813
19814 // Bytes vector add
19815 instruct vaddB(vec dst, vec src) %{
19816 predicate(UseAVX == 0);
19817 match(Set dst (AddVB dst src));
19818 format %{ "paddb $dst,$src\t! add packedB" %}
19819 ins_encode %{
19820 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19821 %}
19822 ins_pipe( pipe_slow );
19823 %}
19824
19825 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19826 predicate(UseAVX > 0);
19827 match(Set dst (AddVB src1 src2));
19828 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
19829 ins_encode %{
19830 int vlen_enc = vector_length_encoding(this);
19831 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19832 %}
19833 ins_pipe( pipe_slow );
19834 %}
19835
19836 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19837 predicate((UseAVX > 0) &&
19838 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19839 match(Set dst (AddVB src (LoadVector mem)));
19840 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
19841 ins_encode %{
19842 int vlen_enc = vector_length_encoding(this);
19843 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19844 %}
19845 ins_pipe( pipe_slow );
19846 %}
19847
19848 // Shorts/Chars vector add
19849 instruct vaddS(vec dst, vec src) %{
19850 predicate(UseAVX == 0);
19851 match(Set dst (AddVS dst src));
19852 format %{ "paddw $dst,$src\t! add packedS" %}
19853 ins_encode %{
19854 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19855 %}
19856 ins_pipe( pipe_slow );
19857 %}
19858
19859 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19860 predicate(UseAVX > 0);
19861 match(Set dst (AddVS src1 src2));
19862 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
19863 ins_encode %{
19864 int vlen_enc = vector_length_encoding(this);
19865 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19866 %}
19867 ins_pipe( pipe_slow );
19868 %}
19869
19870 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19871 predicate((UseAVX > 0) &&
19872 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19873 match(Set dst (AddVS src (LoadVector mem)));
19874 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
19875 ins_encode %{
19876 int vlen_enc = vector_length_encoding(this);
19877 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19878 %}
19879 ins_pipe( pipe_slow );
19880 %}
19881
19882 // Integers vector add
19883 instruct vaddI(vec dst, vec src) %{
19884 predicate(UseAVX == 0);
19885 match(Set dst (AddVI dst src));
19886 format %{ "paddd $dst,$src\t! add packedI" %}
19887 ins_encode %{
19888 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19889 %}
19890 ins_pipe( pipe_slow );
19891 %}
19892
19893 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19894 predicate(UseAVX > 0);
19895 match(Set dst (AddVI src1 src2));
19896 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
19897 ins_encode %{
19898 int vlen_enc = vector_length_encoding(this);
19899 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19900 %}
19901 ins_pipe( pipe_slow );
19902 %}
19903
19905 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19906 predicate((UseAVX > 0) &&
19907 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19908 match(Set dst (AddVI src (LoadVector mem)));
19909 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
19910 ins_encode %{
19911 int vlen_enc = vector_length_encoding(this);
19912 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19913 %}
19914 ins_pipe( pipe_slow );
19915 %}
19916
19917 // Longs vector add
19918 instruct vaddL(vec dst, vec src) %{
19919 predicate(UseAVX == 0);
19920 match(Set dst (AddVL dst src));
19921 format %{ "paddq $dst,$src\t! add packedL" %}
19922 ins_encode %{
19923 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19924 %}
19925 ins_pipe( pipe_slow );
19926 %}
19927
19928 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
19929 predicate(UseAVX > 0);
19930 match(Set dst (AddVL src1 src2));
19931 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
19932 ins_encode %{
19933 int vlen_enc = vector_length_encoding(this);
19934 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19935 %}
19936 ins_pipe( pipe_slow );
19937 %}
19938
19939 instruct vaddL_mem(vec dst, vec src, memory mem) %{
19940 predicate((UseAVX > 0) &&
19941 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19942 match(Set dst (AddVL src (LoadVector mem)));
19943 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
19944 ins_encode %{
19945 int vlen_enc = vector_length_encoding(this);
19946 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19947 %}
19948 ins_pipe( pipe_slow );
19949 %}
19950
19951 // Floats vector add
19952 instruct vaddF(vec dst, vec src) %{
19953 predicate(UseAVX == 0);
19954 match(Set dst (AddVF dst src));
19955 format %{ "addps $dst,$src\t! add packedF" %}
19956 ins_encode %{
19957 __ addps($dst$$XMMRegister, $src$$XMMRegister);
19958 %}
19959 ins_pipe( pipe_slow );
19960 %}
19961
19962 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
19963 predicate(UseAVX > 0);
19964 match(Set dst (AddVF src1 src2));
19965 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
19966 ins_encode %{
19967 int vlen_enc = vector_length_encoding(this);
19968 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19969 %}
19970 ins_pipe( pipe_slow );
19971 %}
19972
19973 instruct vaddF_mem(vec dst, vec src, memory mem) %{
19974 predicate((UseAVX > 0) &&
19975 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19976 match(Set dst (AddVF src (LoadVector mem)));
19977 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
19978 ins_encode %{
19979 int vlen_enc = vector_length_encoding(this);
19980 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19981 %}
19982 ins_pipe( pipe_slow );
19983 %}
19984
19985 // Doubles vector add
19986 instruct vaddD(vec dst, vec src) %{
19987 predicate(UseAVX == 0);
19988 match(Set dst (AddVD dst src));
19989 format %{ "addpd $dst,$src\t! add packedD" %}
19990 ins_encode %{
19991 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
19992 %}
19993 ins_pipe( pipe_slow );
19994 %}
19995
19996 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
19997 predicate(UseAVX > 0);
19998 match(Set dst (AddVD src1 src2));
19999 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20000 ins_encode %{
20001 int vlen_enc = vector_length_encoding(this);
20002 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20003 %}
20004 ins_pipe( pipe_slow );
20005 %}
20006
20007 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20008 predicate((UseAVX > 0) &&
20009 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20010 match(Set dst (AddVD src (LoadVector mem)));
20011 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20012 ins_encode %{
20013 int vlen_enc = vector_length_encoding(this);
20014 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20015 %}
20016 ins_pipe( pipe_slow );
20017 %}
20018
20019 // --------------------------------- SUB --------------------------------------
20020
20021 // Bytes vector sub
20022 instruct vsubB(vec dst, vec src) %{
20023 predicate(UseAVX == 0);
20024 match(Set dst (SubVB dst src));
20025 format %{ "psubb $dst,$src\t! sub packedB" %}
20026 ins_encode %{
20027 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20028 %}
20029 ins_pipe( pipe_slow );
20030 %}
20031
20032 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20033 predicate(UseAVX > 0);
20034 match(Set dst (SubVB src1 src2));
20035 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20036 ins_encode %{
20037 int vlen_enc = vector_length_encoding(this);
20038 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20039 %}
20040 ins_pipe( pipe_slow );
20041 %}
20042
20043 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20044 predicate((UseAVX > 0) &&
20045 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20046 match(Set dst (SubVB src (LoadVector mem)));
20047 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20048 ins_encode %{
20049 int vlen_enc = vector_length_encoding(this);
20050 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20051 %}
20052 ins_pipe( pipe_slow );
20053 %}
20054
20055 // Shorts/Chars vector sub
20056 instruct vsubS(vec dst, vec src) %{
20057 predicate(UseAVX == 0);
20058 match(Set dst (SubVS dst src));
20059 format %{ "psubw $dst,$src\t! sub packedS" %}
20060 ins_encode %{
20061 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20062 %}
20063 ins_pipe( pipe_slow );
20064 %}
20065
20067 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20068 predicate(UseAVX > 0);
20069 match(Set dst (SubVS src1 src2));
20070 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20071 ins_encode %{
20072 int vlen_enc = vector_length_encoding(this);
20073 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20074 %}
20075 ins_pipe( pipe_slow );
20076 %}
20077
20078 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20079 predicate((UseAVX > 0) &&
20080 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20081 match(Set dst (SubVS src (LoadVector mem)));
20082 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20083 ins_encode %{
20084 int vlen_enc = vector_length_encoding(this);
20085 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20086 %}
20087 ins_pipe( pipe_slow );
20088 %}
20089
20090 // Integers vector sub
20091 instruct vsubI(vec dst, vec src) %{
20092 predicate(UseAVX == 0);
20093 match(Set dst (SubVI dst src));
20094 format %{ "psubd $dst,$src\t! sub packedI" %}
20095 ins_encode %{
20096 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20097 %}
20098 ins_pipe( pipe_slow );
20099 %}
20100
20101 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20102 predicate(UseAVX > 0);
20103 match(Set dst (SubVI src1 src2));
20104 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20105 ins_encode %{
20106 int vlen_enc = vector_length_encoding(this);
20107 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20108 %}
20109 ins_pipe( pipe_slow );
20110 %}
20111
20112 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20113 predicate((UseAVX > 0) &&
20114 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20115 match(Set dst (SubVI src (LoadVector mem)));
20116 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20117 ins_encode %{
20118 int vlen_enc = vector_length_encoding(this);
20119 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20120 %}
20121 ins_pipe( pipe_slow );
20122 %}
20123
20124 // Longs vector sub
20125 instruct vsubL(vec dst, vec src) %{
20126 predicate(UseAVX == 0);
20127 match(Set dst (SubVL dst src));
20128 format %{ "psubq $dst,$src\t! sub packedL" %}
20129 ins_encode %{
20130 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20131 %}
20132 ins_pipe( pipe_slow );
20133 %}
20134
20135 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20136 predicate(UseAVX > 0);
20137 match(Set dst (SubVL src1 src2));
20138 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20139 ins_encode %{
20140 int vlen_enc = vector_length_encoding(this);
20141 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20142 %}
20143 ins_pipe( pipe_slow );
20144 %}
20145
20147 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20148 predicate((UseAVX > 0) &&
20149 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20150 match(Set dst (SubVL src (LoadVector mem)));
20151 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20152 ins_encode %{
20153 int vlen_enc = vector_length_encoding(this);
20154 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20155 %}
20156 ins_pipe( pipe_slow );
20157 %}
20158
20159 // Floats vector sub
20160 instruct vsubF(vec dst, vec src) %{
20161 predicate(UseAVX == 0);
20162 match(Set dst (SubVF dst src));
20163 format %{ "subps $dst,$src\t! sub packedF" %}
20164 ins_encode %{
20165 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20166 %}
20167 ins_pipe( pipe_slow );
20168 %}
20169
20170 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20171 predicate(UseAVX > 0);
20172 match(Set dst (SubVF src1 src2));
20173 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20174 ins_encode %{
20175 int vlen_enc = vector_length_encoding(this);
20176 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20177 %}
20178 ins_pipe( pipe_slow );
20179 %}
20180
20181 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20182 predicate((UseAVX > 0) &&
20183 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20184 match(Set dst (SubVF src (LoadVector mem)));
20185 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20186 ins_encode %{
20187 int vlen_enc = vector_length_encoding(this);
20188 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20189 %}
20190 ins_pipe( pipe_slow );
20191 %}
20192
20193 // Doubles vector sub
20194 instruct vsubD(vec dst, vec src) %{
20195 predicate(UseAVX == 0);
20196 match(Set dst (SubVD dst src));
20197 format %{ "subpd $dst,$src\t! sub packedD" %}
20198 ins_encode %{
20199 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20200 %}
20201 ins_pipe( pipe_slow );
20202 %}
20203
20204 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20205 predicate(UseAVX > 0);
20206 match(Set dst (SubVD src1 src2));
20207 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20208 ins_encode %{
20209 int vlen_enc = vector_length_encoding(this);
20210 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20211 %}
20212 ins_pipe( pipe_slow );
20213 %}
20214
20215 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20216 predicate((UseAVX > 0) &&
20217 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20218 match(Set dst (SubVD src (LoadVector mem)));
20219 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20220 ins_encode %{
20221 int vlen_enc = vector_length_encoding(this);
20222 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20223 %}
20224 ins_pipe( pipe_slow );
20225 %}
20226
20227 // --------------------------------- MUL --------------------------------------
20228
20229 // Byte vector mul
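// There is no packed byte multiply in SSE/AVX, so the rules below widen
// byte lanes to 16-bit lanes, multiply with pmullw, and repack the low
// bytes. For vectors wider than 8 bytes the even and odd byte positions
// are handled as two word-lane multiplies and recombined; per 16-bit
// lane the intent is (a sketch only, not the exact instruction sequence):
//   even = (a & 0xFF) * (b & 0xFF);
//   odd  = (a >>> 8)  * (b >>> 8);
//   lane = ((odd & 0xFF) << 8) | (even & 0xFF);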
20230 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20231 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20232 match(Set dst (MulVB src1 src2));
20233 effect(TEMP dst, TEMP xtmp);
20234 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20235 ins_encode %{
20236 assert(UseSSE > 3, "required");
20237 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20238 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20239 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20240 __ psllw($dst$$XMMRegister, 8);
20241 __ psrlw($dst$$XMMRegister, 8);
20242 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20243 %}
20244 ins_pipe( pipe_slow );
20245 %}
20246
20247 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20248 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20249 match(Set dst (MulVB src1 src2));
20250 effect(TEMP dst, TEMP xtmp);
20251 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20252 ins_encode %{
20253 assert(UseSSE > 3, "required");
20254 // Odd-index elements
20255 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20256 __ psrlw($dst$$XMMRegister, 8);
20257 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20258 __ psrlw($xtmp$$XMMRegister, 8);
20259 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20260 __ psllw($dst$$XMMRegister, 8);
20261 // Even-index elements
20262 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20263 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20264 __ psllw($xtmp$$XMMRegister, 8);
20265 __ psrlw($xtmp$$XMMRegister, 8);
20266 // Combine
20267 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20268 %}
20269 ins_pipe( pipe_slow );
20270 %}
20271
20272 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20273 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20274 match(Set dst (MulVB src1 src2));
20275 effect(TEMP xtmp1, TEMP xtmp2);
20276 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20277 ins_encode %{
20278 int vlen_enc = vector_length_encoding(this);
20279 // Odd-index elements
20280 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20281 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20282 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20283 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20284 // Even-index elements
20285 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20286 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20287 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20288 // Combine
20289 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20290 %}
20291 ins_pipe( pipe_slow );
20292 %}
20293
20294 // Shorts/Chars vector mul
20295 instruct vmulS(vec dst, vec src) %{
20296 predicate(UseAVX == 0);
20297 match(Set dst (MulVS dst src));
20298 format %{ "pmullw $dst,$src\t! mul packedS" %}
20299 ins_encode %{
20300 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20301 %}
20302 ins_pipe( pipe_slow );
20303 %}
20304
20305 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20306 predicate(UseAVX > 0);
20307 match(Set dst (MulVS src1 src2));
20308 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20309 ins_encode %{
20310 int vlen_enc = vector_length_encoding(this);
20311 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20312 %}
20313 ins_pipe( pipe_slow );
20314 %}
20315
20316 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20317 predicate((UseAVX > 0) &&
20318 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20319 match(Set dst (MulVS src (LoadVector mem)));
20320 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20321 ins_encode %{
20322 int vlen_enc = vector_length_encoding(this);
20323 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20324 %}
20325 ins_pipe( pipe_slow );
20326 %}
20327
20328 // Integers vector mul
20329 instruct vmulI(vec dst, vec src) %{
20330 predicate(UseAVX == 0);
20331 match(Set dst (MulVI dst src));
20332 format %{ "pmulld $dst,$src\t! mul packedI" %}
20333 ins_encode %{
20334 assert(UseSSE > 3, "required");
20335 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20336 %}
20337 ins_pipe( pipe_slow );
20338 %}
20339
20340 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20341 predicate(UseAVX > 0);
20342 match(Set dst (MulVI src1 src2));
20343 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20344 ins_encode %{
20345 int vlen_enc = vector_length_encoding(this);
20346 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20347 %}
20348 ins_pipe( pipe_slow );
20349 %}
20350
20351 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20352 predicate((UseAVX > 0) &&
20353 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20354 match(Set dst (MulVI src (LoadVector mem)));
20355 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20356 ins_encode %{
20357 int vlen_enc = vector_length_encoding(this);
20358 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20359 %}
20360 ins_pipe( pipe_slow );
20361 %}
20362
20363 // Longs vector mul
20364 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20365 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20366 VM_Version::supports_avx512dq()) ||
20367 VM_Version::supports_avx512vldq());
20368 match(Set dst (MulVL src1 src2));
20369 ins_cost(500);
20370 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20371 ins_encode %{
20372 assert(UseAVX > 2, "required");
20373 int vlen_enc = vector_length_encoding(this);
20374 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20375 %}
20376 ins_pipe( pipe_slow );
20377 %}
20378
20379 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20380 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20381 VM_Version::supports_avx512dq()) ||
20382 (Matcher::vector_length_in_bytes(n) > 8 &&
20383 VM_Version::supports_avx512vldq()));
20384 match(Set dst (MulVL src (LoadVector mem)));
20385 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20386 ins_cost(500);
20387 ins_encode %{
20388 assert(UseAVX > 2, "required");
20389 int vlen_enc = vector_length_encoding(this);
20390 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20391 %}
20392 ins_pipe( pipe_slow );
20393 %}
20394
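// SSE/AVX2 have no 64x64->64-bit lane multiply, so the two rules below
// assemble it from 32-bit halves. Writing a = aH:aL and b = bH:bL, then
// mod 2^64:
//   a * b = aL*bL + ((aL*bH + aH*bL) << 32)
// The cross terms come from pmulld on a dword-swapped copy (pshufd 0xB1)
// and the lo-lo term from the widening pmuludq.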
20395 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20396 predicate(UseAVX == 0);
20397 match(Set dst (MulVL src1 src2));
20398 ins_cost(500);
20399 effect(TEMP dst, TEMP xtmp);
20400 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20401 ins_encode %{
20402 assert(VM_Version::supports_sse4_1(), "required");
// Get the lo-hi products; only the lower 32 bits are of concern
20404 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20405 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20406 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20407 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20408 __ psllq($dst$$XMMRegister, 32);
20409 // Get the lo-lo products
20410 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20411 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20412 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20413 %}
20414 ins_pipe( pipe_slow );
20415 %}
20416
20417 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20418 predicate(UseAVX > 0 &&
20419 ((Matcher::vector_length_in_bytes(n) == 64 &&
20420 !VM_Version::supports_avx512dq()) ||
20421 (Matcher::vector_length_in_bytes(n) < 64 &&
20422 !VM_Version::supports_avx512vldq())));
20423 match(Set dst (MulVL src1 src2));
20424 effect(TEMP xtmp1, TEMP xtmp2);
20425 ins_cost(500);
20426 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20427 ins_encode %{
20428 int vlen_enc = vector_length_encoding(this);
// Get the lo-hi products; only the lower 32 bits are of concern
20430 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20431 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20432 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20433 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20434 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20435 // Get the lo-lo products
20436 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20437 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20438 %}
20439 ins_pipe( pipe_slow );
20440 %}
20441
20442 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20443 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20444 match(Set dst (MulVL src1 src2));
20445 ins_cost(100);
20446 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20447 ins_encode %{
20448 int vlen_enc = vector_length_encoding(this);
20449 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20450 %}
20451 ins_pipe( pipe_slow );
20452 %}
20453
20454 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20455 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20456 match(Set dst (MulVL src1 src2));
20457 ins_cost(100);
20458 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20459 ins_encode %{
20460 int vlen_enc = vector_length_encoding(this);
20461 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20462 %}
20463 ins_pipe( pipe_slow );
20464 %}
20465
20466 // Floats vector mul
20467 instruct vmulF(vec dst, vec src) %{
20468 predicate(UseAVX == 0);
20469 match(Set dst (MulVF dst src));
20470 format %{ "mulps $dst,$src\t! mul packedF" %}
20471 ins_encode %{
20472 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20473 %}
20474 ins_pipe( pipe_slow );
20475 %}
20476
20477 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20478 predicate(UseAVX > 0);
20479 match(Set dst (MulVF src1 src2));
20480 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20481 ins_encode %{
20482 int vlen_enc = vector_length_encoding(this);
20483 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20484 %}
20485 ins_pipe( pipe_slow );
20486 %}
20487
20488 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20489 predicate((UseAVX > 0) &&
20490 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20491 match(Set dst (MulVF src (LoadVector mem)));
20492 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20493 ins_encode %{
20494 int vlen_enc = vector_length_encoding(this);
20495 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20496 %}
20497 ins_pipe( pipe_slow );
20498 %}
20499
20500 // Doubles vector mul
20501 instruct vmulD(vec dst, vec src) %{
20502 predicate(UseAVX == 0);
20503 match(Set dst (MulVD dst src));
20504 format %{ "mulpd $dst,$src\t! mul packedD" %}
20505 ins_encode %{
20506 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20507 %}
20508 ins_pipe( pipe_slow );
20509 %}
20510
20511 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20512 predicate(UseAVX > 0);
20513 match(Set dst (MulVD src1 src2));
20514 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20515 ins_encode %{
20516 int vlen_enc = vector_length_encoding(this);
20517 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20518 %}
20519 ins_pipe( pipe_slow );
20520 %}
20521
20522 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20523 predicate((UseAVX > 0) &&
20524 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20525 match(Set dst (MulVD src (LoadVector mem)));
20526 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20527 ins_encode %{
20528 int vlen_enc = vector_length_encoding(this);
20529 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20530 %}
20531 ins_pipe( pipe_slow );
20532 %}
20533
20534 // --------------------------------- DIV --------------------------------------
20535
20536 // Floats vector div
20537 instruct vdivF(vec dst, vec src) %{
20538 predicate(UseAVX == 0);
20539 match(Set dst (DivVF dst src));
20540 format %{ "divps $dst,$src\t! div packedF" %}
20541 ins_encode %{
20542 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20543 %}
20544 ins_pipe( pipe_slow );
20545 %}
20546
20547 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20548 predicate(UseAVX > 0);
20549 match(Set dst (DivVF src1 src2));
20550 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20551 ins_encode %{
20552 int vlen_enc = vector_length_encoding(this);
20553 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20554 %}
20555 ins_pipe( pipe_slow );
20556 %}
20557
20558 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20559 predicate((UseAVX > 0) &&
20560 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20561 match(Set dst (DivVF src (LoadVector mem)));
20562 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20563 ins_encode %{
20564 int vlen_enc = vector_length_encoding(this);
20565 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20566 %}
20567 ins_pipe( pipe_slow );
20568 %}
20569
20570 // Doubles vector div
20571 instruct vdivD(vec dst, vec src) %{
20572 predicate(UseAVX == 0);
20573 match(Set dst (DivVD dst src));
20574 format %{ "divpd $dst,$src\t! div packedD" %}
20575 ins_encode %{
20576 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20577 %}
20578 ins_pipe( pipe_slow );
20579 %}
20580
20581 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20582 predicate(UseAVX > 0);
20583 match(Set dst (DivVD src1 src2));
20584 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20585 ins_encode %{
20586 int vlen_enc = vector_length_encoding(this);
20587 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20588 %}
20589 ins_pipe( pipe_slow );
20590 %}
20591
20592 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20593 predicate((UseAVX > 0) &&
20594 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20595 match(Set dst (DivVD src (LoadVector mem)));
20596 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20597 ins_encode %{
20598 int vlen_enc = vector_length_encoding(this);
20599 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20600 %}
20601 ins_pipe( pipe_slow );
20602 %}
20603
20604 // ------------------------------ MinMax ---------------------------------------
20605
20606 // Byte, Short, Int vector Min/Max
20607 instruct minmax_reg_sse(vec dst, vec src) %{
20608 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20609 UseAVX == 0);
20610 match(Set dst (MinV dst src));
20611 match(Set dst (MaxV dst src));
20612 format %{ "vector_minmax $dst,$src\t! " %}
20613 ins_encode %{
20614 assert(UseSSE >= 4, "required");
20615
20616 int opcode = this->ideal_Opcode();
20617 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20618 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20619 %}
20620 ins_pipe( pipe_slow );
20621 %}
20622
20623 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20624 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20625 UseAVX > 0);
20626 match(Set dst (MinV src1 src2));
20627 match(Set dst (MaxV src1 src2));
20628 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20629 ins_encode %{
20630 int opcode = this->ideal_Opcode();
20631 int vlen_enc = vector_length_encoding(this);
20632 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20633
20634 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20635 %}
20636 ins_pipe( pipe_slow );
20637 %}
20638
20639 // Long vector Min/Max
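// pminsq/pmaxsq only exist under AVX-512, so pre-EVEX 64-bit min/max is
// presumably a signed compare plus blend inside pminmax/vpminmax. The
// rxmm0 temp below reflects that the SSE4.1 variable blends
// (pblendvb/blendvpd) take their mask implicitly in xmm0.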
20640 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20641 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20642 UseAVX == 0);
20643 match(Set dst (MinV dst src));
match(Set dst (MaxV dst src));
20645 effect(TEMP dst, TEMP tmp);
20646 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20647 ins_encode %{
20648 assert(UseSSE >= 4, "required");
20649
20650 int opcode = this->ideal_Opcode();
20651 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20652 assert(elem_bt == T_LONG, "sanity");
20653
20654 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20655 %}
20656 ins_pipe( pipe_slow );
20657 %}
20658
20659 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20660 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20661 UseAVX > 0 && !VM_Version::supports_avx512vl());
20662 match(Set dst (MinV src1 src2));
20663 match(Set dst (MaxV src1 src2));
20664 effect(TEMP dst);
20665 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20666 ins_encode %{
20667 int vlen_enc = vector_length_encoding(this);
20668 int opcode = this->ideal_Opcode();
20669 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20670 assert(elem_bt == T_LONG, "sanity");
20671
20672 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20673 %}
20674 ins_pipe( pipe_slow );
20675 %}
20676
20677 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20678 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20679 Matcher::vector_element_basic_type(n) == T_LONG);
20680 match(Set dst (MinV src1 src2));
20681 match(Set dst (MaxV src1 src2));
20682 format %{ "vector_minmaxL $dst,$src1,src2\t! " %}
20683 ins_encode %{
20684 assert(UseAVX > 2, "required");
20685
20686 int vlen_enc = vector_length_encoding(this);
20687 int opcode = this->ideal_Opcode();
20688 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20689 assert(elem_bt == T_LONG, "sanity");
20690
20691 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20692 %}
20693 ins_pipe( pipe_slow );
20694 %}
20695
20696 // Float/Double vector Min/Max
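// Java Math.min/max semantics do not match raw minps/maxps, which return
// the second operand whenever the inputs compare unordered or equal:
//   Math.min(x, Float.NaN) -> NaN    (NaN must propagate)
//   Math.min(-0.0f, +0.0f) -> -0.0f  (-0.0 orders below +0.0)
// AVX10.2 provides a min/max form with the required semantics directly;
// the pre-AVX10.2 rules below need extra temps (or a mask register) to
// patch up the NaN and signed-zero cases.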
20697 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
20698 predicate(VM_Version::supports_avx10_2() &&
20699 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20700 match(Set dst (MinV a b));
20701 match(Set dst (MaxV a b));
20702 format %{ "vector_minmaxFP $dst, $a, $b" %}
20703 ins_encode %{
20704 int vlen_enc = vector_length_encoding(this);
20705 int opcode = this->ideal_Opcode();
20706 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20707 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20708 %}
20709 ins_pipe( pipe_slow );
20710 %}
20711
20712 // Float/Double vector Min/Max
20713 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20714 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20715 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20716 UseAVX > 0);
20717 match(Set dst (MinV a b));
20718 match(Set dst (MaxV a b));
20719 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20720 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20721 ins_encode %{
20722 assert(UseAVX > 0, "required");
20723
20724 int opcode = this->ideal_Opcode();
20725 int vlen_enc = vector_length_encoding(this);
20726 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20727
20728 __ vminmax_fp(opcode, elem_bt,
20729 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20730 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20731 %}
20732 ins_pipe( pipe_slow );
20733 %}
20734
20735 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20736 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20737 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20738 match(Set dst (MinV a b));
20739 match(Set dst (MaxV a b));
20740 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20741 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20742 ins_encode %{
20743 assert(UseAVX > 2, "required");
20744
20745 int opcode = this->ideal_Opcode();
20746 int vlen_enc = vector_length_encoding(this);
20747 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20748
20749 __ evminmax_fp(opcode, elem_bt,
20750 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20751 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20752 %}
20753 ins_pipe( pipe_slow );
20754 %}
20755
20756 // ------------------------------ Unsigned vector Min/Max ----------------------
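// For 64-bit lanes without AVX-512VL there is no native unsigned
// compare, so vector_uminmaxq_reg presumably falls back on the classic
// sign-bit bias identity:
//   umin(a, b) == min(a ^ MIN_VALUE, b ^ MIN_VALUE) ^ MIN_VALUE
// (likewise for umax), turning an unsigned comparison into a signed one.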
20757
20758 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20759 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20760 match(Set dst (UMinV a b));
20761 match(Set dst (UMaxV a b));
20762 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20763 ins_encode %{
20764 int opcode = this->ideal_Opcode();
20765 int vlen_enc = vector_length_encoding(this);
20766 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20767 assert(is_integral_type(elem_bt), "");
20768 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20769 %}
20770 ins_pipe( pipe_slow );
20771 %}
20772
20773 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20774 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20775 match(Set dst (UMinV a (LoadVector b)));
20776 match(Set dst (UMaxV a (LoadVector b)));
20777 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20778 ins_encode %{
20779 int opcode = this->ideal_Opcode();
20780 int vlen_enc = vector_length_encoding(this);
20781 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20782 assert(is_integral_type(elem_bt), "");
20783 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20784 %}
20785 ins_pipe( pipe_slow );
20786 %}
20787
20788 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20789 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20790 match(Set dst (UMinV a b));
20791 match(Set dst (UMaxV a b));
20792 effect(TEMP xtmp1, TEMP xtmp2);
20793 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
20794 ins_encode %{
20795 int opcode = this->ideal_Opcode();
20796 int vlen_enc = vector_length_encoding(this);
20797 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20798 %}
20799 ins_pipe( pipe_slow );
20800 %}
20801
20802 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20803 match(Set dst (UMinV (Binary dst src2) mask));
20804 match(Set dst (UMaxV (Binary dst src2) mask));
20805 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20806 ins_encode %{
20807 int vlen_enc = vector_length_encoding(this);
20808 BasicType bt = Matcher::vector_element_basic_type(this);
20809 int opc = this->ideal_Opcode();
20810 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20811 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20812 %}
20813 ins_pipe( pipe_slow );
20814 %}
20815
20816 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20817 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20818 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20819 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20820 ins_encode %{
20821 int vlen_enc = vector_length_encoding(this);
20822 BasicType bt = Matcher::vector_element_basic_type(this);
20823 int opc = this->ideal_Opcode();
20824 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20825 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20826 %}
20827 ins_pipe( pipe_slow );
20828 %}
20829
20830 // --------------------------------- Signum/CopySign ---------------------------
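// Math.signum returns 1.0 for positive inputs, -1.0 for negative ones,
// and the argument itself for zeros and NaN (so -0.0 and NaN pass
// through). The rules below take the 0.0/1.0 constants as inputs and
// select between them based on the comparison result.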
20831
20832 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20833 match(Set dst (SignumF dst (Binary zero one)));
20834 effect(KILL cr);
20835 format %{ "signumF $dst, $dst" %}
20836 ins_encode %{
20837 int opcode = this->ideal_Opcode();
20838 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20839 %}
20840 ins_pipe( pipe_slow );
20841 %}
20842
20843 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20844 match(Set dst (SignumD dst (Binary zero one)));
20845 effect(KILL cr);
20846 format %{ "signumD $dst, $dst" %}
20847 ins_encode %{
20848 int opcode = this->ideal_Opcode();
20849 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20850 %}
20851 ins_pipe( pipe_slow );
20852 %}
20853
20854 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20855 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20856 match(Set dst (SignumVF src (Binary zero one)));
20857 match(Set dst (SignumVD src (Binary zero one)));
20858 effect(TEMP dst, TEMP xtmp1);
20859 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20860 ins_encode %{
20861 int opcode = this->ideal_Opcode();
20862 int vec_enc = vector_length_encoding(this);
20863 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20864 $xtmp1$$XMMRegister, vec_enc);
20865 %}
20866 ins_pipe( pipe_slow );
20867 %}
20868
20869 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20870 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20871 match(Set dst (SignumVF src (Binary zero one)));
20872 match(Set dst (SignumVD src (Binary zero one)));
20873 effect(TEMP dst, TEMP ktmp1);
20874 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20875 ins_encode %{
20876 int opcode = this->ideal_Opcode();
20877 int vec_enc = vector_length_encoding(this);
20878 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20879 $ktmp1$$KRegister, vec_enc);
20880 %}
20881 ins_pipe( pipe_slow );
20882 %}
20883
20884 // ---------------------------------------
20885 // For copySign use 0xE4 as writemask for vpternlog
20886 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20887 // C (xmm2) is set to 0x7FFFFFFF
20888 // Wherever xmm2 is 0, we want to pick from B (sign)
20889 // Wherever xmm2 is 1, we want to pick from A (src)
20890 //
20891 // A B C Result
20892 // 0 0 0 0
20893 // 0 0 1 0
20894 // 0 1 0 1
20895 // 0 1 1 0
20896 // 1 0 0 0
20897 // 1 0 1 1
20898 // 1 1 0 1
20899 // 1 1 1 1
20900 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
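//
// Reading the immediate: the result bit for inputs (A,B,C) is bit
// (A<<2 | B<<1 | C) of 0xE4; e.g. A=1,B=0,C=1 selects bit 5, which is 1
// (take A). Per 32-bit lane the net effect is
//   dst = (dst & 0x7FFFFFFF) | (src & 0x80000000)
// which matches Math.copySign(magnitude = dst, sign = src).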
20902 // ---------------------------------------
20903
20904 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20905 match(Set dst (CopySignF dst src));
20906 effect(TEMP tmp1, TEMP tmp2);
20907 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20908 ins_encode %{
20909 __ movl($tmp2$$Register, 0x7FFFFFFF);
20910 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20911 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20912 %}
20913 ins_pipe( pipe_slow );
20914 %}
20915
20916 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20917 match(Set dst (CopySignD dst (Binary src zero)));
20918 ins_cost(100);
20919 effect(TEMP tmp1, TEMP tmp2);
20920 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20921 ins_encode %{
20922 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20923 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20924 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20925 %}
20926 ins_pipe( pipe_slow );
20927 %}
20928
20929 //----------------------------- CompressBits/ExpandBits ------------------------
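//
// These back Integer/Long.compress and expand with BMI2 pext/pdep: pext
// gathers the src bits selected by mask into the low result bits, and
// pdep scatters the low src bits out to the mask-selected positions.
// Worked example (illustrative values):
//   pext(0b10110100, 0b01100101) == 0b0110
//   pdep(0b0110,     0b01100101) == 0b00100100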
20930
20931 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20932 predicate(n->bottom_type()->isa_int());
20933 match(Set dst (CompressBits src mask));
20934 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20935 ins_encode %{
20936 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20937 %}
20938 ins_pipe( pipe_slow );
20939 %}
20940
20941 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20942 predicate(n->bottom_type()->isa_int());
20943 match(Set dst (ExpandBits src mask));
20944 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20945 ins_encode %{
20946 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20947 %}
20948 ins_pipe( pipe_slow );
20949 %}
20950
20951 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20952 predicate(n->bottom_type()->isa_int());
20953 match(Set dst (CompressBits src (LoadI mask)));
20954 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20955 ins_encode %{
20956 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20957 %}
20958 ins_pipe( pipe_slow );
20959 %}
20960
20961 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20962 predicate(n->bottom_type()->isa_int());
20963 match(Set dst (ExpandBits src (LoadI mask)));
20964 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20965 ins_encode %{
20966 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20967 %}
20968 ins_pipe( pipe_slow );
20969 %}
20970
20971 // --------------------------------- Sqrt --------------------------------------
20972
20973 instruct vsqrtF_reg(vec dst, vec src) %{
20974 match(Set dst (SqrtVF src));
20975 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
20976 ins_encode %{
20977 assert(UseAVX > 0, "required");
20978 int vlen_enc = vector_length_encoding(this);
20979 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20980 %}
20981 ins_pipe( pipe_slow );
20982 %}
20983
20984 instruct vsqrtF_mem(vec dst, memory mem) %{
20985 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20986 match(Set dst (SqrtVF (LoadVector mem)));
20987 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
20988 ins_encode %{
20989 assert(UseAVX > 0, "required");
20990 int vlen_enc = vector_length_encoding(this);
20991 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
20992 %}
20993 ins_pipe( pipe_slow );
20994 %}
20995
20996 // Floating point vector sqrt
20997 instruct vsqrtD_reg(vec dst, vec src) %{
20998 match(Set dst (SqrtVD src));
20999 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21000 ins_encode %{
21001 assert(UseAVX > 0, "required");
21002 int vlen_enc = vector_length_encoding(this);
21003 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21004 %}
21005 ins_pipe( pipe_slow );
21006 %}
21007
21008 instruct vsqrtD_mem(vec dst, memory mem) %{
21009 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21010 match(Set dst (SqrtVD (LoadVector mem)));
21011 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21012 ins_encode %{
21013 assert(UseAVX > 0, "required");
21014 int vlen_enc = vector_length_encoding(this);
21015 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21016 %}
21017 ins_pipe( pipe_slow );
21018 %}
21019
21020 // ------------------------------ Shift ---------------------------------------
21021
21022 // Left and right shift count vectors are the same on x86
21023 // (only lowest bits of xmm reg are used for count).
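// (Specifically, the hardware shifts read the count from the low 64 bits
// of the xmm operand, so the single movdl load below serves both
// directions.)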
21024 instruct vshiftcnt(vec dst, rRegI cnt) %{
21025 match(Set dst (LShiftCntV cnt));
21026 match(Set dst (RShiftCntV cnt));
21027 format %{ "movdl $dst,$cnt\t! load shift count" %}
21028 ins_encode %{
21029 __ movdl($dst$$XMMRegister, $cnt$$Register);
21030 %}
21031 ins_pipe( pipe_slow );
21032 %}
21033
21034 // Byte vector shift
21035 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21036 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21037 match(Set dst ( LShiftVB src shift));
21038 match(Set dst ( RShiftVB src shift));
21039 match(Set dst (URShiftVB src shift));
21040 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21041 format %{"vector_byte_shift $dst,$src,$shift" %}
21042 ins_encode %{
21043 assert(UseSSE > 3, "required");
21044 int opcode = this->ideal_Opcode();
21045 bool sign = (opcode != Op_URShiftVB);
21046 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21047 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21048 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21049 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21050 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21051 %}
21052 ins_pipe( pipe_slow );
21053 %}
21054
21055 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21056 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21057 UseAVX <= 1);
21058 match(Set dst ( LShiftVB src shift));
21059 match(Set dst ( RShiftVB src shift));
21060 match(Set dst (URShiftVB src shift));
21061 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21062 format %{"vector_byte_shift $dst,$src,$shift" %}
21063 ins_encode %{
21064 assert(UseSSE > 3, "required");
21065 int opcode = this->ideal_Opcode();
21066 bool sign = (opcode != Op_URShiftVB);
21067 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21068 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21069 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21070 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21071 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21072 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21073 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21074 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21075 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21076 %}
21077 ins_pipe( pipe_slow );
21078 %}
21079
21080 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21081 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21082 UseAVX > 1);
21083 match(Set dst ( LShiftVB src shift));
21084 match(Set dst ( RShiftVB src shift));
21085 match(Set dst (URShiftVB src shift));
21086 effect(TEMP dst, TEMP tmp);
21087 format %{"vector_byte_shift $dst,$src,$shift" %}
21088 ins_encode %{
21089 int opcode = this->ideal_Opcode();
21090 bool sign = (opcode != Op_URShiftVB);
21091 int vlen_enc = Assembler::AVX_256bit;
21092 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21093 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21094 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21095 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21096 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21097 %}
21098 ins_pipe( pipe_slow );
21099 %}
21100
21101 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21102 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21103 match(Set dst ( LShiftVB src shift));
21104 match(Set dst ( RShiftVB src shift));
21105 match(Set dst (URShiftVB src shift));
21106 effect(TEMP dst, TEMP tmp);
21107 format %{"vector_byte_shift $dst,$src,$shift" %}
21108 ins_encode %{
21109 assert(UseAVX > 1, "required");
21110 int opcode = this->ideal_Opcode();
21111 bool sign = (opcode != Op_URShiftVB);
21112 int vlen_enc = Assembler::AVX_256bit;
21113 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21114 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21115 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21116 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21117 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21118 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21119 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21120 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21121 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21122 %}
21123 ins_pipe( pipe_slow );
21124 %}
21125
21126 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21127 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21128 match(Set dst ( LShiftVB src shift));
match(Set dst ( RShiftVB src shift));
21130 match(Set dst (URShiftVB src shift));
21131 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21132 format %{"vector_byte_shift $dst,$src,$shift" %}
21133 ins_encode %{
21134 assert(UseAVX > 2, "required");
21135 int opcode = this->ideal_Opcode();
21136 bool sign = (opcode != Op_URShiftVB);
21137 int vlen_enc = Assembler::AVX_512bit;
21138 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21139 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21140 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21141 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21142 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21143 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21144 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21145 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21146 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21147 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21148 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21149 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21150 %}
21151 ins_pipe( pipe_slow );
21152 %}
21153
// A shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts the short value into an
// int with sign extension before the shift. Char vectors are fine,
// though, since chars are unsigned values.
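// Example: for short s = -1, Java evaluates (short)(s >>> 2) as
// (short)(0xFFFFFFFF >>> 2) == (short)0x3FFFFFFF == (short)0xFFFF, whereas
// a 16-bit lane shift yields 0x3FFF; for char c = '\uFFFF' both routes
// agree on 0x3FFF because the promotion zero-extends.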
// Shorts/Chars vector shift
21159 instruct vshiftS(vec dst, vec src, vec shift) %{
21160 predicate(!n->as_ShiftV()->is_var_shift());
21161 match(Set dst ( LShiftVS src shift));
21162 match(Set dst ( RShiftVS src shift));
21163 match(Set dst (URShiftVS src shift));
21164 effect(TEMP dst, USE src, USE shift);
21165 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21166 ins_encode %{
21167 int opcode = this->ideal_Opcode();
21168 if (UseAVX > 0) {
21169 int vlen_enc = vector_length_encoding(this);
21170 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21171 } else {
21172 int vlen = Matcher::vector_length(this);
21173 if (vlen == 2) {
21174 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21175 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21176 } else if (vlen == 4) {
21177 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21178 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21179 } else {
21180 assert (vlen == 8, "sanity");
21181 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21182 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21183 }
21184 }
21185 %}
21186 ins_pipe( pipe_slow );
21187 %}
21188
// Integers vector shift
21190 instruct vshiftI(vec dst, vec src, vec shift) %{
21191 predicate(!n->as_ShiftV()->is_var_shift());
21192 match(Set dst ( LShiftVI src shift));
21193 match(Set dst ( RShiftVI src shift));
21194 match(Set dst (URShiftVI src shift));
21195 effect(TEMP dst, USE src, USE shift);
21196 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21197 ins_encode %{
21198 int opcode = this->ideal_Opcode();
21199 if (UseAVX > 0) {
21200 int vlen_enc = vector_length_encoding(this);
21201 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21202 } else {
21203 int vlen = Matcher::vector_length(this);
21204 if (vlen == 2) {
21205 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21206 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21207 } else {
21208 assert(vlen == 4, "sanity");
21209 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21210 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21211 }
21212 }
21213 %}
21214 ins_pipe( pipe_slow );
21215 %}
21216
// Integers vector constant shift
21218 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21219 match(Set dst (LShiftVI src (LShiftCntV shift)));
21220 match(Set dst (RShiftVI src (RShiftCntV shift)));
21221 match(Set dst (URShiftVI src (RShiftCntV shift)));
21222 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21223 ins_encode %{
21224 int opcode = this->ideal_Opcode();
21225 if (UseAVX > 0) {
21226 int vector_len = vector_length_encoding(this);
21227 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21228 } else {
21229 int vlen = Matcher::vector_length(this);
21230 if (vlen == 2) {
21231 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21232 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21233 } else {
21234 assert(vlen == 4, "sanity");
21235 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21236 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21237 }
21238 }
21239 %}
21240 ins_pipe( pipe_slow );
21241 %}
21242
21243 // Longs vector shift
21244 instruct vshiftL(vec dst, vec src, vec shift) %{
21245 predicate(!n->as_ShiftV()->is_var_shift());
21246 match(Set dst ( LShiftVL src shift));
21247 match(Set dst (URShiftVL src shift));
21248 effect(TEMP dst, USE src, USE shift);
21249 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21250 ins_encode %{
21251 int opcode = this->ideal_Opcode();
21252 if (UseAVX > 0) {
21253 int vlen_enc = vector_length_encoding(this);
21254 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21255 } else {
21256 assert(Matcher::vector_length(this) == 2, "");
21257 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21258 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21259 }
21260 %}
21261 ins_pipe( pipe_slow );
21262 %}
21263
21264 // Longs vector constant shift
21265 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21266 match(Set dst (LShiftVL src (LShiftCntV shift)));
21267 match(Set dst (URShiftVL src (RShiftCntV shift)));
21268 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21269 ins_encode %{
21270 int opcode = this->ideal_Opcode();
21271 if (UseAVX > 0) {
21272 int vector_len = vector_length_encoding(this);
21273 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21274 } else {
21275 assert(Matcher::vector_length(this) == 2, "");
21276 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21277 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21278 }
21279 %}
21280 ins_pipe( pipe_slow );
21281 %}
21282
// ------------------- Arithmetic Right Shift ---------------------------------
21284 // Long vector arithmetic right shift
21285 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21286 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21287 match(Set dst (RShiftVL src shift));
21288 effect(TEMP dst, TEMP tmp);
21289 format %{ "vshiftq $dst,$src,$shift" %}
21290 ins_encode %{
21291 uint vlen = Matcher::vector_length(this);
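    // Emulate arithmetic right shift with logical shifts: with
    // m = (sign_mask >>> n), we have (x >> n) == ((x >>> n) ^ m) - m,
    // which the xor/sub sequence below uses to re-propagate the sign bits.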
21292 if (vlen == 2) {
21293 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21294 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21295 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21296 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21297 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21298 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21299 } else {
21300 assert(vlen == 4, "sanity");
21301 assert(UseAVX > 1, "required");
21302 int vlen_enc = Assembler::AVX_256bit;
21303 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21304 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21305 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21306 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21307 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21308 }
21309 %}
21310 ins_pipe( pipe_slow );
21311 %}
21312
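// AVX-512 provides a native arithmetic right shift for quadwords (evpsraq),
// so no emulation is needed there.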
21313 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21314 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21315 match(Set dst (RShiftVL src shift));
21316 format %{ "vshiftq $dst,$src,$shift" %}
21317 ins_encode %{
21318 int vlen_enc = vector_length_encoding(this);
21319 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21320 %}
21321 ins_pipe( pipe_slow );
21322 %}
21323
21324 // ------------------- Variable Shift -----------------------------
21325 // Byte variable shift
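// There is no per-element variable byte shift on x86, so bytes are widened to
// words, shifted via varshiftbw, and the word results are narrowed back to
// bytes with vpackuswb.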
21326 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21327 predicate(Matcher::vector_length(n) <= 8 &&
21328 n->as_ShiftV()->is_var_shift() &&
21329 !VM_Version::supports_avx512bw());
21330 match(Set dst ( LShiftVB src shift));
21331 match(Set dst ( RShiftVB src shift));
21332 match(Set dst (URShiftVB src shift));
21333 effect(TEMP dst, TEMP vtmp);
21334 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21335 ins_encode %{
21336 assert(UseAVX >= 2, "required");
21337
21338 int opcode = this->ideal_Opcode();
21339 int vlen_enc = Assembler::AVX_128bit;
21340 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21341 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21342 %}
21343 ins_pipe( pipe_slow );
21344 %}
21345
21346 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21347 predicate(Matcher::vector_length(n) == 16 &&
21348 n->as_ShiftV()->is_var_shift() &&
21349 !VM_Version::supports_avx512bw());
21350 match(Set dst ( LShiftVB src shift));
21351 match(Set dst ( RShiftVB src shift));
21352 match(Set dst (URShiftVB src shift));
21353 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21354 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21355 ins_encode %{
21356 assert(UseAVX >= 2, "required");
21357
21358 int opcode = this->ideal_Opcode();
21359 int vlen_enc = Assembler::AVX_128bit;
21360 // Shift lower half and get word result in dst
21361 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21362
21363 // Shift upper half and get word result in vtmp1
21364 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21365 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21366 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21367
21368 // Merge and down convert the two word results to byte in dst
21369 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21370 %}
21371 ins_pipe( pipe_slow );
21372 %}
21373
21374 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21375 predicate(Matcher::vector_length(n) == 32 &&
21376 n->as_ShiftV()->is_var_shift() &&
21377 !VM_Version::supports_avx512bw());
21378 match(Set dst ( LShiftVB src shift));
21379 match(Set dst ( RShiftVB src shift));
21380 match(Set dst (URShiftVB src shift));
21381 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21383 ins_encode %{
21384 assert(UseAVX >= 2, "required");
21385
21386 int opcode = this->ideal_Opcode();
21387 int vlen_enc = Assembler::AVX_128bit;
21388 // Process lower 128 bits and get result in dst
21389 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21390 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21391 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21392 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21393 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21394
21395 // Process higher 128 bits and get result in vtmp3
21396 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21397 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21398 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21399 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21400 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21401 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21402 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21403
21404 // Merge the two results in dst
21405 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21406 %}
21407 ins_pipe( pipe_slow );
21408 %}
21409
21410 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21411 predicate(Matcher::vector_length(n) <= 32 &&
21412 n->as_ShiftV()->is_var_shift() &&
21413 VM_Version::supports_avx512bw());
21414 match(Set dst ( LShiftVB src shift));
21415 match(Set dst ( RShiftVB src shift));
21416 match(Set dst (URShiftVB src shift));
21417 effect(TEMP dst, TEMP vtmp);
21418 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21419 ins_encode %{
21420 assert(UseAVX > 2, "required");
21421
21422 int opcode = this->ideal_Opcode();
21423 int vlen_enc = vector_length_encoding(this);
21424 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21425 %}
21426 ins_pipe( pipe_slow );
21427 %}
21428
21429 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21430 predicate(Matcher::vector_length(n) == 64 &&
21431 n->as_ShiftV()->is_var_shift() &&
21432 VM_Version::supports_avx512bw());
21433 match(Set dst ( LShiftVB src shift));
21434 match(Set dst ( RShiftVB src shift));
21435 match(Set dst (URShiftVB src shift));
21436 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21437 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21438 ins_encode %{
21439 assert(UseAVX > 2, "required");
21440
21441 int opcode = this->ideal_Opcode();
21442 int vlen_enc = Assembler::AVX_256bit;
21443 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21444 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21445 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21446 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21447 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21448 %}
21449 ins_pipe( pipe_slow );
21450 %}
21451
21452 // Short variable shift
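// Without AVX512BW there is no variable word shift either, so shorts are
// widened to dwords, shifted via varshiftd, masked back to 16 bits and
// re-packed with vpackusdw.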
21453 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21454 predicate(Matcher::vector_length(n) <= 8 &&
21455 n->as_ShiftV()->is_var_shift() &&
21456 !VM_Version::supports_avx512bw());
21457 match(Set dst ( LShiftVS src shift));
21458 match(Set dst ( RShiftVS src shift));
21459 match(Set dst (URShiftVS src shift));
21460 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21462 ins_encode %{
21463 assert(UseAVX >= 2, "required");
21464
21465 int opcode = this->ideal_Opcode();
21466 bool sign = (opcode != Op_URShiftVS);
21467 int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21470 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21471 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21472 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21473 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21474 %}
21475 ins_pipe( pipe_slow );
21476 %}
21477
21478 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21479 predicate(Matcher::vector_length(n) == 16 &&
21480 n->as_ShiftV()->is_var_shift() &&
21481 !VM_Version::supports_avx512bw());
21482 match(Set dst ( LShiftVS src shift));
21483 match(Set dst ( RShiftVS src shift));
21484 match(Set dst (URShiftVS src shift));
21485 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21487 ins_encode %{
21488 assert(UseAVX >= 2, "required");
21489
21490 int opcode = this->ideal_Opcode();
21491 bool sign = (opcode != Op_URShiftVS);
21492 int vlen_enc = Assembler::AVX_256bit;
21493 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21494 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21495 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21496 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21497 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21498
21499 // Shift upper half, with result in dst using vtmp1 as TEMP
21500 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21501 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21502 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21503 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21504 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21505 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21506
21507 // Merge lower and upper half result into dst
21508 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21509 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21510 %}
21511 ins_pipe( pipe_slow );
21512 %}
21513
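// With AVX512BW the hardware has per-element word shifts
// (vpsllvw/vpsrlvw/vpsravw), which varshiftw uses directly.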
21514 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21515 predicate(n->as_ShiftV()->is_var_shift() &&
21516 VM_Version::supports_avx512bw());
21517 match(Set dst ( LShiftVS src shift));
21518 match(Set dst ( RShiftVS src shift));
21519 match(Set dst (URShiftVS src shift));
21520 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21521 ins_encode %{
21522 assert(UseAVX > 2, "required");
21523
21524 int opcode = this->ideal_Opcode();
21525 int vlen_enc = vector_length_encoding(this);
21526 if (!VM_Version::supports_avx512vl()) {
21527 vlen_enc = Assembler::AVX_512bit;
21528 }
21529 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21530 %}
21531 ins_pipe( pipe_slow );
21532 %}
21533
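// AVX2 introduced per-element variable dword shifts (vpsllvd/vpsrlvd/vpsravd),
// so the integer case is a single instruction.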
// Integer variable shift
21535 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21536 predicate(n->as_ShiftV()->is_var_shift());
21537 match(Set dst ( LShiftVI src shift));
21538 match(Set dst ( RShiftVI src shift));
21539 match(Set dst (URShiftVI src shift));
21540 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21541 ins_encode %{
21542 assert(UseAVX >= 2, "required");
21543
21544 int opcode = this->ideal_Opcode();
21545 int vlen_enc = vector_length_encoding(this);
21546 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21547 %}
21548 ins_pipe( pipe_slow );
21549 %}
21550
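// AVX2 has variable logical qword shifts (vpsllvq/vpsrlvq) but no vpsravq;
// the arithmetic variant is handled by the rules below.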
// Long variable shift
21552 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21553 predicate(n->as_ShiftV()->is_var_shift());
21554 match(Set dst ( LShiftVL src shift));
21555 match(Set dst (URShiftVL src shift));
21556 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21557 ins_encode %{
21558 assert(UseAVX >= 2, "required");
21559
21560 int opcode = this->ideal_Opcode();
21561 int vlen_enc = vector_length_encoding(this);
21562 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21563 %}
21564 ins_pipe( pipe_slow );
21565 %}
21566
// Long variable arithmetic right shift
21568 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21569 predicate(Matcher::vector_length(n) <= 4 &&
21570 n->as_ShiftV()->is_var_shift() &&
21571 UseAVX == 2);
21572 match(Set dst (RShiftVL src shift));
21573 effect(TEMP dst, TEMP vtmp);
21574 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21575 ins_encode %{
21576 int opcode = this->ideal_Opcode();
21577 int vlen_enc = vector_length_encoding(this);
21578 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21579 $vtmp$$XMMRegister);
21580 %}
21581 ins_pipe( pipe_slow );
21582 %}
21583
21584 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21585 predicate(n->as_ShiftV()->is_var_shift() &&
21586 UseAVX > 2);
21587 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21589 ins_encode %{
21590 int opcode = this->ideal_Opcode();
21591 int vlen_enc = vector_length_encoding(this);
21592 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21593 %}
21594 ins_pipe( pipe_slow );
21595 %}
21596
21597 // --------------------------------- AND --------------------------------------
21598
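// Bitwise logical operations are element-size agnostic, so one rule per
// operand form covers every vector element type.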
21599 instruct vand(vec dst, vec src) %{
21600 predicate(UseAVX == 0);
21601 match(Set dst (AndV dst src));
21602 format %{ "pand $dst,$src\t! and vectors" %}
21603 ins_encode %{
21604 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21605 %}
21606 ins_pipe( pipe_slow );
21607 %}
21608
21609 instruct vand_reg(vec dst, vec src1, vec src2) %{
21610 predicate(UseAVX > 0);
21611 match(Set dst (AndV src1 src2));
21612 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21613 ins_encode %{
21614 int vlen_enc = vector_length_encoding(this);
21615 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21616 %}
21617 ins_pipe( pipe_slow );
21618 %}
21619
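// Memory-operand forms read a full vector width from memory, so they are
// limited to vectors wider than 8 bytes to avoid over-reading the source.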
21620 instruct vand_mem(vec dst, vec src, memory mem) %{
21621 predicate((UseAVX > 0) &&
21622 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21623 match(Set dst (AndV src (LoadVector mem)));
21624 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21625 ins_encode %{
21626 int vlen_enc = vector_length_encoding(this);
21627 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21628 %}
21629 ins_pipe( pipe_slow );
21630 %}
21631
21632 // --------------------------------- OR ---------------------------------------
21633
21634 instruct vor(vec dst, vec src) %{
21635 predicate(UseAVX == 0);
21636 match(Set dst (OrV dst src));
21637 format %{ "por $dst,$src\t! or vectors" %}
21638 ins_encode %{
21639 __ por($dst$$XMMRegister, $src$$XMMRegister);
21640 %}
21641 ins_pipe( pipe_slow );
21642 %}
21643
21644 instruct vor_reg(vec dst, vec src1, vec src2) %{
21645 predicate(UseAVX > 0);
21646 match(Set dst (OrV src1 src2));
21647 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21648 ins_encode %{
21649 int vlen_enc = vector_length_encoding(this);
21650 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21651 %}
21652 ins_pipe( pipe_slow );
21653 %}
21654
21655 instruct vor_mem(vec dst, vec src, memory mem) %{
21656 predicate((UseAVX > 0) &&
21657 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21658 match(Set dst (OrV src (LoadVector mem)));
21659 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21660 ins_encode %{
21661 int vlen_enc = vector_length_encoding(this);
21662 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21663 %}
21664 ins_pipe( pipe_slow );
21665 %}
21666
21667 // --------------------------------- XOR --------------------------------------
21668
21669 instruct vxor(vec dst, vec src) %{
21670 predicate(UseAVX == 0);
21671 match(Set dst (XorV dst src));
21672 format %{ "pxor $dst,$src\t! xor vectors" %}
21673 ins_encode %{
21674 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21675 %}
21676 ins_pipe( pipe_slow );
21677 %}
21678
21679 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21680 predicate(UseAVX > 0);
21681 match(Set dst (XorV src1 src2));
21682 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21683 ins_encode %{
21684 int vlen_enc = vector_length_encoding(this);
21685 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21686 %}
21687 ins_pipe( pipe_slow );
21688 %}
21689
21690 instruct vxor_mem(vec dst, vec src, memory mem) %{
21691 predicate((UseAVX > 0) &&
21692 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21693 match(Set dst (XorV src (LoadVector mem)));
21694 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21695 ins_encode %{
21696 int vlen_enc = vector_length_encoding(this);
21697 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21698 %}
21699 ins_pipe( pipe_slow );
21700 %}
21701
21702 // --------------------------------- VectorCast --------------------------------------
21703
21704 instruct vcastBtoX(vec dst, vec src) %{
21705 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21706 match(Set dst (VectorCastB2X src));
21707 format %{ "vector_cast_b2x $dst,$src\t!" %}
21708 ins_encode %{
21709 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21710 int vlen_enc = vector_length_encoding(this);
21711 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21712 %}
21713 ins_pipe( pipe_slow );
21714 %}
21715
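// Without AVX512VL, 128/256-bit operations cannot be EVEX-encoded and hence
// cannot address xmm16-31, so this variant is pinned to legVec (xmm0-15).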
21716 instruct vcastBtoD(legVec dst, legVec src) %{
21717 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21718 match(Set dst (VectorCastB2X src));
21719 format %{ "vector_cast_b2x $dst,$src\t!" %}
21720 ins_encode %{
21721 int vlen_enc = vector_length_encoding(this);
21722 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21723 %}
21724 ins_pipe( pipe_slow );
21725 %}
21726
21727 instruct castStoX(vec dst, vec src) %{
21728 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21729 Matcher::vector_length(n->in(1)) <= 8 && // src
21730 Matcher::vector_element_basic_type(n) == T_BYTE);
21731 match(Set dst (VectorCastS2X src));
21732 format %{ "vector_cast_s2x $dst,$src" %}
21733 ins_encode %{
21734 assert(UseAVX > 0, "required");
21735
21736 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21737 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21738 %}
21739 ins_pipe( pipe_slow );
21740 %}
21741
21742 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21743 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21744 Matcher::vector_length(n->in(1)) == 16 && // src
21745 Matcher::vector_element_basic_type(n) == T_BYTE);
21746 effect(TEMP dst, TEMP vtmp);
21747 match(Set dst (VectorCastS2X src));
21748 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21749 ins_encode %{
21750 assert(UseAVX > 0, "required");
21751
21752 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21753 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21754 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21755 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21756 %}
21757 ins_pipe( pipe_slow );
21758 %}
21759
21760 instruct vcastStoX_evex(vec dst, vec src) %{
21761 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21762 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21763 match(Set dst (VectorCastS2X src));
21764 format %{ "vector_cast_s2x $dst,$src\t!" %}
21765 ins_encode %{
21766 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21767 int src_vlen_enc = vector_length_encoding(this, $src);
21768 int vlen_enc = vector_length_encoding(this);
21769 switch (to_elem_bt) {
21770 case T_BYTE:
21771 if (!VM_Version::supports_avx512vl()) {
21772 vlen_enc = Assembler::AVX_512bit;
21773 }
21774 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21775 break;
21776 case T_INT:
21777 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21778 break;
21779 case T_FLOAT:
21780 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21781 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21782 break;
21783 case T_LONG:
21784 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21785 break;
21786 case T_DOUBLE: {
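        // Widen in two steps: sign-extend words to dwords at half the
        // destination width, then convert dwords to doubles at full width.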
21787 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21788 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21789 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21790 break;
21791 }
21792 default:
21793 ShouldNotReachHere();
21794 }
21795 %}
21796 ins_pipe( pipe_slow );
21797 %}
21798
21799 instruct castItoX(vec dst, vec src) %{
21800 predicate(UseAVX <= 2 &&
21801 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21802 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21803 match(Set dst (VectorCastI2X src));
21804 format %{ "vector_cast_i2x $dst,$src" %}
21805 ins_encode %{
21806 assert(UseAVX > 0, "required");
21807
21808 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21809 int vlen_enc = vector_length_encoding(this, $src);
21810
21811 if (to_elem_bt == T_BYTE) {
21812 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21813 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21814 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21815 } else {
21816 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21817 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21818 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21819 }
21820 %}
21821 ins_pipe( pipe_slow );
21822 %}
21823
21824 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21825 predicate(UseAVX <= 2 &&
21826 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21827 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21828 match(Set dst (VectorCastI2X src));
21829 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21830 effect(TEMP dst, TEMP vtmp);
21831 ins_encode %{
21832 assert(UseAVX > 0, "required");
21833
21834 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21835 int vlen_enc = vector_length_encoding(this, $src);
21836
21837 if (to_elem_bt == T_BYTE) {
21838 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21839 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21840 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21841 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21842 } else {
21843 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21844 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21845 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21846 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21847 }
21848 %}
21849 ins_pipe( pipe_slow );
21850 %}
21851
21852 instruct vcastItoX_evex(vec dst, vec src) %{
21853 predicate(UseAVX > 2 ||
21854 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21855 match(Set dst (VectorCastI2X src));
21856 format %{ "vector_cast_i2x $dst,$src\t!" %}
21857 ins_encode %{
21858 assert(UseAVX > 0, "required");
21859
21860 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21861 int src_vlen_enc = vector_length_encoding(this, $src);
21862 int dst_vlen_enc = vector_length_encoding(this);
21863 switch (dst_elem_bt) {
21864 case T_BYTE:
21865 if (!VM_Version::supports_avx512vl()) {
21866 src_vlen_enc = Assembler::AVX_512bit;
21867 }
21868 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21869 break;
21870 case T_SHORT:
21871 if (!VM_Version::supports_avx512vl()) {
21872 src_vlen_enc = Assembler::AVX_512bit;
21873 }
21874 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21875 break;
21876 case T_FLOAT:
21877 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21878 break;
21879 case T_LONG:
21880 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21881 break;
21882 case T_DOUBLE:
21883 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21884 break;
21885 default:
21886 ShouldNotReachHere();
21887 }
21888 %}
21889 ins_pipe( pipe_slow );
21890 %}
21891
21892 instruct vcastLtoBS(vec dst, vec src) %{
21893 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21894 UseAVX <= 2);
21895 match(Set dst (VectorCastL2X src));
21896 format %{ "vector_cast_l2x $dst,$src" %}
21897 ins_encode %{
21898 assert(UseAVX > 0, "required");
21899
21900 int vlen = Matcher::vector_length_in_bytes(this, $src);
21901 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21902 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21903 : ExternalAddress(vector_int_to_short_mask());
21904 if (vlen <= 16) {
21905 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21906 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21907 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21908 } else {
21909 assert(vlen <= 32, "required");
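      // Gather the low dword of each qword: vpermilps(0b1000) pairs them
      // within each 128-bit lane, then vpermpd(0b1000) compacts both lanes
      // into the low 128 bits.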
21910 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21911 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21912 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21913 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21914 }
21915 if (to_elem_bt == T_BYTE) {
21916 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21917 }
21918 %}
21919 ins_pipe( pipe_slow );
21920 %}
21921
21922 instruct vcastLtoX_evex(vec dst, vec src) %{
21923 predicate(UseAVX > 2 ||
21924 (Matcher::vector_element_basic_type(n) == T_INT ||
21925 Matcher::vector_element_basic_type(n) == T_FLOAT ||
21926 Matcher::vector_element_basic_type(n) == T_DOUBLE));
21927 match(Set dst (VectorCastL2X src));
21928 format %{ "vector_cast_l2x $dst,$src\t!" %}
21929 ins_encode %{
21930 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21931 int vlen = Matcher::vector_length_in_bytes(this, $src);
21932 int vlen_enc = vector_length_encoding(this, $src);
21933 switch (to_elem_bt) {
21934 case T_BYTE:
21935 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21936 vlen_enc = Assembler::AVX_512bit;
21937 }
21938 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21939 break;
21940 case T_SHORT:
21941 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21942 vlen_enc = Assembler::AVX_512bit;
21943 }
21944 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21945 break;
21946 case T_INT:
21947 if (vlen == 8) {
21948 if ($dst$$XMMRegister != $src$$XMMRegister) {
21949 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21950 }
21951 } else if (vlen == 16) {
21952 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21953 } else if (vlen == 32) {
21954 if (UseAVX > 2) {
21955 if (!VM_Version::supports_avx512vl()) {
21956 vlen_enc = Assembler::AVX_512bit;
21957 }
21958 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21959 } else {
21960 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21961 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21962 }
21963 } else { // vlen == 64
21964 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21965 }
21966 break;
21967 case T_FLOAT:
21968 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21969 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21970 break;
21971 case T_DOUBLE:
21972 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21973 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21974 break;
21975
21976 default: assert(false, "%s", type2name(to_elem_bt));
21977 }
21978 %}
21979 ins_pipe( pipe_slow );
21980 %}
21981
21982 instruct vcastFtoD_reg(vec dst, vec src) %{
21983 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21984 match(Set dst (VectorCastF2X src));
21985 format %{ "vector_cast_f2d $dst,$src\t!" %}
21986 ins_encode %{
21987 int vlen_enc = vector_length_encoding(this);
21988 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21989 %}
21990 ins_pipe( pipe_slow );
21991 %}
21992
21994 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21995 predicate(!VM_Version::supports_avx10_2() &&
21996 !VM_Version::supports_avx512vl() &&
21997 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21998 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21999 is_integral_type(Matcher::vector_element_basic_type(n)));
22000 match(Set dst (VectorCastF2X src));
22001 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22002 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22003 ins_encode %{
22004 int vlen_enc = vector_length_encoding(this, $src);
22005 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading
    // addresses wider than 32 bits in register-indirect addressing mode: stub
    // constants live in the code cache, and ReservedCodeCacheSize is currently
    // capped at 2G. Targets are free to raise that limit, but a code cache larger
    // than 2G is unrealistic in practice; on the upside, the cap saves a temporary
    // register allocation, which in the limiting case can prevent spilling in
    // blocks with high register pressure.
22013 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22014 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22015 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22016 %}
22017 ins_pipe( pipe_slow );
22018 %}
22019
22020 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22021 predicate(!VM_Version::supports_avx10_2() &&
22022 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22023 is_integral_type(Matcher::vector_element_basic_type(n)));
22024 match(Set dst (VectorCastF2X src));
22025 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22026 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22027 ins_encode %{
22028 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22029 if (to_elem_bt == T_LONG) {
22030 int vlen_enc = vector_length_encoding(this);
22031 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22032 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22033 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22034 } else {
22035 int vlen_enc = vector_length_encoding(this, $src);
22036 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22037 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22038 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22039 }
22040 %}
22041 ins_pipe( pipe_slow );
22042 %}
22043
22044 instruct castFtoX_reg_avx10(vec dst, vec src) %{
22045 predicate(VM_Version::supports_avx10_2() &&
22046 is_integral_type(Matcher::vector_element_basic_type(n)));
22047 match(Set dst (VectorCastF2X src));
22048 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22049 ins_encode %{
22050 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22051 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22052 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22053 %}
22054 ins_pipe( pipe_slow );
22055 %}
22056
22057 instruct castFtoX_mem_avx10(vec dst, memory src) %{
22058 predicate(VM_Version::supports_avx10_2() &&
22059 is_integral_type(Matcher::vector_element_basic_type(n)));
22060 match(Set dst (VectorCastF2X (LoadVector src)));
22061 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22062 ins_encode %{
22063 int vlen = Matcher::vector_length(this);
22064 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22065 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22066 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22067 %}
22068 ins_pipe( pipe_slow );
22069 %}
22070
22071 instruct vcastDtoF_reg(vec dst, vec src) %{
22072 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22073 match(Set dst (VectorCastD2X src));
22074 format %{ "vector_cast_d2x $dst,$src\t!" %}
22075 ins_encode %{
22076 int vlen_enc = vector_length_encoding(this, $src);
22077 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22078 %}
22079 ins_pipe( pipe_slow );
22080 %}
22081
22082 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22083 predicate(!VM_Version::supports_avx10_2() &&
22084 !VM_Version::supports_avx512vl() &&
22085 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22086 is_integral_type(Matcher::vector_element_basic_type(n)));
22087 match(Set dst (VectorCastD2X src));
22088 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22089 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22090 ins_encode %{
22091 int vlen_enc = vector_length_encoding(this, $src);
22092 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22093 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22094 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22095 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22096 %}
22097 ins_pipe( pipe_slow );
22098 %}
22099
22100 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22101 predicate(!VM_Version::supports_avx10_2() &&
22102 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22103 is_integral_type(Matcher::vector_element_basic_type(n)));
22104 match(Set dst (VectorCastD2X src));
22105 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22106 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22107 ins_encode %{
22108 int vlen_enc = vector_length_encoding(this, $src);
22109 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22110 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22111 ExternalAddress(vector_float_signflip());
22112 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22113 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22114 %}
22115 ins_pipe( pipe_slow );
22116 %}
22117
22118 instruct castDtoX_reg_avx10(vec dst, vec src) %{
22119 predicate(VM_Version::supports_avx10_2() &&
22120 is_integral_type(Matcher::vector_element_basic_type(n)));
22121 match(Set dst (VectorCastD2X src));
22122 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22123 ins_encode %{
22124 int vlen_enc = vector_length_encoding(this, $src);
22125 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22126 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22127 %}
22128 ins_pipe( pipe_slow );
22129 %}
22130
22131 instruct castDtoX_mem_avx10(vec dst, memory src) %{
22132 predicate(VM_Version::supports_avx10_2() &&
22133 is_integral_type(Matcher::vector_element_basic_type(n)));
22134 match(Set dst (VectorCastD2X (LoadVector src)));
22135 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22136 ins_encode %{
22137 int vlen = Matcher::vector_length(this);
22138 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22139 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22140 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22141 %}
22142 ins_pipe( pipe_slow );
22143 %}
22144
22145 instruct vucast(vec dst, vec src) %{
22146 match(Set dst (VectorUCastB2X src));
22147 match(Set dst (VectorUCastS2X src));
22148 match(Set dst (VectorUCastI2X src));
22149 format %{ "vector_ucast $dst,$src\t!" %}
22150 ins_encode %{
22151 assert(UseAVX > 0, "required");
22152
22153 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22154 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22155 int vlen_enc = vector_length_encoding(this);
22156 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22157 %}
22158 ins_pipe( pipe_slow );
22159 %}
22160
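// Vector Math.round: round(x) == floor(x + 0.5), so new_mxcsr (0x3F80, RC=01)
// temporarily switches MXCSR to round-toward-negative-infinity around the
// conversion; the EnableX86ECoreOpts value additionally pre-sets the exception
// flag bits, presumably to avoid partial MXCSR update stalls.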
22161 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22162 predicate(!VM_Version::supports_avx512vl() &&
22163 Matcher::vector_length_in_bytes(n) < 64 &&
22164 Matcher::vector_element_basic_type(n) == T_INT);
22165 match(Set dst (RoundVF src));
22166 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22167 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22168 ins_encode %{
22169 int vlen_enc = vector_length_encoding(this);
22170 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22171 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22172 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22173 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22174 %}
22175 ins_pipe( pipe_slow );
22176 %}
22177
22178 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22179 predicate((VM_Version::supports_avx512vl() ||
22180 Matcher::vector_length_in_bytes(n) == 64) &&
22181 Matcher::vector_element_basic_type(n) == T_INT);
22182 match(Set dst (RoundVF src));
22183 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22184 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22185 ins_encode %{
22186 int vlen_enc = vector_length_encoding(this);
22187 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22188 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22189 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22190 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22191 %}
22192 ins_pipe( pipe_slow );
22193 %}
22194
22195 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22196 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22197 match(Set dst (RoundVD src));
22198 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22199 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22200 ins_encode %{
22201 int vlen_enc = vector_length_encoding(this);
22202 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22203 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22204 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22205 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22206 %}
22207 ins_pipe( pipe_slow );
22208 %}
22209
22210 // --------------------------------- VectorMaskCmp --------------------------------------
22211
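// Mask compares produce either a regular vector with all-ones/all-zeros lanes
// (when bottom_type()->isa_vectmask() == nullptr) or an AVX-512 opmask in a
// kReg when predicated masks are in use.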
22212 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22213 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22214 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22215 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22216 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22217 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22218 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22219 ins_encode %{
22220 int vlen_enc = vector_length_encoding(this, $src1);
22221 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22222 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22223 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22224 } else {
22225 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22226 }
22227 %}
22228 ins_pipe( pipe_slow );
22229 %}
22230
22231 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22232 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22233 n->bottom_type()->isa_vectmask() == nullptr &&
22234 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22235 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22236 effect(TEMP ktmp);
22237 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22238 ins_encode %{
22239 int vlen_enc = Assembler::AVX_512bit;
22240 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22241 KRegister mask = k0; // The comparison itself is not being masked.
22242 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22243 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22244 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22245 } else {
22246 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22247 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22248 }
22249 %}
22250 ins_pipe( pipe_slow );
22251 %}
22252
22253 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22254 predicate(n->bottom_type()->isa_vectmask() &&
22255 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22256 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22257 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22258 ins_encode %{
22259 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22260 int vlen_enc = vector_length_encoding(this, $src1);
22261 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22262 KRegister mask = k0; // The comparison itself is not being masked.
22263 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22264 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22265 } else {
22266 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22267 }
22268 %}
22269 ins_pipe( pipe_slow );
22270 %}
22271
22272 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22273 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22274 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22275 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22276 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22277 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22278 (n->in(2)->get_int() == BoolTest::eq ||
22279 n->in(2)->get_int() == BoolTest::lt ||
22280 n->in(2)->get_int() == BoolTest::gt)); // cond
22281 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22282 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22283 ins_encode %{
22284 int vlen_enc = vector_length_encoding(this, $src1);
22285 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22286 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22287 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22288 %}
22289 ins_pipe( pipe_slow );
22290 %}
22291
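// Pre-AVX-512 integer compares only implement eq and gt directly (lt via
// swapped operands), so ne/le/ge are obtained by emitting the complementary
// predicate and negating the result, with $xtmp holding the all-ones value.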
22292 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22293 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22294 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22295 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22296 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22297 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22298 (n->in(2)->get_int() == BoolTest::ne ||
22299 n->in(2)->get_int() == BoolTest::le ||
22300 n->in(2)->get_int() == BoolTest::ge)); // cond
22301 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22302 effect(TEMP dst, TEMP xtmp);
22303 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22304 ins_encode %{
22305 int vlen_enc = vector_length_encoding(this, $src1);
22306 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22307 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22308 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22309 %}
22310 ins_pipe( pipe_slow );
22311 %}
22312
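// x86 has no unsigned integer compares before AVX-512; biasing both operands
// by xor-ing in the sign bit (high_bit_set) makes a signed compare yield the
// unsigned ordering.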
22313 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22314 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22315 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22316 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22317 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22318 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22319 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22320 effect(TEMP dst, TEMP xtmp);
22321 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22322 ins_encode %{
22323 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22324 int vlen_enc = vector_length_encoding(this, $src1);
22325 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22326 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22327
22328 if (vlen_enc == Assembler::AVX_128bit) {
22329 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22330 } else {
22331 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22332 }
22333 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22334 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22335 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22336 %}
22337 ins_pipe( pipe_slow );
22338 %}
22339
22340 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22341 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22342 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22343 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22344 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22345 effect(TEMP ktmp);
22346 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22347 ins_encode %{
22348 assert(UseAVX > 2, "required");
22349
22350 int vlen_enc = vector_length_encoding(this, $src1);
22351 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22352 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22353 KRegister mask = k0; // The comparison itself is not being masked.
22354 bool merge = false;
22355 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22356
22357 switch (src1_elem_bt) {
22358 case T_INT: {
22359 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22360 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22361 break;
22362 }
22363 case T_LONG: {
22364 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22365 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22366 break;
22367 }
22368 default: assert(false, "%s", type2name(src1_elem_bt));
22369 }
22370 %}
22371 ins_pipe( pipe_slow );
22372 %}
22373
22375 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22376 predicate(n->bottom_type()->isa_vectmask() &&
22377 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22378 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22380 ins_encode %{
22381 assert(UseAVX > 2, "required");
22382 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22383
22384 int vlen_enc = vector_length_encoding(this, $src1);
22385 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22386 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22387 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22388
    // Compare at the element width of src1 and set the opmask accordingly.
22390 switch (src1_elem_bt) {
22391 case T_BYTE: {
22392 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22393 break;
22394 }
22395 case T_SHORT: {
22396 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22397 break;
22398 }
22399 case T_INT: {
22400 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22401 break;
22402 }
22403 case T_LONG: {
22404 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22405 break;
22406 }
22407 default: assert(false, "%s", type2name(src1_elem_bt));
22408 }
22409 %}
22410 ins_pipe( pipe_slow );
22411 %}
22412
22413 // Extract
22414
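// Lanes within the low 128 bits are extracted directly; for wider vectors the
// 128-bit lane containing the element is first moved into a temporary via
// get_lane, then the element is picked out of it with get_elem.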
22415 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22416 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22417 match(Set dst (ExtractI src idx));
22418 match(Set dst (ExtractS src idx));
22419 match(Set dst (ExtractB src idx));
22420 format %{ "extractI $dst,$src,$idx\t!" %}
22421 ins_encode %{
22422 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22423
22424 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22425 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22426 %}
22427 ins_pipe( pipe_slow );
22428 %}
22429
22430 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22431 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22432 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22433 match(Set dst (ExtractI src idx));
22434 match(Set dst (ExtractS src idx));
22435 match(Set dst (ExtractB src idx));
22436 effect(TEMP vtmp);
22437 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22438 ins_encode %{
22439 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22440
22441 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22442 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22443 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22444 %}
22445 ins_pipe( pipe_slow );
22446 %}
22447
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
            Matcher::vector_length(n->in(1)) == 16);  // src
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Blend --------------------------------------

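// VectorBlend selects src2 lanes where the corresponding mask lane is set and
// src1 lanes where it is clear: dst = (mask & src2) | (~mask & src1).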
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

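// When EnableX86ECoreOpts is set, the blend is emulated with vpandn/vpand/vpor
// so that vpblendvb is avoided on CPUs where it is the slower choice.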
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_vectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
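// 128-bit vectors use the SSSE3 pabs* forms; wider vectors use the AVX forms,
// which carry an explicit vector-length encoding.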
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------

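// Abs and Neg share one pattern per type: the ideal opcode selects the
// operation, and the assembler applies the corresponding sign-bit mask
// constant (the [mask] in the format strings).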
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- VectorTest --------------------------------------------

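// VectorTest only sets condition flags; the matched Bool consumer reads them
// as BoolTest::overflow for alltrue and BoolTest::ne for anytrue (see the
// ktest predicates below).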
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}

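// For mask lengths of at most 8 the k register is moved to a GPR and clipped
// to masklen bits: alltrue compares against the all-ones pattern, while
// anytrue only needs the flags set by the andl itself.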
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- LoadMask --------------------------------------------

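// VectorLoadMask widens boolean (0/1) elements into a lane mask with all bits
// set in true lanes, produced either in a vector register or, for vectmask
// types, directly in a k register.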
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- StoreMask --------------------------------------------

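// VectorStoreMask narrows a lane mask back to one byte per element; the
// trailing pabsb/vpabsb maps all-ones (-1) lanes to the boolean value 1.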
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Load Iota Indices ----------------------------------

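// Iota indices (0, 1, 2, ...) come from a constant table. PopulateIndex below
// computes dst[i] = src1 + i by broadcasting src1 and adding that table.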
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- Rearrange ----------------------------------

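// VectorRearrange computes dst[i] = src[shuffle[i]]. Where no permute
// instruction exists for the element size, the matching VectorLoadShuffle
// pattern first rewrites the shuffle into byte (or dword) indices.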
// LoadShuffle/Rearrange for Byte
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

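// The vfma* forms fuse the multiply and the add with a single rounding step;
// the _mem variants additionally fold the load of the second multiplicand.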
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

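// pmaddwd multiplies adjacent pairs of 16-bit elements and adds each pair
// into one 32-bit result, hence packedS-to-I.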
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

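// With AVX512_VNNI, evpdpwssd fuses the multiply-add with the accumulating
// add; the low ins_cost makes this form win over the separate two-node match.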
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

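// Targets passing is_vector_popcount_predicate use the EVEX popcount
// instructions directly; others fall back to a table-based AVX emulation,
// which is why that path needs the extra temporaries.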
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Trailing Zeros Count --------------------------------------

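// Trailing-zero counts reuse the leading-zero hardware (note the is_clz_*
// predicate): tzcnt is presumably derived by applying lzcnt to the isolated
// lowest set bit of each lane.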
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Bitwise Ternary Logic ----------------------------------

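// vpternlogd applies $func as an 8-bit truth table, bit for bit, to the
// triple (dst, src2, src3); e.g. func == 0x96 computes dst ^ src2 ^ src3.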
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Rotation Operations ----------------------------------
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Masked Operations ------------------------------------
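// AVX targets carry the mask in a vector register and use vmovmask; EVEX
// targets keep it in a k register and use the masked evmovdqu forms.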
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

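// VectorCmpMasked produces -1 when all lanes selected by $mask compare equal,
// otherwise the index of the first mismatching lane: lanes outside the mask
// are forced true via knotql, and tzcntq finds the first failing lane.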
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    __ mov64($dst$$Register, -1L);
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

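// The immediate form materializes ((1 << len) - 1), i.e. len consecutive set
// bits, in a GPR and moves it into the mask register.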
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
    __ kmovql($dst$$KRegister, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

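// The mask query nodes below (VectorMaskToLong, VectorMaskTrueCount,
// VectorMaskFirst/LastTrue) all funnel into vector_mask_operation, dispatching
// on whether the mask lives in a k register, a boolean vector, or a
// VectorStoreMask result.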
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

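// VectorMaskTrueCount (population count of the set lanes) mirrors the same
// three mask representations as VectorMaskToLong above.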
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

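// VectorMaskFirstTrue/LastTrue locate the lowest/highest set lane; the shared
// macro-assembler helper dispatches on the ideal opcode.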
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Compress/Expand Operations ---------------------------
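// CompressV packs the mask-selected lanes of src into the low lanes of dst;
// ExpandV performs the inverse scatter. AVX2 targets emulate this with a
// permute-table sequence, while AVX-512 targets can use the native
// vpcompress/vpexpand forms.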
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "mask expected");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------

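// ReverseV reverses the bit order within each lane, ReverseBytesV the byte
// order. With GFNI, vgf2p8affineqb against the matrix constant
// 0x8040201008040201 reverses the bits of every byte in a single instruction.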
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Count Leading Zeros -----------------------------------

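// CountLeadingZerosV: AVX-512CD supplies vplzcnt for int/long lanes; subword
// and pre-AVX-512 cases are synthesized in the macro assembler with lookup
// tables and the temporaries declared below.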
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------

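// These patterns match masked ideal nodes in which dst doubles as the first
// source. The 'true' passed to evmasked_op selects merge masking: lanes whose
// mask bit is clear keep their previous dst value.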
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
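    // 'false' selects zero masking here: lanes with a clear mask bit are
    // zeroed rather than merged from dst.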
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "FMA instruction support is required.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "FMA instruction support is required.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Dispatch the comparison on the element type of the first source vector.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}

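// MaskAll broadcasts a scalar boolean into every lane of an opmask register.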
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t! mask all operation" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

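// The vfpclass imm8 0x18 tests the +Infinity (bit 3) and -Infinity (bit 4)
// classes, so the result bit is set for either infinity.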
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

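// --------------------------------- Saturating Vector Operations -------------------------
// Subword lanes map onto the native packed saturating instructions; int/long
// lanes are emulated with explicit overflow detection in the EVEX and AVX
// helpers below.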
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

25036 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25037 %{
25038 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25039 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25040 ins_encode %{
25041 int vlen_enc = vector_length_encoding(this);
25042 BasicType bt = Matcher::vector_element_basic_type(this);
25043 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25044 %}
25045 ins_pipe(pipe_slow);
25046 %}
25047
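// The rules below implement scalar and packed half-precision (Float16)
// operations. A scalar half-precision value is held in the low 16 bits of an
// XMM register; vmovw moves that 16-bit payload between a general-purpose
// register and an XMM register.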
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma scalarH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepprocedure ( procedure_name );
// // provides the name of a procedure that performs the optimization. The
// // procedure should reside in the architecture-dependent peephole file and
// // have the signature MachNode* (Block*, int, PhaseRegAlloc*,
// // (MachNode*)(*)(), int...). The arguments are the basic block, the index
// // of the current node inside the block, the register allocator, a function
// // that when invoked returns a new node as defined in peepreplace, and the
// // rule numbers of the nodes appearing in the corresponding peepmatch. The
// // procedure returns true if successful, otherwise false.
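// //
// // For example, a hypothetical procedure for a two-instruction peepmatch
// // could be declared as follows (names illustrative only):
// //   MachNode* my_peep_procedure(Block* block, int index, PhaseRegAlloc* ra,
// //                               MachNode* (*new_root)(), int rule0, int rule1);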
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based, using left-to-right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command line.
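//
// For example (OptoPeephole and OptoPeepholeAt are develop flags, so these
// invocations assume a debug build of the VM):
//   -XX:-OptoPeephole        disable all peephole optimizations
//   -XX:OptoPeepholeAt=5     enable only the peephole rule numbered 5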
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only transformations inside a basic block (do we need more for peephole?)
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src)
// %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// instruct leaI_rReg_immI(rRegI dst, immI_1 src)
// %{
//   match(Set dst (AddI dst src));
// %}
//
// 1. Simple replacement
// - Only match adjacent instructions in the same basic block
// - Only equality constraints
// - Only constraints between operands, not (0.dest_reg == RAX_enc)
// - Only one replacement instruction
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // increment preceded by register-register move
//   peepmatch ( incI_rReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// 2. Procedural replacement
// - More flexibility in finding relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilize the architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing capabilities
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of these nodes are passed into the procedure below
//   peepmatch ( incI_rReg movI );
//   // the procedure that takes responsibility for the transformation
//   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the procedure above
//   peepreplace ( leaI_rReg_immI() );
// %}

// These instructions are not matched by the matcher but are used by the peephole rules below
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    if (src1 != rbp && src1 != r13) {
      __ leal(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leal(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leal($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leal($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    if (src1 != rbp && src1 != r13) {
      __ leaq(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leaq(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leaq($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leaq($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
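//
// For example, the pair
//   movl  rdx, rsi
//   addl  rdx, rdi
// can be coalesced into the single instruction
//   leal  rdx, [rsi + rdi]
// which frees the ALU add and leaves rsi/rdi untouched (register names are
// illustrative only).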

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}

peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}

// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the preceding instruction.
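//
// For example, in the (illustrative) sequence
//   andl  rax, rbx
//   testl rax, rax
//   je    slow_path
// the testl can be removed: andl already sets ZF according to its result, so
// the branch observes the same flags either way.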

// int variant
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// long variant
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}


//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.