1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
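//
// For example, the first entry below,
//
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
//
// reads as: save-on-call for compiled Java code, save-on-call under the C
// calling convention, spilled as an int (Op_RegI), hardware encoding 0, and
// backed by the VMReg slot returned by rax->as_VMReg(). The matching _H
// entries name the upper halves of the same 64-bit registers, so long and
// pointer values always occupy a two-slot pair.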
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
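//
// Background, not part of the ADLC grammar: the REX prefix is a single byte
// of the form 0100WRXB. The 3-bit register fields in ModRM/SIB cannot name
// encodings 8-15, so the assembler sets the matching extension bit, roughly
//
//   rex = 0x40 | (wide      ? 0x8 : 0)    // REX.W: 64-bit operand size
//              | (reg   >= 8 ? 0x4 : 0)   // REX.R: extends ModRM.reg
//              | (index >= 8 ? 0x2 : 0)   // REX.X: extends SIB.index
//              | (base  >= 8 ? 0x1 : 0);  // REX.B: extends ModRM.rm/base
//
// and an otherwise empty REX (0x40) is what allows RSP, RBP, RSI and RDI to
// be addressed as the byte registers SPL, BPL, SIL and DIL instead of
// AH, CH, DH and BH.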
65
66 // RBX, RSI, and RDI were previously save-on-entry for Java code, but SOE was
67 // turned off because uncommon traps were frequent. Now that the allocator is
68 // better, RSI and RDI are save-on-entry registers again.
69
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // Now that adapter frames are gone, RBP is always saved and restored by the prologue/epilogue code.
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
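//
// As a concrete reading of chunk0 below: the pure scratch registers R10 and
// R11 come first so they are preferred, registers with fixed roles in calling
// sequences such as RCX, RDX, RSI, RDI and RAX appear later, and RSP is
// listed last because it is never actually available for allocation.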
188
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers.  512-bit registers, i.e. 16 32-bit words each, labeled (a)-(p).
223 // Word a in each register holds a Float; words a-b hold a Double.
224 // The whole registers are used by SSE4.2 intrinsics, array copy stubs and
225 // superword operations (see the UseSSE42Intrinsics, UseXMMForArrayCopy and
226 // UseSuperword flags).
227 // For pre-EVEX architectures:
228 //      XMM8-XMM15 must be encoded with REX (or VEX when UseAVX is enabled)
229 // For EVEX-enabled architectures:
230 //      XMM8-XMM31 need extended encodings; XMM16-XMM31 can only be encoded with EVEX.
231 //
232 // Linux ABI:   no XMM registers are preserved across function calls;
233 //              XMM0-XMM7 may hold parameters
234 // Windows ABI: XMM6-XMM15 are preserved across function calls;
235 //              XMM0-XMM3 may hold parameters
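//
// Concretely: a Float occupies word a (e.g. XMM0), a Double the pair
// XMM0/XMM0b, a 128-bit vector words a-d, a 256-bit vector words a-h, and a
// full 512-bit vector all sixteen words a-p. That is why every xmmN register
// is described by sixteen reg_def entries below, one VMReg slot per 32-bit
// word.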
236
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
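// K0 is not defined here: in the EVEX encoding a mask-field value of zero
// means "no masking", so k0 cannot serve as a predicate register and there
// is nothing useful for the allocator to hand out.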
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
813 // Empty register class.
814 reg_class no_reg();
815
816 // Class for all pointer/long registers including APX extended GPRs.
817 reg_class all_reg(RAX, RAX_H,
818 RDX, RDX_H,
819 RBP, RBP_H,
820 RDI, RDI_H,
821 RSI, RSI_H,
822 RCX, RCX_H,
823 RBX, RBX_H,
824 RSP, RSP_H,
825 R8, R8_H,
826 R9, R9_H,
827 R10, R10_H,
828 R11, R11_H,
829 R12, R12_H,
830 R13, R13_H,
831 R14, R14_H,
832 R15, R15_H,
833 R16, R16_H,
834 R17, R17_H,
835 R18, R18_H,
836 R19, R19_H,
837 R20, R20_H,
838 R21, R21_H,
839 R22, R22_H,
840 R23, R23_H,
841 R24, R24_H,
842 R25, R25_H,
843 R26, R26_H,
844 R27, R27_H,
845 R28, R28_H,
846 R29, R29_H,
847 R30, R30_H,
848 R31, R31_H);
849
850 // Class for all int registers including APX extended GPRs.
851 reg_class all_int_reg(RAX,
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
881
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
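//
// The classes above do not enumerate registers. Each body returns a RegMask
// (e.g. _INT_REG_mask) that is expected to be built from C++ code elsewhere
// in this file, so registers with special roles (R12 as heap base, R15 as the
// thread register, RBP when PreserveFramePointer is set, the APX GPRs when
// UseAPX is off) can be added to or removed from the allocatable sets at VM
// start-up. An illustrative sketch only -- the exact helper names here are
// assumptions:
//
//   void reg_mask_init() {
//     _INT_REG_mask = _ALL_INT_REG_mask;
//     if (PreserveFramePointer) {
//       _INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
//     }
//     // ... likewise for the pointer/long masks and R12/R15 ...
//   }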
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre-EVEX (legacy) float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for EVEX float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
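// reg_class_dynamic selects the first (EVEX) class when the predicate holds at
// runtime and falls back to the legacy class otherwise.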
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre-EVEX double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for EVEX double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre-EVEX 32-bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for EVEX 32-bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
1226 // Class for pre-EVEX 64-bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
1244 // Class for EVEX 64-bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
1281 // Class for pre-EVEX 128-bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
1299 // Class for EVEX 128-bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
1336 // Class for pre-EVEX 256-bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
1354 // Class for EVEX 256-bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
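// Returns true when both bounds of the CastLL node's long type are either
// unbounded or fit in a signed 32-bit immediate.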
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
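// R12 holds the compressed-oop heap base, so it must stay out of the
// allocatable register masks whenever compressed oops are in use.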
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
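// Builds the derived register masks declared above once the flag-dependent
// state (UseAPX, PreserveFramePointer, UseCompressedOops) is known, by pruning
// the adlc-generated masks.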
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
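// vzeroupper is emitted around calls and returns when the compiled code may
// leave dirty upper YMM/ZMM state, to avoid AVX-SSE transition penalties.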
1626 static bool generate_vzeroupper(Compile* C) {
1627   return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
1628 }
1629
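// Byte size of the optional vzeroupper (3 bytes) accounted for in call sizing.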
1630 static int clear_avx_size() {
1631   return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702   // The floating point compare sets CF=1 for the unordered case (at least one input is NaN),
1703   // so the 'below' branch is also taken for NaN and dst keeps the value -1.
1704 Label done;
1705 __ movl(dst, -1);
1706 __ jcc(Assembler::below, done);
1707 __ setcc(Assembler::notEqual, dst);
1708 __ bind(done);
1709 }
1710
1711 enum FP_PREC {
1712 fp_prec_hlf,
1713 fp_prec_flt,
1714 fp_prec_dbl
1715 };
1716
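// Emit an unordered floating point compare of p against q for the requested
// precision (half, single or double).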
1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1718 XMMRegister p, XMMRegister q) {
1719 if (pt == fp_prec_hlf) {
1720 __ evucomish(p, q);
1721 } else if (pt == fp_prec_flt) {
1722 __ ucomiss(p, q);
1723 } else {
1724 __ ucomisd(p, q);
1725 }
1726 }
1727
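// Move a scalar of the given precision between XMM registers; only the half
// precision variant needs the GPR scratch register.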
1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1729 XMMRegister dst, XMMRegister src, Register scratch) {
1730 if (pt == fp_prec_hlf) {
1731 __ movhlf(dst, src, scratch);
1732 } else if (pt == fp_prec_flt) {
1733 __ movflt(dst, src);
1734 } else {
1735 __ movdbl(dst, src);
1736 }
1737 }
1738
1739 // Math.min() # Math.max()
1740 // -----------------------------
1741 // (v)ucomis[h/s/d] #
1742 // ja -> b # a
1743 // jp -> NaN # NaN
1744 // jb -> a # b
1745 // je #
1746 // |-jz -> a | b # a & b
1747 // | -> a #
1748 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1749 XMMRegister a, XMMRegister b,
1750 XMMRegister xmmt, Register rt,
1751 bool min, enum FP_PREC pt) {
1752
1753 Label nan, zero, below, above, done;
1754
1755 emit_fp_ucom(masm, pt, a, b);
1756
1757 if (dst->encoding() != (min ? b : a)->encoding()) {
1758 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1759 } else {
1760 __ jccb(Assembler::above, done);
1761 }
1762
1763 __ jccb(Assembler::parity, nan); // PF=1
1764 __ jccb(Assembler::below, below); // CF=1
1765
1766 // equal
1767 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1768 emit_fp_ucom(masm, pt, a, xmmt);
1769
1770 __ jccb(Assembler::equal, zero);
1771 movfp(masm, pt, dst, a, rt);
1772
1773 __ jmp(done);
1774
1775 __ bind(zero);
1776 if (min) {
1777 __ vpor(dst, a, b, Assembler::AVX_128bit);
1778 } else {
1779 __ vpand(dst, a, b, Assembler::AVX_128bit);
1780 }
1781
1782 __ jmp(done);
1783
1784 __ bind(above);
1785 movfp(masm, pt, dst, min ? b : a, rt);
1786
1787 __ jmp(done);
1788
1789 __ bind(nan);
1790 if (pt == fp_prec_hlf) {
1791 __ movl(rt, 0x00007e00); // Float16.NaN
1792 __ evmovw(dst, rt);
1793 } else if (pt == fp_prec_flt) {
1794 __ movl(rt, 0x7fc00000); // Float.NaN
1795 __ movdl(dst, rt);
1796 } else {
1797 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1798 __ movdq(dst, rt);
1799 }
1800 __ jmp(done);
1801
1802 __ bind(below);
1803 movfp(masm, pt, dst, min ? a : b, rt);
1804
1805 __ bind(done);
1806 }
1807
1808 //=============================================================================
1809 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1810
1811 int ConstantTable::calculate_table_base_offset() const {
1812 return 0; // absolute addressing, no offset
1813 }
1814
1815 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1816 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1817 ShouldNotReachHere();
1818 }
1819
1820 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1821 // Empty encoding
1822 }
1823
1824 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1825 return 0;
1826 }
1827
1828 #ifndef PRODUCT
1829 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1830 st->print("# MachConstantBaseNode (empty encoding)");
1831 }
1832 #endif
1833
1834
1835 //=============================================================================
1836 #ifndef PRODUCT
1837 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1838 Compile* C = ra_->C;
1839
1840 int framesize = C->output()->frame_size_in_bytes();
1841 int bangsize = C->output()->bang_size_in_bytes();
1842 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1843 // Remove wordSize for return addr which is already pushed.
1844 framesize -= wordSize;
1845
1846 if (C->output()->need_stack_bang(bangsize)) {
1847 framesize -= wordSize;
1848 st->print("# stack bang (%d bytes)", bangsize);
1849 st->print("\n\t");
1850 st->print("pushq rbp\t# Save rbp");
1851 if (PreserveFramePointer) {
1852 st->print("\n\t");
1853 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1854 }
1855 if (framesize) {
1856 st->print("\n\t");
1857 st->print("subq rsp, #%d\t# Create frame",framesize);
1858 }
1859 } else {
1860 st->print("subq rsp, #%d\t# Create frame",framesize);
1861 st->print("\n\t");
1862 framesize -= wordSize;
1863 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1864 if (PreserveFramePointer) {
1865 st->print("\n\t");
1866 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1867 if (framesize > 0) {
1868 st->print("\n\t");
1869 st->print("addq rbp, #%d", framesize);
1870 }
1871 }
1872 }
1873
1874 if (VerifyStackAtCalls) {
1875 st->print("\n\t");
1876 framesize -= wordSize;
1877 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1878 #ifdef ASSERT
1879 st->print("\n\t");
1880 st->print("# stack alignment check");
1881 #endif
1882 }
1883 if (C->stub_function() != nullptr) {
1884 st->print("\n\t");
1885 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1886 st->print("\n\t");
1887 st->print("je fast_entry\t");
1888 st->print("\n\t");
1889 st->print("call #nmethod_entry_barrier_stub\t");
1890 st->print("\n\tfast_entry:");
1891 }
1892 st->cr();
1893 }
1894 #endif
1895
1896 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1897 Compile* C = ra_->C;
1898
1899 int framesize = C->output()->frame_size_in_bytes();
1900 int bangsize = C->output()->bang_size_in_bytes();
1901
1902 if (C->clinit_barrier_on_entry()) {
1903 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1904 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1905
1906 Label L_skip_barrier;
1907 Register klass = rscratch1;
1908
1909 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1910 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1911
1912 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1913
1914 __ bind(L_skip_barrier);
1915 }
1916
1917 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1918
1919 C->output()->set_frame_complete(__ offset());
1920
1921 if (C->has_mach_constant_base_node()) {
1922 // NOTE: We set the table base offset here because users might be
1923 // emitted before MachConstantBaseNode.
1924 ConstantTable& constant_table = C->output()->constant_table();
1925 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1926 }
1927 }
1928
1929 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1930 {
1931 return MachNode::size(ra_); // too many variables; just compute it
1932 // the hard way
1933 }
1934
1935 int MachPrologNode::reloc() const
1936 {
1937 return 0; // a large enough number
1938 }
1939
1940 //=============================================================================
1941 #ifndef PRODUCT
1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1943 {
1944 Compile* C = ra_->C;
1945 if (generate_vzeroupper(C)) {
1946 st->print("vzeroupper");
1947 st->cr(); st->print("\t");
1948 }
1949
1950 int framesize = C->output()->frame_size_in_bytes();
1951 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1952 // Remove word for return adr already pushed
1953 // and RBP
1954 framesize -= 2*wordSize;
1955
1956 if (framesize) {
1957 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1958 st->print("\t");
1959 }
1960
1961 st->print_cr("popq rbp");
1962 if (do_polling() && C->is_method_compilation()) {
1963 st->print("\t");
1964 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1965 "ja #safepoint_stub\t"
1966 "# Safepoint: poll for GC");
1967 }
1968 }
1969 #endif
1970
1971 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1972 {
1973 Compile* C = ra_->C;
1974
1975 if (generate_vzeroupper(C)) {
1976 // Clear upper bits of YMM registers when current compiled code uses
1977 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1978 __ vzeroupper();
1979 }
1980
1981 int framesize = C->output()->frame_size_in_bytes();
1982 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1983 // Remove word for return adr already pushed
1984 // and RBP
1985 framesize -= 2*wordSize;
1986
1987 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1988
1989 if (framesize) {
1990 __ addq(rsp, framesize);
1991 }
1992
1993 __ popq(rbp);
1994
1995 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1996 __ reserved_stack_check();
1997 }
1998
1999 if (do_polling() && C->is_method_compilation()) {
2000 Label dummy_label;
2001 Label* code_stub = &dummy_label;
2002 if (!C->output()->in_scratch_emit_size()) {
2003 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
2004 C->output()->add_stub(stub);
2005 code_stub = &stub->entry();
2006 }
2007 __ relocate(relocInfo::poll_return_type);
2008 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
2009 }
2010 }
2011
2012 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
2013 {
2014 return MachNode::size(ra_); // too many variables; just compute it
2015 // the hard way
2016 }
2017
2018 int MachEpilogNode::reloc() const
2019 {
2020 return 2; // a large enough number
2021 }
2022
2023 const Pipeline* MachEpilogNode::pipeline() const
2024 {
2025 return MachNode::pipeline_class();
2026 }
2027
2028 //=============================================================================
2029
2030 enum RC {
2031 rc_bad,
2032 rc_int,
2033 rc_kreg,
2034 rc_float,
2035 rc_stack
2036 };
2037
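// Map an OptoReg to its register class: general purpose, opmask (k), xmm,
// stack slot, or invalid.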
2038 static enum RC rc_class(OptoReg::Name reg)
2039 {
2040   if (!OptoReg::is_valid(reg)) return rc_bad;
2041
2042 if (OptoReg::is_stack(reg)) return rc_stack;
2043
2044 VMReg r = OptoReg::as_VMReg(reg);
2045
2046 if (r->is_Register()) return rc_int;
2047
2048 if (r->is_KRegister()) return rc_kreg;
2049
2050 assert(r->is_XMMRegister(), "must be");
2051 return rc_float;
2052 }
2053
2054 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2055 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2056 int src_hi, int dst_hi, uint ireg, outputStream* st);
2057
2058 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2059 int stack_offset, int reg, uint ireg, outputStream* st);
2060
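// Copy a vector value between two stack slots, preserving the rax/xmm0 scratch
// register by parking it just below rsp for the duration of the copy.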
2061 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2062 int dst_offset, uint ireg, outputStream* st) {
2063 if (masm) {
2064 switch (ireg) {
2065 case Op_VecS:
2066 __ movq(Address(rsp, -8), rax);
2067 __ movl(rax, Address(rsp, src_offset));
2068 __ movl(Address(rsp, dst_offset), rax);
2069 __ movq(rax, Address(rsp, -8));
2070 break;
2071 case Op_VecD:
2072 __ pushq(Address(rsp, src_offset));
2073 __ popq (Address(rsp, dst_offset));
2074 break;
2075 case Op_VecX:
2076 __ pushq(Address(rsp, src_offset));
2077 __ popq (Address(rsp, dst_offset));
2078 __ pushq(Address(rsp, src_offset+8));
2079 __ popq (Address(rsp, dst_offset+8));
2080 break;
2081 case Op_VecY:
2082 __ vmovdqu(Address(rsp, -32), xmm0);
2083 __ vmovdqu(xmm0, Address(rsp, src_offset));
2084 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2085 __ vmovdqu(xmm0, Address(rsp, -32));
2086 break;
2087 case Op_VecZ:
2088 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2089 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2090 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2091 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2092 break;
2093 default:
2094 ShouldNotReachHere();
2095 }
2096 #ifndef PRODUCT
2097 } else {
2098 switch (ireg) {
2099 case Op_VecS:
2100 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2101 "movl rax, [rsp + #%d]\n\t"
2102 "movl [rsp + #%d], rax\n\t"
2103 "movq rax, [rsp - #8]",
2104 src_offset, dst_offset);
2105 break;
2106 case Op_VecD:
2107 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2108 "popq [rsp + #%d]",
2109 src_offset, dst_offset);
2110 break;
2111 case Op_VecX:
2112 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2113 "popq [rsp + #%d]\n\t"
2114 "pushq [rsp + #%d]\n\t"
2115 "popq [rsp + #%d]",
2116 src_offset, dst_offset, src_offset+8, dst_offset+8);
2117 break;
2118 case Op_VecY:
2119 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2120 "vmovdqu xmm0, [rsp + #%d]\n\t"
2121 "vmovdqu [rsp + #%d], xmm0\n\t"
2122 "vmovdqu xmm0, [rsp - #32]",
2123 src_offset, dst_offset);
2124 break;
2125 case Op_VecZ:
2126 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2127 "vmovdqu xmm0, [rsp + #%d]\n\t"
2128 "vmovdqu [rsp + #%d], xmm0\n\t"
2129 "vmovdqu xmm0, [rsp - #64]",
2130 src_offset, dst_offset);
2131 break;
2132 default:
2133 ShouldNotReachHere();
2134 }
2135 #endif
2136 }
2137 }
2138
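// Emit (masm != nullptr) or format (st != nullptr) a spill copy between any
// combination of stack slots, general purpose, xmm and opmask registers.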
2139 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2140 PhaseRegAlloc* ra_,
2141 bool do_size,
2142 outputStream* st) const {
2143 assert(masm != nullptr || st != nullptr, "sanity");
2144 // Get registers to move
2145 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2146 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2147 OptoReg::Name dst_second = ra_->get_reg_second(this);
2148 OptoReg::Name dst_first = ra_->get_reg_first(this);
2149
2150 enum RC src_second_rc = rc_class(src_second);
2151 enum RC src_first_rc = rc_class(src_first);
2152 enum RC dst_second_rc = rc_class(dst_second);
2153 enum RC dst_first_rc = rc_class(dst_first);
2154
2155 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2156 "must move at least 1 register" );
2157
2158 if (src_first == dst_first && src_second == dst_second) {
2159 // Self copy, no move
2160 return 0;
2161 }
2162 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2163 uint ireg = ideal_reg();
2164 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2165 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
2166     if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2167 // mem -> mem
2168 int src_offset = ra_->reg2offset(src_first);
2169 int dst_offset = ra_->reg2offset(dst_first);
2170 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2171 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2172 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2173 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2174 int stack_offset = ra_->reg2offset(dst_first);
2175 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2176 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2177 int stack_offset = ra_->reg2offset(src_first);
2178 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2179 } else {
2180 ShouldNotReachHere();
2181 }
2182 return 0;
2183 }
2184 if (src_first_rc == rc_stack) {
2185 // mem ->
2186 if (dst_first_rc == rc_stack) {
2187 // mem -> mem
2188 assert(src_second != dst_first, "overlap");
2189 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2190 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2191 // 64-bit
2192 int src_offset = ra_->reg2offset(src_first);
2193 int dst_offset = ra_->reg2offset(dst_first);
2194 if (masm) {
2195 __ pushq(Address(rsp, src_offset));
2196 __ popq (Address(rsp, dst_offset));
2197 #ifndef PRODUCT
2198 } else {
2199 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2200 "popq [rsp + #%d]",
2201 src_offset, dst_offset);
2202 #endif
2203 }
2204 } else {
2205 // 32-bit
2206 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2207 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2208 // No pushl/popl, so:
2209 int src_offset = ra_->reg2offset(src_first);
2210 int dst_offset = ra_->reg2offset(dst_first);
2211 if (masm) {
2212 __ movq(Address(rsp, -8), rax);
2213 __ movl(rax, Address(rsp, src_offset));
2214 __ movl(Address(rsp, dst_offset), rax);
2215 __ movq(rax, Address(rsp, -8));
2216 #ifndef PRODUCT
2217 } else {
2218 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2219 "movl rax, [rsp + #%d]\n\t"
2220 "movl [rsp + #%d], rax\n\t"
2221 "movq rax, [rsp - #8]",
2222 src_offset, dst_offset);
2223 #endif
2224 }
2225 }
2226 return 0;
2227 } else if (dst_first_rc == rc_int) {
2228 // mem -> gpr
2229 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2230 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2231 // 64-bit
2232 int offset = ra_->reg2offset(src_first);
2233 if (masm) {
2234 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2235 #ifndef PRODUCT
2236 } else {
2237 st->print("movq %s, [rsp + #%d]\t# spill",
2238 Matcher::regName[dst_first],
2239 offset);
2240 #endif
2241 }
2242 } else {
2243 // 32-bit
2244 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2245 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2246 int offset = ra_->reg2offset(src_first);
2247 if (masm) {
2248 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2249 #ifndef PRODUCT
2250 } else {
2251 st->print("movl %s, [rsp + #%d]\t# spill",
2252 Matcher::regName[dst_first],
2253 offset);
2254 #endif
2255 }
2256 }
2257 return 0;
2258 } else if (dst_first_rc == rc_float) {
2259       // mem -> xmm
2260 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2261 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2262 // 64-bit
2263 int offset = ra_->reg2offset(src_first);
2264 if (masm) {
2265 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2266 #ifndef PRODUCT
2267 } else {
2268 st->print("%s %s, [rsp + #%d]\t# spill",
2269 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2270 Matcher::regName[dst_first],
2271 offset);
2272 #endif
2273 }
2274 } else {
2275 // 32-bit
2276 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2277 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2278 int offset = ra_->reg2offset(src_first);
2279 if (masm) {
2280 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2281 #ifndef PRODUCT
2282 } else {
2283 st->print("movss %s, [rsp + #%d]\t# spill",
2284 Matcher::regName[dst_first],
2285 offset);
2286 #endif
2287 }
2288 }
2289 return 0;
2290 } else if (dst_first_rc == rc_kreg) {
2291 // mem -> kreg
2292 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2293 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2294 // 64-bit
2295 int offset = ra_->reg2offset(src_first);
2296 if (masm) {
2297 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2298 #ifndef PRODUCT
2299 } else {
2300 st->print("kmovq %s, [rsp + #%d]\t# spill",
2301 Matcher::regName[dst_first],
2302 offset);
2303 #endif
2304 }
2305 }
2306 return 0;
2307 }
2308 } else if (src_first_rc == rc_int) {
2309 // gpr ->
2310 if (dst_first_rc == rc_stack) {
2311 // gpr -> mem
2312 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2313 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2314 // 64-bit
2315 int offset = ra_->reg2offset(dst_first);
2316 if (masm) {
2317 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2318 #ifndef PRODUCT
2319 } else {
2320 st->print("movq [rsp + #%d], %s\t# spill",
2321 offset,
2322 Matcher::regName[src_first]);
2323 #endif
2324 }
2325 } else {
2326 // 32-bit
2327 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2328 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2329 int offset = ra_->reg2offset(dst_first);
2330 if (masm) {
2331 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2332 #ifndef PRODUCT
2333 } else {
2334 st->print("movl [rsp + #%d], %s\t# spill",
2335 offset,
2336 Matcher::regName[src_first]);
2337 #endif
2338 }
2339 }
2340 return 0;
2341 } else if (dst_first_rc == rc_int) {
2342 // gpr -> gpr
2343 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2344 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2345 // 64-bit
2346 if (masm) {
2347 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2348 as_Register(Matcher::_regEncode[src_first]));
2349 #ifndef PRODUCT
2350 } else {
2351 st->print("movq %s, %s\t# spill",
2352 Matcher::regName[dst_first],
2353 Matcher::regName[src_first]);
2354 #endif
2355 }
2356 return 0;
2357 } else {
2358 // 32-bit
2359 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2360 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2361 if (masm) {
2362 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2363 as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movl %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 return 0;
2372 }
2373 } else if (dst_first_rc == rc_float) {
2374 // gpr -> xmm
2375 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2376 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2377 // 64-bit
2378 if (masm) {
2379 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2380 #ifndef PRODUCT
2381 } else {
2382 st->print("movdq %s, %s\t# spill",
2383 Matcher::regName[dst_first],
2384 Matcher::regName[src_first]);
2385 #endif
2386 }
2387 } else {
2388 // 32-bit
2389 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2390 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2391 if (masm) {
2392 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2393 #ifndef PRODUCT
2394 } else {
2395 st->print("movdl %s, %s\t# spill",
2396 Matcher::regName[dst_first],
2397 Matcher::regName[src_first]);
2398 #endif
2399 }
2400 }
2401 return 0;
2402 } else if (dst_first_rc == rc_kreg) {
2403 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2404 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2405 // 64-bit
2406 if (masm) {
2407 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2408 #ifndef PRODUCT
2409 } else {
2410 st->print("kmovq %s, %s\t# spill",
2411 Matcher::regName[dst_first],
2412 Matcher::regName[src_first]);
2413 #endif
2414 }
2415 }
2416 Unimplemented();
2417 return 0;
2418 }
2419 } else if (src_first_rc == rc_float) {
2420 // xmm ->
2421 if (dst_first_rc == rc_stack) {
2422 // xmm -> mem
2423 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2424 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2425 // 64-bit
2426 int offset = ra_->reg2offset(dst_first);
2427 if (masm) {
2428 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2429 #ifndef PRODUCT
2430 } else {
2431 st->print("movsd [rsp + #%d], %s\t# spill",
2432 offset,
2433 Matcher::regName[src_first]);
2434 #endif
2435 }
2436 } else {
2437 // 32-bit
2438 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2439 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2440 int offset = ra_->reg2offset(dst_first);
2441 if (masm) {
2442 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movss [rsp + #%d], %s\t# spill",
2446 offset,
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 }
2451 return 0;
2452 } else if (dst_first_rc == rc_int) {
2453 // xmm -> gpr
2454 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2455 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2456 // 64-bit
2457 if (masm) {
2458 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2459 #ifndef PRODUCT
2460 } else {
2461 st->print("movdq %s, %s\t# spill",
2462 Matcher::regName[dst_first],
2463 Matcher::regName[src_first]);
2464 #endif
2465 }
2466 } else {
2467 // 32-bit
2468 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2469 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2470 if (masm) {
2471 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("movdl %s, %s\t# spill",
2475 Matcher::regName[dst_first],
2476 Matcher::regName[src_first]);
2477 #endif
2478 }
2479 }
2480 return 0;
2481 } else if (dst_first_rc == rc_float) {
2482 // xmm -> xmm
2483 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2484 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2485 // 64-bit
2486 if (masm) {
2487 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2488 #ifndef PRODUCT
2489 } else {
2490 st->print("%s %s, %s\t# spill",
2491 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2492 Matcher::regName[dst_first],
2493 Matcher::regName[src_first]);
2494 #endif
2495 }
2496 } else {
2497 // 32-bit
2498 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2499 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2500 if (masm) {
2501 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2502 #ifndef PRODUCT
2503 } else {
2504 st->print("%s %s, %s\t# spill",
2505 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2506 Matcher::regName[dst_first],
2507 Matcher::regName[src_first]);
2508 #endif
2509 }
2510 }
2511 return 0;
2512 } else if (dst_first_rc == rc_kreg) {
2513 assert(false, "Illegal spilling");
2514 return 0;
2515 }
2516 } else if (src_first_rc == rc_kreg) {
2517 if (dst_first_rc == rc_stack) {
2518       // kreg -> mem
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 int offset = ra_->reg2offset(dst_first);
2523 if (masm) {
2524 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2525 #ifndef PRODUCT
2526 } else {
2527 st->print("kmovq [rsp + #%d] , %s\t# spill",
2528 offset,
2529 Matcher::regName[src_first]);
2530 #endif
2531 }
2532 }
2533 return 0;
2534 } else if (dst_first_rc == rc_int) {
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 Unimplemented();
2549 return 0;
2550 } else if (dst_first_rc == rc_kreg) {
2551 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2552 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2553 // 64-bit
2554 if (masm) {
2555 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2556 #ifndef PRODUCT
2557 } else {
2558 st->print("kmovq %s, %s\t# spill",
2559 Matcher::regName[dst_first],
2560 Matcher::regName[src_first]);
2561 #endif
2562 }
2563 }
2564 return 0;
2565 } else if (dst_first_rc == rc_float) {
2566 assert(false, "Illegal spill");
2567 return 0;
2568 }
2569 }
2570
2571   assert(false, "unhandled spill copy combination");
2572 Unimplemented();
2573 return 0;
2574 }
2575
2576 #ifndef PRODUCT
2577 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2578 implementation(nullptr, ra_, false, st);
2579 }
2580 #endif
2581
2582 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2583 implementation(masm, ra_, false, nullptr);
2584 }
2585
2586 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2587 return MachNode::size(ra_);
2588 }
2589
2590 //=============================================================================
2591 #ifndef PRODUCT
2592 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2593 {
2594 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2595 int reg = ra_->get_reg_first(this);
2596 st->print("leaq %s, [rsp + #%d]\t# box lock",
2597 Matcher::regName[reg], offset);
2598 }
2599 #endif
2600
2601 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2602 {
2603 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2604 int reg = ra_->get_encode(this);
2605
2606 __ lea(as_Register(reg), Address(rsp, offset));
2607 }
2608
2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2610 {
2611 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2612 if (ra_->get_encode(this) > 15) {
2613 return (offset < 0x80) ? 6 : 9; // REX2
2614 } else {
2615 return (offset < 0x80) ? 5 : 8; // REX
2616 }
2617 }
2618
2619 //=============================================================================
2620 #ifndef PRODUCT
2621 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2622 {
2623 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2624 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2625 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2626 }
2627 #endif
2628
2629 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2630 {
2631 __ ic_check(InteriorEntryAlignment);
2632 }
2633
2634 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2635 {
2636 return MachNode::size(ra_); // too many variables; just compute it
2637 // the hard way
2638 }
2639
2640
2641 //=============================================================================
2642
2643 bool Matcher::supports_vector_calling_convention(void) {
2644 return EnableVectorSupport;
2645 }
2646
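// Intel APX NDD (new data destination) instructions can be demoted to shorter
// legacy encodings when the destination can share a register with one of the
// inputs; the instruction selector marks such candidates with the flags below.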
2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2648 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2649 }
2650
2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2652 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2653 }
2654
2655 #ifdef ASSERT
2656 static bool is_ndd_demotable(const MachNode* mdef) {
2657 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2658 }
2659 #endif
2660
2661 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2662 int oper_index) {
2663 if (mdef == nullptr) {
2664 return false;
2665 }
2666
2667 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2668 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2669 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2670 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2671 return false;
2672 }
2673
2674 // A complex memory operand covers multiple incoming edges needed for
2675 // address computation. Biasing the def towards any address component will
2676 // not result in NDD demotion by the assembler.
2677 if (mdef->operand_num_edges(oper_index) != 1) {
2678 return false;
2679 }
2680
2681 // Demotion candidate must be register mask compatible with definition.
2682 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2683 if (!oper_mask.overlap(mdef->out_RegMask())) {
2684 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2685 return false;
2686 }
2687
2688 switch (oper_index) {
2689 // The first operand of a MachNode corresponding to an Intel APX NDD
2690 // selection pattern can share its assigned register with the definition
2691 // operand if their live ranges do not overlap. In such a scenario we can
2692 // demote the instruction to a legacy map0/map1 encoding by replacing its
2693 // 4-byte extended EVEX prefix with a shorter REX/REX2 prefix. Demotion
2694 // candidates are decorated with a special flag by the instruction selector.
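// Illustrative sketch (schematic mnemonics, not exact assembler output):
//   NDD form:     add dst, src1, src2   (4-byte extended EVEX prefix)
//   demoted form: add dst, src2         (dst reuses src1's register;
//                                        1- or 2-byte REX/REX2 prefix)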
2695 case 1:
2696 return is_ndd_demotable_opr1(mdef);
2697
2698 // The definition operand of a commutative operation can be biased towards
2699 // the second operand.
2700 case 2:
2701 return is_ndd_demotable_opr2(mdef);
2702
2703 // The current scheme selects at most two biasing candidates.
2704 default:
2705 assert(false, "unhandled operand index: %s", mdef->Name());
2706 break;
2707 }
2708
2709 return false;
2710 }
2711
2712 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2713 assert(EnableVectorSupport, "sanity");
2714 int lo = XMM0_num;
2715 int hi = XMM0b_num;
2716 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2717 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2718 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2719 return OptoRegPair(hi, lo);
2720 }
2721
2722 // Is this branch offset short enough that a short branch can be used?
2723 //
2724 // NOTE: If the platform does not provide any short branch variants, then
2725 // this method should return false for offset 0.
2726 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2727 // The passed offset is relative to the address of the branch.
2728 // On x86, a branch displacement is calculated relative to the address
2729 // of the next instruction.
2730 offset -= br_size;
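// For example, a 2-byte short jump at address A targeting A + 0x40 is passed
// offset 0x40; after subtracting br_size the encoded rel8 displacement is
// 0x3e, which fits in the ranges checked below.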
2731
2732 // The short version of jmpConUCF2 contains multiple branches,
2733 // making the reach slightly shorter.
2734 if (rule == jmpConUCF2_rule)
2735 return (-126 <= offset && offset <= 125);
2736 return (-128 <= offset && offset <= 127);
2737 }
2738
2739 #ifdef ASSERT
2740 // Return whether or not this register is ever used as an argument.
2741 bool Matcher::can_be_java_arg(int reg)
2742 {
2743 return
2744 reg == RDI_num || reg == RDI_H_num ||
2745 reg == RSI_num || reg == RSI_H_num ||
2746 reg == RDX_num || reg == RDX_H_num ||
2747 reg == RCX_num || reg == RCX_H_num ||
2748 reg == R8_num || reg == R8_H_num ||
2749 reg == R9_num || reg == R9_H_num ||
2750 reg == R12_num || reg == R12_H_num ||
2751 reg == XMM0_num || reg == XMM0b_num ||
2752 reg == XMM1_num || reg == XMM1b_num ||
2753 reg == XMM2_num || reg == XMM2b_num ||
2754 reg == XMM3_num || reg == XMM3b_num ||
2755 reg == XMM4_num || reg == XMM4b_num ||
2756 reg == XMM5_num || reg == XMM5b_num ||
2757 reg == XMM6_num || reg == XMM6b_num ||
2758 reg == XMM7_num || reg == XMM7b_num;
2759 }
2760 #endif
2761
2762 uint Matcher::int_pressure_limit()
2763 {
2764 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2765 }
2766
2767 uint Matcher::float_pressure_limit()
2768 {
2769 // After experimenting with different values, the following default threshold
2770 // works best for LCM's register pressure scheduling on x64.
2771 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2772 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2773 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2774 }
2775
2776 // Register for DIVI projection of divmodI
2777 const RegMask& Matcher::divI_proj_mask() {
2778 return INT_RAX_REG_mask();
2779 }
2780
2781 // Register for MODI projection of divmodI
2782 const RegMask& Matcher::modI_proj_mask() {
2783 return INT_RDX_REG_mask();
2784 }
2785
2786 // Register for DIVL projection of divmodL
2787 const RegMask& Matcher::divL_proj_mask() {
2788 return LONG_RAX_REG_mask();
2789 }
2790
2791 // Register for MODL projection of divmodL
2792 const RegMask& Matcher::modL_proj_mask() {
2793 return LONG_RDX_REG_mask();
2794 }
2795
2796 %}
2797
2798 source_hpp %{
2799 // Header information of the source block.
2800 // Method declarations/definitions which are used outside
2801 // the ad-scope can conveniently be defined here.
2802 //
2803 // To keep related declarations/definitions/uses close together,
2804 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2805
2806 #include "runtime/vm_version.hpp"
2807
2808 class NativeJump;
2809
2810 class CallStubImpl {
2811
2812 //--------------------------------------------------------------
2813 //---< Used for optimization in Compile::shorten_branches >---
2814 //--------------------------------------------------------------
2815
2816 public:
2817 // Size of call trampoline stub.
2818 static uint size_call_trampoline() {
2819 return 0; // no call trampolines on this platform
2820 }
2821
2822 // number of relocations needed by a call trampoline stub
2823 static uint reloc_call_trampoline() {
2824 return 0; // no call trampolines on this platform
2825 }
2826 };
2827
2828 class HandlerImpl {
2829
2830 public:
2831
2832 static int emit_deopt_handler(C2_MacroAssembler* masm);
2833
2834 static uint size_deopt_handler() {
2835 // one call and one jmp.
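// (A call rel32 is 5 bytes and the short backward jmp emitted after it is
// 2 bytes, assuming the deopt blob is reachable with a 32-bit displacement.)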
2836 return 7;
2837 }
2838 };
2839
2840 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2841 switch(bytes) {
2842 case 4: // fall-through
2843 case 8: // fall-through
2844 case 16: return Assembler::AVX_128bit;
2845 case 32: return Assembler::AVX_256bit;
2846 case 64: return Assembler::AVX_512bit;
2847
2848 default: {
2849 ShouldNotReachHere();
2850 return Assembler::AVX_NoVec;
2851 }
2852 }
2853 }
2854
2855 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2856 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2857 }
2858
2859 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2860 uint def_idx = use->operand_index(opnd);
2861 Node* def = use->in(def_idx);
2862 return vector_length_encoding(def);
2863 }
2864
2865 static inline bool is_vector_popcount_predicate(BasicType bt) {
2866 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2867 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2868 }
2869
2870 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2871 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2872 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2873 }
2874
2875 class Node::PD {
2876 public:
2877 enum NodeFlags : uint64_t {
2878 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2879 Flag_sets_carry_flag = Node::_last_flag << 2,
2880 Flag_sets_parity_flag = Node::_last_flag << 3,
2881 Flag_sets_zero_flag = Node::_last_flag << 4,
2882 Flag_sets_overflow_flag = Node::_last_flag << 5,
2883 Flag_sets_sign_flag = Node::_last_flag << 6,
2884 Flag_clears_carry_flag = Node::_last_flag << 7,
2885 Flag_clears_parity_flag = Node::_last_flag << 8,
2886 Flag_clears_zero_flag = Node::_last_flag << 9,
2887 Flag_clears_overflow_flag = Node::_last_flag << 10,
2888 Flag_clears_sign_flag = Node::_last_flag << 11,
2889 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2890 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2891 _last_flag = Flag_ndd_demotable_opr2
2892 };
2893 };
2894
2895 %} // end source_hpp
2896
2897 source %{
2898
2899 #include "opto/addnode.hpp"
2900 #include "c2_intelJccErratum_x86.hpp"
2901
2902 void PhaseOutput::pd_perform_mach_node_analysis() {
2903 if (VM_Version::has_intel_jcc_erratum()) {
2904 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2905 _buf_sizes._code += extra_padding;
2906 }
2907 }
2908
2909 int MachNode::pd_alignment_required() const {
2910 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2911 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2912 return IntelJccErratum::largest_jcc_size() + 1;
2913 } else {
2914 return 1;
2915 }
2916 }
2917
2918 int MachNode::compute_padding(int current_offset) const {
2919 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2920 Compile* C = Compile::current();
2921 PhaseOutput* output = C->output();
2922 Block* block = output->block();
2923 int index = output->index();
2924 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2925 } else {
2926 return 0;
2927 }
2928 }
2929
2930 // Emit deopt handler code.
2931 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2932
2933 // Note that the code buffer's insts_mark is always relative to insts.
2934 // That's why we must use the macroassembler to generate a handler.
2935 address base = __ start_a_stub(size_deopt_handler());
2936 if (base == nullptr) {
2937 ciEnv::current()->record_failure("CodeCache is full");
2938 return 0; // CodeBuffer::expand failed
2939 }
2940 int offset = __ offset();
2941
2942 Label start;
2943 __ bind(start);
2944
2945 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2946
2947 int entry_offset = __ offset();
2948
2949 __ jmp(start);
2950
2951 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2952 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2953 "out of bounds read in post-call NOP check");
2954 __ end_a_stub();
2955 return entry_offset;
2956 }
2957
2958 static Assembler::Width widthForType(BasicType bt) {
2959 if (bt == T_BYTE) {
2960 return Assembler::B;
2961 } else if (bt == T_SHORT) {
2962 return Assembler::W;
2963 } else if (bt == T_INT) {
2964 return Assembler::D;
2965 } else {
2966 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2967 return Assembler::Q;
2968 }
2969 }
2970
2971 //=============================================================================
2972
2973 // Float masks come from different places depending on platform.
2974 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2975 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2976 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2977 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2978 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2979 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2980 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2981 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2982 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2983 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2984 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2985 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2986 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2987 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2988 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2989 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2990 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2991 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2992 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2993
2994 //=============================================================================
2995 bool Matcher::match_rule_supported(int opcode) {
2996 if (!has_match_rule(opcode)) {
2997 return false; // no match rule present
2998 }
2999 switch (opcode) {
3000 case Op_AbsVL:
3001 case Op_StoreVectorScatter:
3002 if (UseAVX < 3) {
3003 return false;
3004 }
3005 break;
3006 case Op_PopCountI:
3007 case Op_PopCountL:
3008 if (!UsePopCountInstruction) {
3009 return false;
3010 }
3011 break;
3012 case Op_PopCountVI:
3013 if (UseAVX < 2) {
3014 return false;
3015 }
3016 break;
3017 case Op_CompressV:
3018 case Op_ExpandV:
3019 case Op_PopCountVL:
3020 if (UseAVX < 2) {
3021 return false;
3022 }
3023 break;
3024 case Op_MulVI:
3025 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3026 return false;
3027 }
3028 break;
3029 case Op_MulVL:
3030 if (UseSSE < 4) { // only with SSE4_1 or AVX
3031 return false;
3032 }
3033 break;
3034 case Op_MulReductionVL:
3035 if (VM_Version::supports_avx512dq() == false) {
3036 return false;
3037 }
3038 break;
3039 case Op_AbsVB:
3040 case Op_AbsVS:
3041 case Op_AbsVI:
3042 case Op_AddReductionVI:
3043 case Op_AndReductionV:
3044 case Op_OrReductionV:
3045 case Op_XorReductionV:
3046 if (UseSSE < 3) { // requires at least SSSE3
3047 return false;
3048 }
3049 break;
3050 case Op_MaxHF:
3051 case Op_MinHF:
3052 if (!VM_Version::supports_avx512vlbw()) {
3053 return false;
3054 } // fallthrough
3055 case Op_AddHF:
3056 case Op_DivHF:
3057 case Op_FmaHF:
3058 case Op_MulHF:
3059 case Op_ReinterpretS2HF:
3060 case Op_ReinterpretHF2S:
3061 case Op_SubHF:
3062 case Op_SqrtHF:
3063 if (!VM_Version::supports_avx512_fp16()) {
3064 return false;
3065 }
3066 break;
3067 case Op_VectorLoadShuffle:
3068 case Op_VectorRearrange:
3069 case Op_MulReductionVI:
3070 if (UseSSE < 4) { // requires at least SSE4
3071 return false;
3072 }
3073 break;
3074 case Op_IsInfiniteF:
3075 case Op_IsInfiniteD:
3076 if (!VM_Version::supports_avx512dq()) {
3077 return false;
3078 }
3079 break;
3080 case Op_SqrtVD:
3081 case Op_SqrtVF:
3082 case Op_VectorMaskCmp:
3083 case Op_VectorCastB2X:
3084 case Op_VectorCastS2X:
3085 case Op_VectorCastI2X:
3086 case Op_VectorCastL2X:
3087 case Op_VectorCastF2X:
3088 case Op_VectorCastD2X:
3089 case Op_VectorUCastB2X:
3090 case Op_VectorUCastS2X:
3091 case Op_VectorUCastI2X:
3092 case Op_VectorMaskCast:
3093 if (UseAVX < 1) { // enabled for AVX only
3094 return false;
3095 }
3096 break;
3097 case Op_PopulateIndex:
3098 if (UseAVX < 2) {
3099 return false;
3100 }
3101 break;
3102 case Op_RoundVF:
3103 if (UseAVX < 2) { // enabled for AVX2 only
3104 return false;
3105 }
3106 break;
3107 case Op_RoundVD:
3108 if (UseAVX < 3) {
3109 return false; // enabled for AVX3 only
3110 }
3111 break;
3112 case Op_CompareAndSwapL:
3113 case Op_CompareAndSwapP:
3114 break;
3115 case Op_StrIndexOf:
3116 if (!UseSSE42Intrinsics) {
3117 return false;
3118 }
3119 break;
3120 case Op_StrIndexOfChar:
3121 if (!UseSSE42Intrinsics) {
3122 return false;
3123 }
3124 break;
3125 case Op_OnSpinWait:
3126 if (VM_Version::supports_on_spin_wait() == false) {
3127 return false;
3128 }
3129 break;
3130 case Op_MulVB:
3131 case Op_LShiftVB:
3132 case Op_RShiftVB:
3133 case Op_URShiftVB:
3134 case Op_VectorInsert:
3135 case Op_VectorLoadMask:
3136 case Op_VectorStoreMask:
3137 case Op_VectorBlend:
3138 if (UseSSE < 4) {
3139 return false;
3140 }
3141 break;
3142 case Op_MaxD:
3143 case Op_MaxF:
3144 case Op_MinD:
3145 case Op_MinF:
3146 if (UseAVX < 1) { // enabled for AVX only
3147 return false;
3148 }
3149 break;
3150 case Op_CacheWB:
3151 case Op_CacheWBPreSync:
3152 case Op_CacheWBPostSync:
3153 if (!VM_Version::supports_data_cache_line_flush()) {
3154 return false;
3155 }
3156 break;
3157 case Op_ExtractB:
3158 case Op_ExtractL:
3159 case Op_ExtractI:
3160 case Op_RoundDoubleMode:
3161 if (UseSSE < 4) {
3162 return false;
3163 }
3164 break;
3165 case Op_RoundDoubleModeV:
3166 if (VM_Version::supports_avx() == false) {
3167 return false; // 128bit vroundpd is not available
3168 }
3169 break;
3170 case Op_LoadVectorGather:
3171 case Op_LoadVectorGatherMasked:
3172 if (UseAVX < 2) {
3173 return false;
3174 }
3175 break;
3176 case Op_FmaF:
3177 case Op_FmaD:
3178 case Op_FmaVD:
3179 case Op_FmaVF:
3180 if (!UseFMA) {
3181 return false;
3182 }
3183 break;
3184 case Op_MacroLogicV:
3185 if (UseAVX < 3 || !UseVectorMacroLogic) {
3186 return false;
3187 }
3188 break;
3189
3190 case Op_VectorCmpMasked:
3191 case Op_VectorMaskGen:
3192 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3193 return false;
3194 }
3195 break;
3196 case Op_VectorMaskFirstTrue:
3197 case Op_VectorMaskLastTrue:
3198 case Op_VectorMaskTrueCount:
3199 case Op_VectorMaskToLong:
3200 if (UseAVX < 1) {
3201 return false;
3202 }
3203 break;
3204 case Op_RoundF:
3205 case Op_RoundD:
3206 break;
3207 case Op_CopySignD:
3208 case Op_CopySignF:
3209 if (UseAVX < 3) {
3210 return false;
3211 }
3212 if (!VM_Version::supports_avx512vl()) {
3213 return false;
3214 }
3215 break;
3216 case Op_CompressBits:
3217 case Op_ExpandBits:
3218 if (!VM_Version::supports_bmi2()) {
3219 return false;
3220 }
3221 break;
3222 case Op_CompressM:
3223 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3224 return false;
3225 }
3226 break;
3227 case Op_ConvF2HF:
3228 case Op_ConvHF2F:
3229 if (!VM_Version::supports_float16()) {
3230 return false;
3231 }
3232 break;
3233 case Op_VectorCastF2HF:
3234 case Op_VectorCastHF2F:
3235 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3236 return false;
3237 }
3238 break;
3239 }
3240 return true; // Match rules are supported by default.
3241 }
3242
3243 //------------------------------------------------------------------------
3244
3245 static inline bool is_pop_count_instr_target(BasicType bt) {
3246 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3247 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3248 }
3249
3250 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3251 return match_rule_supported_vector(opcode, vlen, bt);
3252 }
3253
3254 // Identify extra cases that we might want to provide match rules for vector nodes and
3255 // other intrinsics guarded with vector length (vlen) and element type (bt).
3256 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3257 if (!match_rule_supported(opcode)) {
3258 return false;
3259 }
3260 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3261 // * SSE2 supports 128bit vectors for all types;
3262 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3263 // * AVX2 supports 256bit vectors for all types;
3264 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3265 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3266 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3267 // And MaxVectorSize is taken into account as well.
3268 if (!vector_size_supported(bt, vlen)) {
3269 return false;
3270 }
3271 // Special cases which require vector length follow:
3272 // * implementation limitations
3273 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3274 // * 128bit vroundpd instruction is present only in AVX1
3275 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
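// For example, a vector of 8 T_INT elements gives 8 * 4 * 8 = 256 bits.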
3276 switch (opcode) {
3277 case Op_MaxVHF:
3278 case Op_MinVHF:
3279 if (!VM_Version::supports_avx512bw()) {
3280 return false;
3281 } // fallthrough
3282 case Op_AddVHF:
3283 case Op_DivVHF:
3284 case Op_FmaVHF:
3285 case Op_MulVHF:
3286 case Op_SubVHF:
3287 case Op_SqrtVHF:
3288 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3289 return false;
3290 }
3291 if (!VM_Version::supports_avx512_fp16()) {
3292 return false;
3293 }
3294 break;
3295 case Op_AbsVF:
3296 case Op_NegVF:
3297 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3298 return false; // 512bit vandps and vxorps are not available
3299 }
3300 break;
3301 case Op_AbsVD:
3302 case Op_NegVD:
3303 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3304 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3305 }
3306 break;
3307 case Op_RotateRightV:
3308 case Op_RotateLeftV:
3309 if (bt != T_INT && bt != T_LONG) {
3310 return false;
3311 } // fallthrough
3312 case Op_MacroLogicV:
3313 if (!VM_Version::supports_evex() ||
3314 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3315 return false;
3316 }
3317 break;
3318 case Op_ClearArray:
3319 case Op_VectorMaskGen:
3320 case Op_VectorCmpMasked:
3321 if (!VM_Version::supports_avx512bw()) {
3322 return false;
3323 }
3324 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3325 return false;
3326 }
3327 break;
3328 case Op_LoadVectorMasked:
3329 case Op_StoreVectorMasked:
3330 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3331 return false;
3332 }
3333 break;
3334 case Op_UMinV:
3335 case Op_UMaxV:
3336 if (UseAVX == 0) {
3337 return false;
3338 }
3339 break;
3340 case Op_UMinReductionV:
3341 case Op_UMaxReductionV:
3342 if (UseAVX == 0) {
3343 return false;
3344 }
3345 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3346 return false;
3347 }
3348 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3349 return false;
3350 }
3351 break;
3352 case Op_MaxV:
3353 case Op_MinV:
3354 if (UseSSE < 4 && is_integral_type(bt)) {
3355 return false;
3356 }
3357 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3358 // Float/Double intrinsics are enabled for AVX family currently.
3359 if (UseAVX == 0) {
3360 return false;
3361 }
3362 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3363 return false;
3364 }
3365 }
3366 break;
3367 case Op_CallLeafVector:
3368 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3369 return false;
3370 }
3371 break;
3372 case Op_AddReductionVI:
3373 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3374 return false;
3375 }
3376 // fallthrough
3377 case Op_AndReductionV:
3378 case Op_OrReductionV:
3379 case Op_XorReductionV:
3380 if (is_subword_type(bt) && (UseSSE < 4)) {
3381 return false;
3382 }
3383 break;
3384 case Op_MinReductionV:
3385 case Op_MaxReductionV:
3386 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3387 return false;
3388 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3389 return false;
3390 }
3391 // Float/Double intrinsics enabled for AVX family.
3392 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3393 return false;
3394 }
3395 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3396 return false;
3397 }
3398 break;
3399 case Op_VectorBlend:
3400 if (UseAVX == 0 && size_in_bits < 128) {
3401 return false;
3402 }
3403 break;
3404 case Op_VectorTest:
3405 if (UseSSE < 4) {
3406 return false; // Implementation limitation
3407 } else if (size_in_bits < 32) {
3408 return false; // Implementation limitation
3409 }
3410 break;
3411 case Op_VectorLoadShuffle:
3412 case Op_VectorRearrange:
3413 if (vlen == 2) {
3414 return false; // Implementation limitation due to how shuffle is loaded
3415 } else if (size_in_bits == 256 && UseAVX < 2) {
3416 return false; // Implementation limitation
3417 }
3418 break;
3419 case Op_VectorLoadMask:
3420 case Op_VectorMaskCast:
3421 if (size_in_bits == 256 && UseAVX < 2) {
3422 return false; // Implementation limitation
3423 }
3424 // fallthrough
3425 case Op_VectorStoreMask:
3426 if (vlen == 2) {
3427 return false; // Implementation limitation
3428 }
3429 break;
3430 case Op_PopulateIndex:
3431 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3432 return false;
3433 }
3434 break;
3435 case Op_VectorCastB2X:
3436 case Op_VectorCastS2X:
3437 case Op_VectorCastI2X:
3438 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3439 return false;
3440 }
3441 break;
3442 case Op_VectorCastL2X:
3443 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3444 return false;
3445 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3446 return false;
3447 }
3448 break;
3449 case Op_VectorCastF2X: {
3450 // As per JLS section 5.1.3, narrowing conversions to sub-word types
3451 // happen after an intermediate conversion to integer, and the special
3452 // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
3453 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3454 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3455 return false;
3456 }
3457 }
3458 // fallthrough
3459 case Op_VectorCastD2X:
3460 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3461 return false;
3462 }
3463 break;
3464 case Op_VectorCastF2HF:
3465 case Op_VectorCastHF2F:
3466 if (!VM_Version::supports_f16c() &&
3467 ((!VM_Version::supports_evex() ||
3468 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3469 return false;
3470 }
3471 break;
3472 case Op_RoundVD:
3473 if (!VM_Version::supports_avx512dq()) {
3474 return false;
3475 }
3476 break;
3477 case Op_MulReductionVI:
3478 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3479 return false;
3480 }
3481 break;
3482 case Op_LoadVectorGatherMasked:
3483 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3484 return false;
3485 }
3486 if (is_subword_type(bt) &&
3487 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3488 (size_in_bits < 64) ||
3489 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3490 return false;
3491 }
3492 break;
3493 case Op_StoreVectorScatterMasked:
3494 case Op_StoreVectorScatter:
3495 if (is_subword_type(bt)) {
3496 return false;
3497 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3498 return false;
3499 }
3500 // fallthrough
3501 case Op_LoadVectorGather:
3502 if (!is_subword_type(bt) && size_in_bits == 64) {
3503 return false;
3504 }
3505 if (is_subword_type(bt) && size_in_bits < 64) {
3506 return false;
3507 }
3508 break;
3509 case Op_SaturatingAddV:
3510 case Op_SaturatingSubV:
3511 if (UseAVX < 1) {
3512 return false; // Implementation limitation
3513 }
3514 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3515 return false;
3516 }
3517 break;
3518 case Op_SelectFromTwoVector:
3519 if (size_in_bits < 128) {
3520 return false;
3521 }
3522 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3523 return false;
3524 }
3525 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3526 return false;
3527 }
3528 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3529 return false;
3530 }
3531 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3532 return false;
3533 }
3534 break;
3535 case Op_MaskAll:
3536 if (!VM_Version::supports_evex()) {
3537 return false;
3538 }
3539 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3540 return false;
3541 }
3542 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3543 return false;
3544 }
3545 break;
3546 case Op_VectorMaskCmp:
3547 if (vlen < 2 || size_in_bits < 32) {
3548 return false;
3549 }
3550 break;
3551 case Op_CompressM:
3552 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3553 return false;
3554 }
3555 break;
3556 case Op_CompressV:
3557 case Op_ExpandV:
3558 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3559 return false;
3560 }
3561 if (size_in_bits < 128) {
3562 return false;
3563 }
3564 case Op_VectorLongToMask:
3565 if (UseAVX < 1) {
3566 return false;
3567 }
3568 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3569 return false;
3570 }
3571 break;
3572 case Op_SignumVD:
3573 case Op_SignumVF:
3574 if (UseAVX < 1) {
3575 return false;
3576 }
3577 break;
3578 case Op_PopCountVI:
3579 case Op_PopCountVL: {
3580 if (!is_pop_count_instr_target(bt) &&
3581 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3582 return false;
3583 }
3584 }
3585 break;
3586 case Op_ReverseV:
3587 case Op_ReverseBytesV:
3588 if (UseAVX < 2) {
3589 return false;
3590 }
3591 break;
3592 case Op_CountTrailingZerosV:
3593 case Op_CountLeadingZerosV:
3594 if (UseAVX < 2) {
3595 return false;
3596 }
3597 break;
3598 }
3599 return true; // Match rules are supported by default.
3600 }
3601
3602 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3603 // The ADLC-based match_rule_supported routine checks for the existence of a pattern
3604 // based on the IR opcode. Most of the unary/binary/ternary masked operations share the
3605 // IR nodes of their non-masked counterparts, with the mask edge being the differentiator.
3606 // This routine does a strict check on the existence of masked operation patterns
3607 // by returning false by default for all opcodes apart from the
3608 // ones whose masked instruction patterns are defined in this file.
3609 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3610 return false;
3611 }
3612
3613 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3614 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3615 return false;
3616 }
3617 switch(opcode) {
3618 // Unary masked operations
3619 case Op_AbsVB:
3620 case Op_AbsVS:
3621 if (!VM_Version::supports_avx512bw()) {
3622 return false; // Implementation limitation
3623 } // fallthrough
3624 case Op_AbsVI:
3625 case Op_AbsVL:
3626 return true;
3627
3628 // Ternary masked operations
3629 case Op_FmaVF:
3630 case Op_FmaVD:
3631 return true;
3632
3633 case Op_MacroLogicV:
3634 if (bt != T_INT && bt != T_LONG) {
3635 return false;
3636 }
3637 return true;
3638
3639 // Binary masked operations
3640 case Op_AddVB:
3641 case Op_AddVS:
3642 case Op_SubVB:
3643 case Op_SubVS:
3644 case Op_MulVS:
3645 case Op_LShiftVS:
3646 case Op_RShiftVS:
3647 case Op_URShiftVS:
3648 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3649 if (!VM_Version::supports_avx512bw()) {
3650 return false; // Implementation limitation
3651 }
3652 return true;
3653
3654 case Op_MulVL:
3655 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3656 if (!VM_Version::supports_avx512dq()) {
3657 return false; // Implementation limitation
3658 }
3659 return true;
3660
3661 case Op_AndV:
3662 case Op_OrV:
3663 case Op_XorV:
3664 case Op_RotateRightV:
3665 case Op_RotateLeftV:
3666 if (bt != T_INT && bt != T_LONG) {
3667 return false; // Implementation limitation
3668 }
3669 return true;
3670
3671 case Op_VectorLoadMask:
3672 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3673 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3674 return false;
3675 }
3676 return true;
3677
3678 case Op_AddVI:
3679 case Op_AddVL:
3680 case Op_AddVF:
3681 case Op_AddVD:
3682 case Op_SubVI:
3683 case Op_SubVL:
3684 case Op_SubVF:
3685 case Op_SubVD:
3686 case Op_MulVI:
3687 case Op_MulVF:
3688 case Op_MulVD:
3689 case Op_DivVF:
3690 case Op_DivVD:
3691 case Op_SqrtVF:
3692 case Op_SqrtVD:
3693 case Op_LShiftVI:
3694 case Op_LShiftVL:
3695 case Op_RShiftVI:
3696 case Op_RShiftVL:
3697 case Op_URShiftVI:
3698 case Op_URShiftVL:
3699 case Op_LoadVectorMasked:
3700 case Op_StoreVectorMasked:
3701 case Op_LoadVectorGatherMasked:
3702 case Op_StoreVectorScatterMasked:
3703 return true;
3704
3705 case Op_UMinV:
3706 case Op_UMaxV:
3707 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3708 return false;
3709 } // fallthrough
3710 case Op_MaxV:
3711 case Op_MinV:
3712 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3713 return false; // Implementation limitation
3714 }
3715 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3716 return false; // Implementation limitation
3717 }
3718 return true;
3719 case Op_SaturatingAddV:
3720 case Op_SaturatingSubV:
3721 if (!is_subword_type(bt)) {
3722 return false;
3723 }
3724 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3725 return false; // Implementation limitation
3726 }
3727 return true;
3728
3729 case Op_VectorMaskCmp:
3730 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3731 return false; // Implementation limitation
3732 }
3733 return true;
3734
3735 case Op_VectorRearrange:
3736 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3737 return false; // Implementation limitation
3738 }
3739 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3740 return false; // Implementation limitation
3741 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3742 return false; // Implementation limitation
3743 }
3744 return true;
3745
3746 // Binary Logical operations
3747 case Op_AndVMask:
3748 case Op_OrVMask:
3749 case Op_XorVMask:
3750 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3751 return false; // Implementation limitation
3752 }
3753 return true;
3754
3755 case Op_PopCountVI:
3756 case Op_PopCountVL:
3757 if (!is_pop_count_instr_target(bt)) {
3758 return false;
3759 }
3760 return true;
3761
3762 case Op_MaskAll:
3763 return true;
3764
3765 case Op_CountLeadingZerosV:
3766 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3767 return true;
3768 }
3769 default:
3770 return false;
3771 }
3772 }
3773
3774 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3775 return false;
3776 }
3777
3778 // Return true if Vector::rearrange needs preparation of the shuffle argument
3779 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3780 switch (elem_bt) {
3781 case T_BYTE: return false;
3782 case T_SHORT: return !VM_Version::supports_avx512bw();
3783 case T_INT: return !VM_Version::supports_avx();
3784 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3785 default:
3786 ShouldNotReachHere();
3787 return false;
3788 }
3789 }
3790
3791 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3792 // Prefer predicate if the mask type is "TypeVectMask".
3793 return vt->isa_vectmask() != nullptr;
3794 }
3795
3796 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3797 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3798 bool legacy = (generic_opnd->opcode() == LEGVEC);
3799 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3800 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3801 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3802 return new legVecZOper();
3803 }
3804 if (legacy) {
3805 switch (ideal_reg) {
3806 case Op_VecS: return new legVecSOper();
3807 case Op_VecD: return new legVecDOper();
3808 case Op_VecX: return new legVecXOper();
3809 case Op_VecY: return new legVecYOper();
3810 case Op_VecZ: return new legVecZOper();
3811 }
3812 } else {
3813 switch (ideal_reg) {
3814 case Op_VecS: return new vecSOper();
3815 case Op_VecD: return new vecDOper();
3816 case Op_VecX: return new vecXOper();
3817 case Op_VecY: return new vecYOper();
3818 case Op_VecZ: return new vecZOper();
3819 }
3820 }
3821 ShouldNotReachHere();
3822 return nullptr;
3823 }
3824
3825 bool Matcher::is_reg2reg_move(MachNode* m) {
3826 switch (m->rule()) {
3827 case MoveVec2Leg_rule:
3828 case MoveLeg2Vec_rule:
3829 case MoveF2VL_rule:
3830 case MoveF2LEG_rule:
3831 case MoveVL2F_rule:
3832 case MoveLEG2F_rule:
3833 case MoveD2VL_rule:
3834 case MoveD2LEG_rule:
3835 case MoveVL2D_rule:
3836 case MoveLEG2D_rule:
3837 return true;
3838 default:
3839 return false;
3840 }
3841 }
3842
3843 bool Matcher::is_generic_vector(MachOper* opnd) {
3844 switch (opnd->opcode()) {
3845 case VEC:
3846 case LEGVEC:
3847 return true;
3848 default:
3849 return false;
3850 }
3851 }
3852
3853 //------------------------------------------------------------------------
3854
3855 const RegMask* Matcher::predicate_reg_mask(void) {
3856 return &_VECTMASK_REG_mask;
3857 }
3858
3859 // Max vector size in bytes. 0 if not supported.
3860 int Matcher::vector_width_in_bytes(BasicType bt) {
3861 assert(is_java_primitive(bt), "only primitive type vectors");
3862 // SSE2 supports 128bit vectors for all types.
3863 // AVX2 supports 256bit vectors for all types.
3864 // AVX512/EVEX supports 512bit vectors for all types.
3865 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
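// For example, UseAVX == 2 gives (1 << 2) * 8 = 32 bytes and UseAVX == 3 gives
// 64 bytes; UseAVX <= 1 starts from the 16-byte SSE2 width.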
3866 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3867 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3868 size = (UseAVX > 2) ? 64 : 32;
3869 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3870 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3871 // Use flag to limit vector size.
3872 size = MIN2(size,(int)MaxVectorSize);
3873 // Minimum 2 values in vector (or 4 for bytes).
3874 switch (bt) {
3875 case T_DOUBLE:
3876 case T_LONG:
3877 if (size < 16) return 0;
3878 break;
3879 case T_FLOAT:
3880 case T_INT:
3881 if (size < 8) return 0;
3882 break;
3883 case T_BOOLEAN:
3884 if (size < 4) return 0;
3885 break;
3886 case T_CHAR:
3887 if (size < 4) return 0;
3888 break;
3889 case T_BYTE:
3890 if (size < 4) return 0;
3891 break;
3892 case T_SHORT:
3893 if (size < 4) return 0;
3894 break;
3895 default:
3896 ShouldNotReachHere();
3897 }
3898 return size;
3899 }
3900
3901 // Limits on vector size (number of elements) loaded into vector.
3902 int Matcher::max_vector_size(const BasicType bt) {
3903 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3904 }
3905 int Matcher::min_vector_size(const BasicType bt) {
3906 int max_size = max_vector_size(bt);
3907 // The minimum size which can be loaded into a vector is 4 bytes.
3908 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
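// That is, at least 4 elements for byte vectors and at least 2 elements otherwise.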
3909 // Support for calling svml double64 vectors
3910 if (bt == T_DOUBLE) {
3911 size = 1;
3912 }
3913 return MIN2(size,max_size);
3914 }
3915
3916 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3917 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3918 // by default on Cascade Lake
3919 if (VM_Version::is_default_intel_cascade_lake()) {
3920 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
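// For example, T_INT is capped at 32 / 4 = 8 elements per vector here.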
3921 }
3922 return Matcher::max_vector_size(bt);
3923 }
3924
3925 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3926 return -1;
3927 }
3928
3929 // Vector ideal reg corresponding to specified size in bytes
3930 uint Matcher::vector_ideal_reg(int size) {
3931 assert(MaxVectorSize >= size, "");
3932 switch(size) {
3933 case 4: return Op_VecS;
3934 case 8: return Op_VecD;
3935 case 16: return Op_VecX;
3936 case 32: return Op_VecY;
3937 case 64: return Op_VecZ;
3938 }
3939 ShouldNotReachHere();
3940 return 0;
3941 }
3942
3943 // Check for shift by small constant as well
3944 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3945 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3946 shift->in(2)->get_int() <= 3 &&
3947 // Are there other uses besides address expressions?
3948 !matcher->is_visited(shift)) {
3949 address_visited.set(shift->_idx); // Flag as address_visited
3950 mstack.push(shift->in(2), Matcher::Visit);
3951 Node *conv = shift->in(1);
3952 // Allow the Matcher to match the rule which bypasses the
3953 // ConvI2L operation for an array index on LP64
3954 // if the index value is positive.
3955 if (conv->Opcode() == Op_ConvI2L &&
3956 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3957 // Are there other uses besides address expressions?
3958 !matcher->is_visited(conv)) {
3959 address_visited.set(conv->_idx); // Flag as address_visited
3960 mstack.push(conv->in(1), Matcher::Pre_Visit);
3961 } else {
3962 mstack.push(conv, Matcher::Pre_Visit);
3963 }
3964 return true;
3965 }
3966 return false;
3967 }
3968
3969 // This function identifies sub-graphs in which a 'load' node is
3970 // input to two different nodes, such that it can be matched
3971 // with BMI instructions like blsi, blsr, etc.
3972 // Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3973 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3974 // refers to the same node.
3975 //
3976 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3977 // This is a temporary solution until we make DAGs expressible in ADL.
3978 template<typename ConType>
3979 class FusedPatternMatcher {
3980 Node* _op1_node;
3981 Node* _mop_node;
3982 int _con_op;
3983
3984 static int match_next(Node* n, int next_op, int next_op_idx) {
3985 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3986 return -1;
3987 }
3988
3989 if (next_op_idx == -1) { // n is commutative, try rotations
3990 if (n->in(1)->Opcode() == next_op) {
3991 return 1;
3992 } else if (n->in(2)->Opcode() == next_op) {
3993 return 2;
3994 }
3995 } else {
3996 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3997 if (n->in(next_op_idx)->Opcode() == next_op) {
3998 return next_op_idx;
3999 }
4000 }
4001 return -1;
4002 }
4003
4004 public:
4005 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4006 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4007
4008 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4009 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4010 typename ConType::NativeType con_value) {
4011 if (_op1_node->Opcode() != op1) {
4012 return false;
4013 }
4014 if (_mop_node->outcnt() > 2) {
4015 return false;
4016 }
4017 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4018 if (op1_op2_idx == -1) {
4019 return false;
4020 }
4021 // Memory operation must be the other edge
4022 int op1_mop_idx = (op1_op2_idx & 1) + 1;
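// op1_op2_idx is 1 or 2, so this picks the other input: 1 -> 2, 2 -> 1.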
4023
4024 // Check that the mop node is really what we want
4025 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4026 Node* op2_node = _op1_node->in(op1_op2_idx);
4027 if (op2_node->outcnt() > 1) {
4028 return false;
4029 }
4030 assert(op2_node->Opcode() == op2, "Should be");
4031 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4032 if (op2_con_idx == -1) {
4033 return false;
4034 }
4035 // Memory operation must be the other edge
4036 int op2_mop_idx = (op2_con_idx & 1) + 1;
4037 // Check that the memory operation is the same node
4038 if (op2_node->in(op2_mop_idx) == _mop_node) {
4039 // Now check the constant
4040 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4041 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4042 return true;
4043 }
4044 }
4045 }
4046 return false;
4047 }
4048 };
4049
4050 static bool is_bmi_pattern(Node* n, Node* m) {
4051 assert(UseBMI1Instructions, "sanity");
4052 if (n != nullptr && m != nullptr) {
4053 if (m->Opcode() == Op_LoadI) {
4054 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4055 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4056 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4057 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4058 } else if (m->Opcode() == Op_LoadL) {
4059 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4060 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4061 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4062 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4063 }
4064 }
4065 return false;
4066 }
4067
4068 // Should the matcher clone input 'm' of node 'n'?
4069 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4070 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4071 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4072 mstack.push(m, Visit);
4073 return true;
4074 }
4075 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4076 mstack.push(m, Visit); // m = ShiftCntV
4077 return true;
4078 }
4079 if (is_encode_and_store_pattern(n, m)) {
4080 mstack.push(m, Visit);
4081 return true;
4082 }
4083 return false;
4084 }
4085
4086 // Should the Matcher clone shifts on addressing modes, expecting them
4087 // to be subsumed into complex addressing expressions or compute them
4088 // into registers?
4089 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4090 Node *off = m->in(AddPNode::Offset);
4091 if (off->is_Con()) {
4092 address_visited.test_set(m->_idx); // Flag as address_visited
4093 Node *adr = m->in(AddPNode::Address);
4094
4095 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4096 // AtomicAdd is not an addressing expression.
4097 // Cheap to find it by looking for screwy base.
4098 if (adr->is_AddP() &&
4099 !adr->in(AddPNode::Base)->is_top() &&
4100 !adr->in(AddPNode::Offset)->is_Con() &&
4101 off->get_long() == (int) (off->get_long()) && // immL32
4102 // Are there other uses besides address expressions?
4103 !is_visited(adr)) {
4104 address_visited.set(adr->_idx); // Flag as address_visited
4105 Node *shift = adr->in(AddPNode::Offset);
4106 if (!clone_shift(shift, this, mstack, address_visited)) {
4107 mstack.push(shift, Pre_Visit);
4108 }
4109 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4110 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4111 } else {
4112 mstack.push(adr, Pre_Visit);
4113 }
4114
4115 // Clone X+offset as it also folds into most addressing expressions
4116 mstack.push(off, Visit);
4117 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4118 return true;
4119 } else if (clone_shift(off, this, mstack, address_visited)) {
4120 address_visited.test_set(m->_idx); // Flag as address_visited
4121 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4122 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4123 return true;
4124 }
4125 return false;
4126 }
4127
4128 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4129 switch (bt) {
4130 case BoolTest::eq:
4131 return Assembler::eq;
4132 case BoolTest::ne:
4133 return Assembler::neq;
4134 case BoolTest::le:
4135 case BoolTest::ule:
4136 return Assembler::le;
4137 case BoolTest::ge:
4138 case BoolTest::uge:
4139 return Assembler::nlt;
4140 case BoolTest::lt:
4141 case BoolTest::ult:
4142 return Assembler::lt;
4143 case BoolTest::gt:
4144 case BoolTest::ugt:
4145 return Assembler::nle;
4146 default : ShouldNotReachHere(); return Assembler::_false;
4147 }
4148 }
4149
4150 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4151 switch (bt) {
4152 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4153 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4154 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4155 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4156 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4157 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4158 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4159 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4160 }
4161 }
4162
4163 // Helper methods for MachSpillCopyNode::implementation().
4164 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4165 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4166 assert(ireg == Op_VecS || // 32bit vector
4167 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4168 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4169 "no non-adjacent vector moves" );
4170 if (masm) {
4171 switch (ireg) {
4172 case Op_VecS: // copy whole register
4173 case Op_VecD:
4174 case Op_VecX:
4175 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4176 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4177 } else {
4178 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4179 }
4180 break;
4181 case Op_VecY:
4182 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4183 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4184 } else {
4185 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4186 }
4187 break;
4188 case Op_VecZ:
4189 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4190 break;
4191 default:
4192 ShouldNotReachHere();
4193 }
4194 #ifndef PRODUCT
4195 } else {
4196 switch (ireg) {
4197 case Op_VecS:
4198 case Op_VecD:
4199 case Op_VecX:
4200 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4201 break;
4202 case Op_VecY:
4203 case Op_VecZ:
4204 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4205 break;
4206 default:
4207 ShouldNotReachHere();
4208 }
4209 #endif
4210 }
4211 }
4212
4213 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4214 int stack_offset, int reg, uint ireg, outputStream* st) {
4215 if (masm) {
4216 if (is_load) {
4217 switch (ireg) {
4218 case Op_VecS:
4219 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4220 break;
4221 case Op_VecD:
4222 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4223 break;
4224 case Op_VecX:
4225 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4226 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4227 } else {
4228 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4229 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4230 }
4231 break;
4232 case Op_VecY:
4233 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4234 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4235 } else {
4236 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4237 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4238 }
4239 break;
4240 case Op_VecZ:
4241 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4242 break;
4243 default:
4244 ShouldNotReachHere();
4245 }
4246 } else { // store
4247 switch (ireg) {
4248 case Op_VecS:
4249 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4250 break;
4251 case Op_VecD:
4252 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4253 break;
4254 case Op_VecX:
4255 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4256 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4257 }
4258 else {
4259 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4260 }
4261 break;
4262 case Op_VecY:
4263 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4264 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4265 }
4266 else {
4267 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4268 }
4269 break;
4270 case Op_VecZ:
4271 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4272 break;
4273 default:
4274 ShouldNotReachHere();
4275 }
4276 }
4277 #ifndef PRODUCT
4278 } else {
4279 if (is_load) {
4280 switch (ireg) {
4281 case Op_VecS:
4282 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4283 break;
4284 case Op_VecD:
4285 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4286 break;
4287 case Op_VecX:
4288 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4289 break;
4290 case Op_VecY:
4291 case Op_VecZ:
4292 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4293 break;
4294 default:
4295 ShouldNotReachHere();
4296 }
4297 } else { // store
4298 switch (ireg) {
4299 case Op_VecS:
4300 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4301 break;
4302 case Op_VecD:
4303 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4304 break;
4305 case Op_VecX:
4306 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4307 break;
4308 case Op_VecY:
4309 case Op_VecZ:
4310 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4311 break;
4312 default:
4313 ShouldNotReachHere();
4314 }
4315 }
4316 #endif
4317 }
4318 }
4319
4320 template <class T>
4321 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4322 int size = type2aelembytes(bt) * len;
4323 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4324 for (int i = 0; i < len; i++) {
4325 int offset = i * type2aelembytes(bt);
4326 switch (bt) {
4327 case T_BYTE: val->at(i) = con; break;
4328 case T_SHORT: {
4329 jshort c = con;
4330 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4331 break;
4332 }
4333 case T_INT: {
4334 jint c = con;
4335 memcpy(val->adr_at(offset), &c, sizeof(jint));
4336 break;
4337 }
4338 case T_LONG: {
4339 jlong c = con;
4340 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4341 break;
4342 }
4343 case T_FLOAT: {
4344 jfloat c = con;
4345 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4346 break;
4347 }
4348 case T_DOUBLE: {
4349 jdouble c = con;
4350 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4351 break;
4352 }
4353 default: assert(false, "%s", type2name(bt));
4354 }
4355 }
4356 return val;
4357 }
4358
4359 static inline jlong high_bit_set(BasicType bt) {
4360 switch (bt) {
4361 case T_BYTE: return 0x8080808080808080;
4362 case T_SHORT: return 0x8000800080008000;
4363 case T_INT: return 0x8000000080000000;
4364 case T_LONG: return 0x8000000000000000;
4365 default:
4366 ShouldNotReachHere();
4367 return 0;
4368 }
4369 }
4370
4371 #ifndef PRODUCT
4372 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4373 st->print("nop \t# %d bytes pad for loops and calls", _count);
4374 }
4375 #endif
4376
4377 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4378 __ nop(_count);
4379 }
4380
4381 uint MachNopNode::size(PhaseRegAlloc*) const {
4382 return _count;
4383 }
4384
4385 #ifndef PRODUCT
4386 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4387 st->print("# breakpoint");
4388 }
4389 #endif
4390
4391 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4392 __ int3();
4393 }
4394
4395 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4396 return MachNode::size(ra_);
4397 }
4398
4399 %}
4400
4401 //----------ENCODING BLOCK-----------------------------------------------------
4402 // This block specifies the encoding classes used by the compiler to
4403 // output byte streams. Encoding classes are parameterized macros
4404 // used by Machine Instruction Nodes in order to generate the bit
4405 // encoding of the instruction. Operands specify their base encoding
4406 // interface with the interface keyword. Four interfaces are currently
4407 // supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
4408 // COND_INTER. REG_INTER causes an operand to generate a function
4409 // which returns its register number when queried. CONST_INTER causes
4410 // an operand to generate a function which returns the value of the
4411 // constant when queried. MEMORY_INTER causes an operand to generate
4412 // four functions which return the Base Register, the Index Register,
4413 // the Scale Value, and the Offset Value of the operand when queried.
4414 // COND_INTER causes an operand to generate six functions which return
4415 // the encoding code (ie - encoding bits for the instruction)
4416 // associated with each basic boolean condition for a conditional
4417 // instruction.
4418 //
// Instructions specify two basic values for encoding. A function is also
// available to check whether the constant displacement is an oop.
// Instructions use the ins_encode keyword to specify their encoding
4422 // classes (which must be a sequence of enc_class names, and their
4423 // parameters, specified in the encoding block), and they use the
4424 // opcode keyword to specify, in order, their primary, secondary, and
4425 // tertiary opcode. Only the opcode sections which a particular
4426 // instruction needs for encoding need to be specified.
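// For example, an instruction rule can reference the cdql_enc class defined
// below with ins_encode(cdql_enc(div)), passing its 'div' operand as the
// enc_class parameter.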
4427 encode %{
4428 enc_class cdql_enc(no_rax_rdx_RegI div)
4429 %{
4430 // Full implementation of Java idiv and irem; checks for
4431 // special case as described in JVM spec., p.243 & p.271.
4432 //
4433 // normal case special case
4434 //
4435 // input : rax: dividend min_int
4436 // reg: divisor -1
4437 //
4438 // output: rax: quotient (= rax idiv reg) min_int
4439 // rdx: remainder (= rax irem reg) 0
4440 //
// Code sequence:
4442 //
4443 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4444 // 5: 75 07/08 jne e <normal>
4445 // 7: 33 d2 xor %edx,%edx
4446 // [div >= 8 -> offset + 1]
4447 // [REX_B]
4448 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4449 // c: 74 03/04 je 11 <done>
4450 // 000000000000000e <normal>:
4451 // e: 99 cltd
4452 // [div >= 8 -> offset + 1]
4453 // [REX_B]
4454 // f: f7 f9 idiv $div
4455 // 0000000000000011 <done>:
4456 Label normal;
4457 Label done;
4458
4459 // cmp $0x80000000,%eax
4460 __ cmpl(as_Register(RAX_enc), 0x80000000);
4461
4462 // jne e <normal>
4463 __ jccb(Assembler::notEqual, normal);
4464
4465 // xor %edx,%edx
4466 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4467
// cmp $0xffffffffffffffff,$div
4469 __ cmpl($div$$Register, -1);
4470
4471 // je 11 <done>
4472 __ jccb(Assembler::equal, done);
4473
4474 // <normal>
4475 // cltd
4476 __ bind(normal);
4477 __ cdql();
4478
4479 // idivl
4480 // <done>
4481 __ idivl($div$$Register);
4482 __ bind(done);
4483 %}
4484
4485 enc_class cdqq_enc(no_rax_rdx_RegL div)
4486 %{
4487 // Full implementation of Java ldiv and lrem; checks for
4488 // special case as described in JVM spec., p.243 & p.271.
4489 //
4490 // normal case special case
4491 //
4492 // input : rax: dividend min_long
4493 // reg: divisor -1
4494 //
4495 // output: rax: quotient (= rax idiv reg) min_long
4496 // rdx: remainder (= rax irem reg) 0
4497 //
// Code sequence:
4499 //
4500 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4501 // 7: 00 00 80
4502 // a: 48 39 d0 cmp %rdx,%rax
4503 // d: 75 08 jne 17 <normal>
4504 // f: 33 d2 xor %edx,%edx
4505 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4506 // 15: 74 05 je 1c <done>
4507 // 0000000000000017 <normal>:
4508 // 17: 48 99 cqto
4509 // 19: 48 f7 f9 idiv $div
4510 // 000000000000001c <done>:
4511 Label normal;
4512 Label done;
4513
4514 // mov $0x8000000000000000,%rdx
4515 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4516
4517 // cmp %rdx,%rax
4518 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4519
4520 // jne 17 <normal>
4521 __ jccb(Assembler::notEqual, normal);
4522
4523 // xor %edx,%edx
4524 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4525
4526 // cmp $0xffffffffffffffff,$div
4527 __ cmpq($div$$Register, -1);
4528
// je 1c <done>
4530 __ jccb(Assembler::equal, done);
4531
4532 // <normal>
4533 // cqto
4534 __ bind(normal);
4535 __ cdqq();
4536
// idivq
4538 // <done>
4539 __ idivq($div$$Register);
4540 __ bind(done);
4541 %}
4542
4543 enc_class clear_avx %{
4544 DEBUG_ONLY(int off0 = __ offset());
4545 if (generate_vzeroupper(Compile::current())) {
// Clear upper bits of YMM registers when the current compiled code uses
// wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
4549 __ vzeroupper();
4550 }
4551 DEBUG_ONLY(int off1 = __ offset());
4552 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4553 %}
4554
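// Load the runtime entry point into r10 and call through the register; an
// indirect call works regardless of how far away the target is.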
4555 enc_class Java_To_Runtime(method meth) %{
4556 __ lea(r10, RuntimeAddress((address)$meth$$method));
4557 __ call(r10);
4558 __ post_call_nop();
4559 %}
4560
4561 enc_class Java_Static_Call(method meth)
4562 %{
4563 // JAVA STATIC CALL
4564 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4565 // determine who we intended to call.
4566 if (!_method) {
4567 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4568 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4569 // The NOP here is purely to ensure that eliding a call to
4570 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4571 __ nop(5);
4572 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4573 } else {
4574 int method_index = resolved_method_index(masm);
4575 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4576 : static_call_Relocation::spec(method_index);
4577 address mark = __ pc();
4578 int call_offset = __ offset();
4579 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4580 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4581 // Calls of the same statically bound method can share
4582 // a stub to the interpreter.
4583 __ code()->shared_stub_to_interp_for(_method, call_offset);
4584 } else {
4585 // Emit stubs for static call.
4586 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4587 __ clear_inst_mark();
4588 if (stub == nullptr) {
4589 ciEnv::current()->record_failure("CodeCache is full");
4590 return;
4591 }
4592 }
4593 }
4594 __ post_call_nop();
4595 %}
4596
4597 enc_class Java_Dynamic_Call(method meth) %{
4598 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4599 __ post_call_nop();
4600 %}
4601
4602 enc_class call_epilog %{
4603 if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find the magic cookie on the stack
4605 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4606 Label L;
4607 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4608 __ jccb(Assembler::equal, L);
4609 // Die if stack mismatch
4610 __ int3();
4611 __ bind(L);
4612 }
4613 %}
4614
4615 %}
4616
4617 //----------FRAME--------------------------------------------------------------
4618 // Definition of frame structure and management information.
4619 //
4620 // S T A C K L A Y O U T Allocators stack-slot number
4621 // | (to get allocators register number
4622 // G Owned by | | v add OptoReg::stack0())
4623 // r CALLER | |
4624 // o | +--------+ pad to even-align allocators stack-slot
4625 // w V | pad0 | numbers; owned by CALLER
4626 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4627 // h ^ | in | 5
4628 // | | args | 4 Holes in incoming args owned by SELF
4629 // | | | | 3
4630 // | | +--------+
4631 // V | | old out| Empty on Intel, window on Sparc
4632 // | old |preserve| Must be even aligned.
4633 // | SP-+--------+----> Matcher::_old_SP, even aligned
4634 // | | in | 3 area for Intel ret address
4635 // Owned by |preserve| Empty on Sparc.
4636 // SELF +--------+
4637 // | | pad2 | 2 pad to align old SP
4638 // | +--------+ 1
4639 // | | locks | 0
4640 // | +--------+----> OptoReg::stack0(), even aligned
4641 // | | pad1 | 11 pad to align new SP
4642 // | +--------+
4643 // | | | 10
4644 // | | spills | 9 spills
4645 // V | | 8 (pad0 slot for callee)
4646 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4647 // ^ | out | 7
4648 // | | args | 6 Holes in outgoing args owned by CALLEE
4649 // Owned by +--------+
4650 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4651 // | new |preserve| Must be even-aligned.
4652 // | SP-+--------+----> Matcher::_new_SP, even aligned
4653 // | | |
4654 //
4655 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4656 // known from SELF's arguments and the Java calling convention.
4657 // Region 6-7 is determined per call site.
4658 // Note 2: If the calling convention leaves holes in the incoming argument
4659 // area, those holes are owned by SELF. Holes in the outgoing area
4660 // are owned by the CALLEE. Holes should not be necessary in the
4661 // incoming area, as the Java calling convention is completely under
4662 // the control of the AD file. Doubles can be sorted and packed to
4663 // avoid holes. Holes in the outgoing arguments may be necessary for
4664 // varargs C calling conventions.
4665 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4666 // even aligned with pad0 as needed.
4667 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4668 // region 6-11 is even aligned; it may be padded out more so that
4669 // the region from SP to FP meets the minimum stack alignment.
4670 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4671 // alignment. Region 11, pad1, may be dynamically extended so that
4672 // SP meets the minimum alignment.
4673
4674 frame
4675 %{
4676 // These three registers define part of the calling convention
4677 // between compiled code and the interpreter.
4678 inline_cache_reg(RAX); // Inline Cache Register
4679
4680 // Optional: name the operand used by cisc-spilling to access
4681 // [stack_pointer + offset]
4682 cisc_spilling_operand_name(indOffset32);
4683
4684 // Number of stack slots consumed by locking an object
4685 sync_stack_slots(2);
4686
4687 // Compiled code's Frame Pointer
4688 frame_pointer(RSP);
4689
4690 // Stack alignment requirement
4691 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4692
4693 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4694 // for calls to C. Supports the var-args backing area for register parms.
4695 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4696
4697 // The after-PROLOG location of the return address. Location of
4698 // return address specifies a type (REG or STACK) and a number
4699 // representing the register number (i.e. - use a register name) or
4700 // stack slot.
4701 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4702 // Otherwise, it is above the locks and verification slot and alignment word
4703 return_addr(STACK - 2 +
4704 align_up((Compile::current()->in_preserve_stack_slots() +
4705 Compile::current()->fixed_slots()),
4706 stack_alignment_in_slots()));
4707
4708 // Location of compiled Java return values. Same as C for now.
4709 return_value
4710 %{
4711 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4712 "only return normal values");
4713
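// Map each ideal register type to the (lo, hi) pair that holds the return
// value: e.g. Op_RegL returns in RAX/RAX_H, Op_RegD in XMM0/XMM0b, and
// single-slot types use OptoReg::Bad for the high half.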
4714 static const int lo[Op_RegL + 1] = {
4715 0,
4716 0,
4717 RAX_num, // Op_RegN
4718 RAX_num, // Op_RegI
4719 RAX_num, // Op_RegP
4720 XMM0_num, // Op_RegF
4721 XMM0_num, // Op_RegD
4722 RAX_num // Op_RegL
4723 };
4724 static const int hi[Op_RegL + 1] = {
4725 0,
4726 0,
4727 OptoReg::Bad, // Op_RegN
4728 OptoReg::Bad, // Op_RegI
4729 RAX_H_num, // Op_RegP
4730 OptoReg::Bad, // Op_RegF
4731 XMM0b_num, // Op_RegD
4732 RAX_H_num // Op_RegL
4733 };
// Flags and vector registers are excluded.
4735 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4736 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4737 %}
4738 %}
4739
4740 //----------ATTRIBUTES---------------------------------------------------------
4741 //----------Operand Attributes-------------------------------------------------
4742 op_attrib op_cost(0); // Required cost attribute
4743
4744 //----------Instruction Attributes---------------------------------------------
4745 ins_attrib ins_cost(100); // Required cost attribute
4746 ins_attrib ins_size(8); // Required size attribute (in bits)
4747 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4748 // a non-matching short branch variant
4749 // of some long branch?
4750 ins_attrib ins_alignment(1); // Required alignment attribute (must
4751 // be a power of 2) specifies the
4752 // alignment that some part of the
4753 // instruction (not necessarily the
4754 // start) requires. If > 1, a
4755 // compute_padding() function must be
4756 // provided for the instruction
4757
4758 // Whether this node is expanded during code emission into a sequence of
4759 // instructions and the first instruction can perform an implicit null check.
4760 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4761
4762 //----------OPERANDS-----------------------------------------------------------
4763 // Operand definitions must precede instruction definitions for correct parsing
4764 // in the ADLC because operands constitute user defined types which are used in
4765 // instruction definitions.
4766
4767 //----------Simple Operands----------------------------------------------------
4768 // Immediate Operands
4769 // Integer Immediate
4770 operand immI()
4771 %{
4772 match(ConI);
4773
4774 op_cost(10);
4775 format %{ %}
4776 interface(CONST_INTER);
4777 %}
4778
4779 // Constant for test vs zero
4780 operand immI_0()
4781 %{
4782 predicate(n->get_int() == 0);
4783 match(ConI);
4784
4785 op_cost(0);
4786 format %{ %}
4787 interface(CONST_INTER);
4788 %}
4789
4790 // Constant for increment
4791 operand immI_1()
4792 %{
4793 predicate(n->get_int() == 1);
4794 match(ConI);
4795
4796 op_cost(0);
4797 format %{ %}
4798 interface(CONST_INTER);
4799 %}
4800
4801 // Constant for decrement
4802 operand immI_M1()
4803 %{
4804 predicate(n->get_int() == -1);
4805 match(ConI);
4806
4807 op_cost(0);
4808 format %{ %}
4809 interface(CONST_INTER);
4810 %}
4811
4812 operand immI_2()
4813 %{
4814 predicate(n->get_int() == 2);
4815 match(ConI);
4816
4817 op_cost(0);
4818 format %{ %}
4819 interface(CONST_INTER);
4820 %}
4821
4822 operand immI_4()
4823 %{
4824 predicate(n->get_int() == 4);
4825 match(ConI);
4826
4827 op_cost(0);
4828 format %{ %}
4829 interface(CONST_INTER);
4830 %}
4831
4832 operand immI_8()
4833 %{
4834 predicate(n->get_int() == 8);
4835 match(ConI);
4836
4837 op_cost(0);
4838 format %{ %}
4839 interface(CONST_INTER);
4840 %}
4841
4842 // Valid scale values for addressing modes
4843 operand immI2()
4844 %{
4845 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4846 match(ConI);
4847
4848 format %{ %}
4849 interface(CONST_INTER);
4850 %}
4851
4852 operand immU7()
4853 %{
4854 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4855 match(ConI);
4856
4857 op_cost(5);
4858 format %{ %}
4859 interface(CONST_INTER);
4860 %}
4861
4862 operand immI8()
4863 %{
4864 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4865 match(ConI);
4866
4867 op_cost(5);
4868 format %{ %}
4869 interface(CONST_INTER);
4870 %}
4871
4872 operand immU8()
4873 %{
4874 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4875 match(ConI);
4876
4877 op_cost(5);
4878 format %{ %}
4879 interface(CONST_INTER);
4880 %}
4881
4882 operand immI16()
4883 %{
4884 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4885 match(ConI);
4886
4887 op_cost(10);
4888 format %{ %}
4889 interface(CONST_INTER);
4890 %}
4891
4892 // Int Immediate non-negative
4893 operand immU31()
4894 %{
4895 predicate(n->get_int() >= 0);
4896 match(ConI);
4897
4898 op_cost(0);
4899 format %{ %}
4900 interface(CONST_INTER);
4901 %}
4902
4903 // Pointer Immediate
4904 operand immP()
4905 %{
4906 match(ConP);
4907
4908 op_cost(10);
4909 format %{ %}
4910 interface(CONST_INTER);
4911 %}
4912
4913 // Null Pointer Immediate
4914 operand immP0()
4915 %{
4916 predicate(n->get_ptr() == 0);
4917 match(ConP);
4918
4919 op_cost(5);
4920 format %{ %}
4921 interface(CONST_INTER);
4922 %}
4923
4924 // Pointer Immediate
4925 operand immN() %{
4926 match(ConN);
4927
4928 op_cost(10);
4929 format %{ %}
4930 interface(CONST_INTER);
4931 %}
4932
4933 operand immNKlass() %{
4934 match(ConNKlass);
4935
4936 op_cost(10);
4937 format %{ %}
4938 interface(CONST_INTER);
4939 %}
4940
4941 // Null Pointer Immediate
4942 operand immN0() %{
4943 predicate(n->get_narrowcon() == 0);
4944 match(ConN);
4945
4946 op_cost(5);
4947 format %{ %}
4948 interface(CONST_INTER);
4949 %}
4950
4951 operand immP31()
4952 %{
4953 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4954 && (n->get_ptr() >> 31) == 0);
4955 match(ConP);
4956
4957 op_cost(5);
4958 format %{ %}
4959 interface(CONST_INTER);
4960 %}
4961
4962
4963 // Long Immediate
4964 operand immL()
4965 %{
4966 match(ConL);
4967
4968 op_cost(20);
4969 format %{ %}
4970 interface(CONST_INTER);
4971 %}
4972
4973 // Long Immediate 8-bit
4974 operand immL8()
4975 %{
4976 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4977 match(ConL);
4978
4979 op_cost(5);
4980 format %{ %}
4981 interface(CONST_INTER);
4982 %}
4983
4984 // Long Immediate 32-bit unsigned
4985 operand immUL32()
4986 %{
4987 predicate(n->get_long() == (unsigned int) (n->get_long()));
4988 match(ConL);
4989
4990 op_cost(10);
4991 format %{ %}
4992 interface(CONST_INTER);
4993 %}
4994
4995 // Long Immediate 32-bit signed
4996 operand immL32()
4997 %{
4998 predicate(n->get_long() == (int) (n->get_long()));
4999 match(ConL);
5000
5001 op_cost(15);
5002 format %{ %}
5003 interface(CONST_INTER);
5004 %}
5005
5006 operand immL_Pow2()
5007 %{
5008 predicate(is_power_of_2((julong)n->get_long()));
5009 match(ConL);
5010
5011 op_cost(15);
5012 format %{ %}
5013 interface(CONST_INTER);
5014 %}
5015
5016 operand immL_NotPow2()
5017 %{
5018 predicate(is_power_of_2((julong)~n->get_long()));
5019 match(ConL);
5020
5021 op_cost(15);
5022 format %{ %}
5023 interface(CONST_INTER);
5024 %}
5025
5026 // Long Immediate zero
5027 operand immL0()
5028 %{
5029 predicate(n->get_long() == 0L);
5030 match(ConL);
5031
5032 op_cost(10);
5033 format %{ %}
5034 interface(CONST_INTER);
5035 %}
5036
5037 // Constant for increment
5038 operand immL1()
5039 %{
5040 predicate(n->get_long() == 1);
5041 match(ConL);
5042
5043 format %{ %}
5044 interface(CONST_INTER);
5045 %}
5046
5047 // Constant for decrement
5048 operand immL_M1()
5049 %{
5050 predicate(n->get_long() == -1);
5051 match(ConL);
5052
5053 format %{ %}
5054 interface(CONST_INTER);
5055 %}
5056
5057 // Long Immediate: low 32-bit mask
5058 operand immL_32bits()
5059 %{
5060 predicate(n->get_long() == 0xFFFFFFFFL);
5061 match(ConL);
5062 op_cost(20);
5063
5064 format %{ %}
5065 interface(CONST_INTER);
5066 %}
5067
5068 // Int Immediate: 2^n-1, positive
5069 operand immI_Pow2M1()
5070 %{
5071 predicate((n->get_int() > 0)
5072 && is_power_of_2((juint)n->get_int() + 1));
5073 match(ConI);
5074
5075 op_cost(20);
5076 format %{ %}
5077 interface(CONST_INTER);
5078 %}
5079
5080 // Float Immediate zero
5081 operand immF0()
5082 %{
5083 predicate(jint_cast(n->getf()) == 0);
5084 match(ConF);
5085
5086 op_cost(5);
5087 format %{ %}
5088 interface(CONST_INTER);
5089 %}
5090
5091 // Float Immediate
5092 operand immF()
5093 %{
5094 match(ConF);
5095
5096 op_cost(15);
5097 format %{ %}
5098 interface(CONST_INTER);
5099 %}
5100
5101 // Half Float Immediate
5102 operand immH()
5103 %{
5104 match(ConH);
5105
5106 op_cost(15);
5107 format %{ %}
5108 interface(CONST_INTER);
5109 %}
5110
5111 // Double Immediate zero
5112 operand immD0()
5113 %{
5114 predicate(jlong_cast(n->getd()) == 0);
5115 match(ConD);
5116
5117 op_cost(5);
5118 format %{ %}
5119 interface(CONST_INTER);
5120 %}
5121
5122 // Double Immediate
5123 operand immD()
5124 %{
5125 match(ConD);
5126
5127 op_cost(15);
5128 format %{ %}
5129 interface(CONST_INTER);
5130 %}
5131
5132 // Immediates for special shifts (sign extend)
5133
5134 // Constants for increment
5135 operand immI_16()
5136 %{
5137 predicate(n->get_int() == 16);
5138 match(ConI);
5139
5140 format %{ %}
5141 interface(CONST_INTER);
5142 %}
5143
5144 operand immI_24()
5145 %{
5146 predicate(n->get_int() == 24);
5147 match(ConI);
5148
5149 format %{ %}
5150 interface(CONST_INTER);
5151 %}
5152
5153 // Constant for byte-wide masking
5154 operand immI_255()
5155 %{
5156 predicate(n->get_int() == 255);
5157 match(ConI);
5158
5159 format %{ %}
5160 interface(CONST_INTER);
5161 %}
5162
5163 // Constant for short-wide masking
5164 operand immI_65535()
5165 %{
5166 predicate(n->get_int() == 65535);
5167 match(ConI);
5168
5169 format %{ %}
5170 interface(CONST_INTER);
5171 %}
5172
5173 // Constant for byte-wide masking
5174 operand immL_255()
5175 %{
5176 predicate(n->get_long() == 255);
5177 match(ConL);
5178
5179 format %{ %}
5180 interface(CONST_INTER);
5181 %}
5182
5183 // Constant for short-wide masking
5184 operand immL_65535()
5185 %{
5186 predicate(n->get_long() == 65535);
5187 match(ConL);
5188
5189 format %{ %}
5190 interface(CONST_INTER);
5191 %}
5192
5193 // AOT Runtime Constants Address
5194 operand immAOTRuntimeConstantsAddress()
5195 %{
5196 // Check if the address is in the range of AOT Runtime Constants
5197 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5198 match(ConP);
5199
5200 op_cost(0);
5201 format %{ %}
5202 interface(CONST_INTER);
5203 %}
5204
5205 operand kReg()
5206 %{
5207 constraint(ALLOC_IN_RC(vectmask_reg));
5208 match(RegVectMask);
5209 format %{%}
5210 interface(REG_INTER);
5211 %}
5212
5213 // Register Operands
5214 // Integer Register
5215 operand rRegI()
5216 %{
5217 constraint(ALLOC_IN_RC(int_reg));
5218 match(RegI);
5219
5220 match(rax_RegI);
5221 match(rbx_RegI);
5222 match(rcx_RegI);
5223 match(rdx_RegI);
5224 match(rdi_RegI);
5225
5226 format %{ %}
5227 interface(REG_INTER);
5228 %}
5229
5230 // Special Registers
5231 operand rax_RegI()
5232 %{
5233 constraint(ALLOC_IN_RC(int_rax_reg));
5234 match(RegI);
5235 match(rRegI);
5236
5237 format %{ "RAX" %}
5238 interface(REG_INTER);
5239 %}
5240
5241 // Special Registers
5242 operand rbx_RegI()
5243 %{
5244 constraint(ALLOC_IN_RC(int_rbx_reg));
5245 match(RegI);
5246 match(rRegI);
5247
5248 format %{ "RBX" %}
5249 interface(REG_INTER);
5250 %}
5251
5252 operand rcx_RegI()
5253 %{
5254 constraint(ALLOC_IN_RC(int_rcx_reg));
5255 match(RegI);
5256 match(rRegI);
5257
5258 format %{ "RCX" %}
5259 interface(REG_INTER);
5260 %}
5261
5262 operand rdx_RegI()
5263 %{
5264 constraint(ALLOC_IN_RC(int_rdx_reg));
5265 match(RegI);
5266 match(rRegI);
5267
5268 format %{ "RDX" %}
5269 interface(REG_INTER);
5270 %}
5271
5272 operand rdi_RegI()
5273 %{
5274 constraint(ALLOC_IN_RC(int_rdi_reg));
5275 match(RegI);
5276 match(rRegI);
5277
5278 format %{ "RDI" %}
5279 interface(REG_INTER);
5280 %}
5281
5282 operand no_rax_rdx_RegI()
5283 %{
5284 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5285 match(RegI);
5286 match(rbx_RegI);
5287 match(rcx_RegI);
5288 match(rdi_RegI);
5289
5290 format %{ %}
5291 interface(REG_INTER);
5292 %}
5293
5294 operand no_rbp_r13_RegI()
5295 %{
5296 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5297 match(RegI);
5298 match(rRegI);
5299 match(rax_RegI);
5300 match(rbx_RegI);
5301 match(rcx_RegI);
5302 match(rdx_RegI);
5303 match(rdi_RegI);
5304
5305 format %{ %}
5306 interface(REG_INTER);
5307 %}
5308
5309 // Pointer Register
5310 operand any_RegP()
5311 %{
5312 constraint(ALLOC_IN_RC(any_reg));
5313 match(RegP);
5314 match(rax_RegP);
5315 match(rbx_RegP);
5316 match(rdi_RegP);
5317 match(rsi_RegP);
5318 match(rbp_RegP);
5319 match(r15_RegP);
5320 match(rRegP);
5321
5322 format %{ %}
5323 interface(REG_INTER);
5324 %}
5325
5326 operand rRegP()
5327 %{
5328 constraint(ALLOC_IN_RC(ptr_reg));
5329 match(RegP);
5330 match(rax_RegP);
5331 match(rbx_RegP);
5332 match(rdi_RegP);
5333 match(rsi_RegP);
5334 match(rbp_RegP); // See Q&A below about
5335 match(r15_RegP); // r15_RegP and rbp_RegP.
5336
5337 format %{ %}
5338 interface(REG_INTER);
5339 %}
5340
5341 operand rRegN() %{
5342 constraint(ALLOC_IN_RC(int_reg));
5343 match(RegN);
5344
5345 format %{ %}
5346 interface(REG_INTER);
5347 %}
5348
5349 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5350 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5351 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5352 // The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, the allocator will never
// assign r15 as the output.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, the allocator never assigns RBP as an instruction's output either.
5359
5360 // This operand is not allowed to use RBP even if
5361 // RBP is not used to hold the frame pointer.
5362 operand no_rbp_RegP()
5363 %{
5364 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5365 match(RegP);
5366 match(rbx_RegP);
5367 match(rsi_RegP);
5368 match(rdi_RegP);
5369
5370 format %{ %}
5371 interface(REG_INTER);
5372 %}
5373
5374 // Special Registers
5375 // Return a pointer value
5376 operand rax_RegP()
5377 %{
5378 constraint(ALLOC_IN_RC(ptr_rax_reg));
5379 match(RegP);
5380 match(rRegP);
5381
5382 format %{ %}
5383 interface(REG_INTER);
5384 %}
5385
5386 // Special Registers
5387 // Return a compressed pointer value
5388 operand rax_RegN()
5389 %{
5390 constraint(ALLOC_IN_RC(int_rax_reg));
5391 match(RegN);
5392 match(rRegN);
5393
5394 format %{ %}
5395 interface(REG_INTER);
5396 %}
5397
5398 // Used in AtomicAdd
5399 operand rbx_RegP()
5400 %{
5401 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5402 match(RegP);
5403 match(rRegP);
5404
5405 format %{ %}
5406 interface(REG_INTER);
5407 %}
5408
5409 operand rsi_RegP()
5410 %{
5411 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5412 match(RegP);
5413 match(rRegP);
5414
5415 format %{ %}
5416 interface(REG_INTER);
5417 %}
5418
5419 operand rbp_RegP()
5420 %{
5421 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5422 match(RegP);
5423 match(rRegP);
5424
5425 format %{ %}
5426 interface(REG_INTER);
5427 %}
5428
5429 // Used in rep stosq
5430 operand rdi_RegP()
5431 %{
5432 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5433 match(RegP);
5434 match(rRegP);
5435
5436 format %{ %}
5437 interface(REG_INTER);
5438 %}
5439
5440 operand r15_RegP()
5441 %{
5442 constraint(ALLOC_IN_RC(ptr_r15_reg));
5443 match(RegP);
5444 match(rRegP);
5445
5446 format %{ %}
5447 interface(REG_INTER);
5448 %}
5449
5450 operand rRegL()
5451 %{
5452 constraint(ALLOC_IN_RC(long_reg));
5453 match(RegL);
5454 match(rax_RegL);
5455 match(rdx_RegL);
5456
5457 format %{ %}
5458 interface(REG_INTER);
5459 %}
5460
5461 // Special Registers
5462 operand no_rax_rdx_RegL()
5463 %{
5464 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5465 match(RegL);
5466 match(rRegL);
5467
5468 format %{ %}
5469 interface(REG_INTER);
5470 %}
5471
5472 operand rax_RegL()
5473 %{
5474 constraint(ALLOC_IN_RC(long_rax_reg));
5475 match(RegL);
5476 match(rRegL);
5477
5478 format %{ "RAX" %}
5479 interface(REG_INTER);
5480 %}
5481
5482 operand rcx_RegL()
5483 %{
5484 constraint(ALLOC_IN_RC(long_rcx_reg));
5485 match(RegL);
5486 match(rRegL);
5487
5488 format %{ %}
5489 interface(REG_INTER);
5490 %}
5491
5492 operand rdx_RegL()
5493 %{
5494 constraint(ALLOC_IN_RC(long_rdx_reg));
5495 match(RegL);
5496 match(rRegL);
5497
5498 format %{ %}
5499 interface(REG_INTER);
5500 %}
5501
5502 operand r11_RegL()
5503 %{
5504 constraint(ALLOC_IN_RC(long_r11_reg));
5505 match(RegL);
5506 match(rRegL);
5507
5508 format %{ %}
5509 interface(REG_INTER);
5510 %}
5511
5512 operand no_rbp_r13_RegL()
5513 %{
5514 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5515 match(RegL);
5516 match(rRegL);
5517 match(rax_RegL);
5518 match(rcx_RegL);
5519 match(rdx_RegL);
5520
5521 format %{ %}
5522 interface(REG_INTER);
5523 %}
5524
5525 // Flags register, used as output of compare instructions
5526 operand rFlagsReg()
5527 %{
5528 constraint(ALLOC_IN_RC(int_flags));
5529 match(RegFlags);
5530
5531 format %{ "RFLAGS" %}
5532 interface(REG_INTER);
5533 %}
5534
5535 // Flags register, used as output of FLOATING POINT compare instructions
5536 operand rFlagsRegU()
5537 %{
5538 constraint(ALLOC_IN_RC(int_flags));
5539 match(RegFlags);
5540
5541 format %{ "RFLAGS_U" %}
5542 interface(REG_INTER);
5543 %}
5544
5545 operand rFlagsRegUCF() %{
5546 constraint(ALLOC_IN_RC(int_flags));
5547 match(RegFlags);
5548 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5549
5550 format %{ "RFLAGS_U_CF" %}
5551 interface(REG_INTER);
5552 %}
5553
5554 operand rFlagsRegUCFE() %{
5555 constraint(ALLOC_IN_RC(int_flags));
5556 match(RegFlags);
5557 predicate(UseAPX && VM_Version::supports_avx10_2());
5558
5559 format %{ "RFLAGS_U_CFE" %}
5560 interface(REG_INTER);
5561 %}
5562
5563 // Float register operands
5564 operand regF() %{
5565 constraint(ALLOC_IN_RC(float_reg));
5566 match(RegF);
5567
5568 format %{ %}
5569 interface(REG_INTER);
5570 %}
5571
5572 // Float register operands
5573 operand legRegF() %{
5574 constraint(ALLOC_IN_RC(float_reg_legacy));
5575 match(RegF);
5576
5577 format %{ %}
5578 interface(REG_INTER);
5579 %}
5580
5581 // Float register operands
5582 operand vlRegF() %{
5583 constraint(ALLOC_IN_RC(float_reg_vl));
5584 match(RegF);
5585
5586 format %{ %}
5587 interface(REG_INTER);
5588 %}
5589
5590 // Double register operands
5591 operand regD() %{
5592 constraint(ALLOC_IN_RC(double_reg));
5593 match(RegD);
5594
5595 format %{ %}
5596 interface(REG_INTER);
5597 %}
5598
5599 // Double register operands
5600 operand legRegD() %{
5601 constraint(ALLOC_IN_RC(double_reg_legacy));
5602 match(RegD);
5603
5604 format %{ %}
5605 interface(REG_INTER);
5606 %}
5607
5608 // Double register operands
5609 operand vlRegD() %{
5610 constraint(ALLOC_IN_RC(double_reg_vl));
5611 match(RegD);
5612
5613 format %{ %}
5614 interface(REG_INTER);
5615 %}
5616
5617 //----------Memory Operands----------------------------------------------------
5618 // Direct Memory Operand
5619 // operand direct(immP addr)
5620 // %{
5621 // match(addr);
5622
5623 // format %{ "[$addr]" %}
5624 // interface(MEMORY_INTER) %{
5625 // base(0xFFFFFFFF);
5626 // index(0x4);
5627 // scale(0x0);
5628 // disp($addr);
5629 // %}
5630 // %}
5631
5632 // Indirect Memory Operand
5633 operand indirect(any_RegP reg)
5634 %{
5635 constraint(ALLOC_IN_RC(ptr_reg));
5636 match(reg);
5637
5638 format %{ "[$reg]" %}
5639 interface(MEMORY_INTER) %{
5640 base($reg);
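// An index of 0x4 encodes "no index register" (RSP cannot be used as an index).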
5641 index(0x4);
5642 scale(0x0);
5643 disp(0x0);
5644 %}
5645 %}
5646
5647 // Indirect Memory Plus Short Offset Operand
5648 operand indOffset8(any_RegP reg, immL8 off)
5649 %{
5650 constraint(ALLOC_IN_RC(ptr_reg));
5651 match(AddP reg off);
5652
5653 format %{ "[$reg + $off (8-bit)]" %}
5654 interface(MEMORY_INTER) %{
5655 base($reg);
5656 index(0x4);
5657 scale(0x0);
5658 disp($off);
5659 %}
5660 %}
5661
5662 // Indirect Memory Plus Long Offset Operand
5663 operand indOffset32(any_RegP reg, immL32 off)
5664 %{
5665 constraint(ALLOC_IN_RC(ptr_reg));
5666 match(AddP reg off);
5667
5668 format %{ "[$reg + $off (32-bit)]" %}
5669 interface(MEMORY_INTER) %{
5670 base($reg);
5671 index(0x4);
5672 scale(0x0);
5673 disp($off);
5674 %}
5675 %}
5676
5677 // Indirect Memory Plus Index Register Plus Offset Operand
5678 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5679 %{
5680 constraint(ALLOC_IN_RC(ptr_reg));
5681 match(AddP (AddP reg lreg) off);
5682
5683 op_cost(10);
5684 format %{"[$reg + $off + $lreg]" %}
5685 interface(MEMORY_INTER) %{
5686 base($reg);
5687 index($lreg);
5688 scale(0x0);
5689 disp($off);
5690 %}
5691 %}
5692
5693 // Indirect Memory Plus Index Register Plus Offset Operand
5694 operand indIndex(any_RegP reg, rRegL lreg)
5695 %{
5696 constraint(ALLOC_IN_RC(ptr_reg));
5697 match(AddP reg lreg);
5698
5699 op_cost(10);
5700 format %{"[$reg + $lreg]" %}
5701 interface(MEMORY_INTER) %{
5702 base($reg);
5703 index($lreg);
5704 scale(0x0);
5705 disp(0x0);
5706 %}
5707 %}
5708
5709 // Indirect Memory Times Scale Plus Index Register
5710 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5711 %{
5712 constraint(ALLOC_IN_RC(ptr_reg));
5713 match(AddP reg (LShiftL lreg scale));
5714
5715 op_cost(10);
5716 format %{"[$reg + $lreg << $scale]" %}
5717 interface(MEMORY_INTER) %{
5718 base($reg);
5719 index($lreg);
5720 scale($scale);
5721 disp(0x0);
5722 %}
5723 %}
5724
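// The index here is known to be non-negative, so the ConvI2L can be folded into
// the addressing mode (zero- and sign-extension of the index agree).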
5725 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5726 %{
5727 constraint(ALLOC_IN_RC(ptr_reg));
5728 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5729 match(AddP reg (LShiftL (ConvI2L idx) scale));
5730
5731 op_cost(10);
5732 format %{"[$reg + pos $idx << $scale]" %}
5733 interface(MEMORY_INTER) %{
5734 base($reg);
5735 index($idx);
5736 scale($scale);
5737 disp(0x0);
5738 %}
5739 %}
5740
5741 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5742 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5743 %{
5744 constraint(ALLOC_IN_RC(ptr_reg));
5745 match(AddP (AddP reg (LShiftL lreg scale)) off);
5746
5747 op_cost(10);
5748 format %{"[$reg + $off + $lreg << $scale]" %}
5749 interface(MEMORY_INTER) %{
5750 base($reg);
5751 index($lreg);
5752 scale($scale);
5753 disp($off);
5754 %}
5755 %}
5756
5757 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5758 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5759 %{
5760 constraint(ALLOC_IN_RC(ptr_reg));
5761 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5762 match(AddP (AddP reg (ConvI2L idx)) off);
5763
5764 op_cost(10);
5765 format %{"[$reg + $off + $idx]" %}
5766 interface(MEMORY_INTER) %{
5767 base($reg);
5768 index($idx);
5769 scale(0x0);
5770 disp($off);
5771 %}
5772 %}
5773
5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5775 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5776 %{
5777 constraint(ALLOC_IN_RC(ptr_reg));
5778 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5779 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5780
5781 op_cost(10);
5782 format %{"[$reg + $off + $idx << $scale]" %}
5783 interface(MEMORY_INTER) %{
5784 base($reg);
5785 index($idx);
5786 scale($scale);
5787 disp($off);
5788 %}
5789 %}
5790
5791 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a base
// register, so we can't free r12 even when CompressedOops::base() == nullptr.
5794 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5795 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5796 constraint(ALLOC_IN_RC(ptr_reg));
5797 match(AddP (DecodeN reg) off);
5798
5799 op_cost(10);
5800 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5801 interface(MEMORY_INTER) %{
5802 base(0xc); // R12
5803 index($reg);
5804 scale(0x3);
5805 disp($off);
5806 %}
5807 %}
5808
5809 // Indirect Memory Operand
5810 operand indirectNarrow(rRegN reg)
5811 %{
5812 predicate(CompressedOops::shift() == 0);
5813 constraint(ALLOC_IN_RC(ptr_reg));
5814 match(DecodeN reg);
5815
5816 format %{ "[$reg]" %}
5817 interface(MEMORY_INTER) %{
5818 base($reg);
5819 index(0x4);
5820 scale(0x0);
5821 disp(0x0);
5822 %}
5823 %}
5824
5825 // Indirect Memory Plus Short Offset Operand
5826 operand indOffset8Narrow(rRegN reg, immL8 off)
5827 %{
5828 predicate(CompressedOops::shift() == 0);
5829 constraint(ALLOC_IN_RC(ptr_reg));
5830 match(AddP (DecodeN reg) off);
5831
5832 format %{ "[$reg + $off (8-bit)]" %}
5833 interface(MEMORY_INTER) %{
5834 base($reg);
5835 index(0x4);
5836 scale(0x0);
5837 disp($off);
5838 %}
5839 %}
5840
5841 // Indirect Memory Plus Long Offset Operand
5842 operand indOffset32Narrow(rRegN reg, immL32 off)
5843 %{
5844 predicate(CompressedOops::shift() == 0);
5845 constraint(ALLOC_IN_RC(ptr_reg));
5846 match(AddP (DecodeN reg) off);
5847
5848 format %{ "[$reg + $off (32-bit)]" %}
5849 interface(MEMORY_INTER) %{
5850 base($reg);
5851 index(0x4);
5852 scale(0x0);
5853 disp($off);
5854 %}
5855 %}
5856
5857 // Indirect Memory Plus Index Register Plus Offset Operand
5858 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5859 %{
5860 predicate(CompressedOops::shift() == 0);
5861 constraint(ALLOC_IN_RC(ptr_reg));
5862 match(AddP (AddP (DecodeN reg) lreg) off);
5863
5864 op_cost(10);
5865 format %{"[$reg + $off + $lreg]" %}
5866 interface(MEMORY_INTER) %{
5867 base($reg);
5868 index($lreg);
5869 scale(0x0);
5870 disp($off);
5871 %}
5872 %}
5873
5874 // Indirect Memory Plus Index Register Plus Offset Operand
5875 operand indIndexNarrow(rRegN reg, rRegL lreg)
5876 %{
5877 predicate(CompressedOops::shift() == 0);
5878 constraint(ALLOC_IN_RC(ptr_reg));
5879 match(AddP (DecodeN reg) lreg);
5880
5881 op_cost(10);
5882 format %{"[$reg + $lreg]" %}
5883 interface(MEMORY_INTER) %{
5884 base($reg);
5885 index($lreg);
5886 scale(0x0);
5887 disp(0x0);
5888 %}
5889 %}
5890
5891 // Indirect Memory Times Scale Plus Index Register
5892 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5893 %{
5894 predicate(CompressedOops::shift() == 0);
5895 constraint(ALLOC_IN_RC(ptr_reg));
5896 match(AddP (DecodeN reg) (LShiftL lreg scale));
5897
5898 op_cost(10);
5899 format %{"[$reg + $lreg << $scale]" %}
5900 interface(MEMORY_INTER) %{
5901 base($reg);
5902 index($lreg);
5903 scale($scale);
5904 disp(0x0);
5905 %}
5906 %}
5907
5908 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5909 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5910 %{
5911 predicate(CompressedOops::shift() == 0);
5912 constraint(ALLOC_IN_RC(ptr_reg));
5913 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5914
5915 op_cost(10);
5916 format %{"[$reg + $off + $lreg << $scale]" %}
5917 interface(MEMORY_INTER) %{
5918 base($reg);
5919 index($lreg);
5920 scale($scale);
5921 disp($off);
5922 %}
5923 %}
5924
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5926 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5927 %{
5928 constraint(ALLOC_IN_RC(ptr_reg));
5929 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5930 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5931
5932 op_cost(10);
5933 format %{"[$reg + $off + $idx]" %}
5934 interface(MEMORY_INTER) %{
5935 base($reg);
5936 index($idx);
5937 scale(0x0);
5938 disp($off);
5939 %}
5940 %}
5941
5942 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5943 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5944 %{
5945 constraint(ALLOC_IN_RC(ptr_reg));
5946 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5947 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5948
5949 op_cost(10);
5950 format %{"[$reg + $off + $idx << $scale]" %}
5951 interface(MEMORY_INTER) %{
5952 base($reg);
5953 index($idx);
5954 scale($scale);
5955 disp($off);
5956 %}
5957 %}
5958
5959 //----------Special Memory Operands--------------------------------------------
5960 // Stack Slot Operand - This operand is used for loading and storing temporary
5961 // values on the stack where a match requires a value to
5962 // flow through memory.
5963 operand stackSlotP(sRegP reg)
5964 %{
5965 constraint(ALLOC_IN_RC(stack_slots));
5966 // No match rule because this operand is only generated in matching
5967
5968 format %{ "[$reg]" %}
5969 interface(MEMORY_INTER) %{
5970 base(0x4); // RSP
5971 index(0x4); // No Index
5972 scale(0x0); // No Scale
5973 disp($reg); // Stack Offset
5974 %}
5975 %}
5976
5977 operand stackSlotI(sRegI reg)
5978 %{
5979 constraint(ALLOC_IN_RC(stack_slots));
5980 // No match rule because this operand is only generated in matching
5981
5982 format %{ "[$reg]" %}
5983 interface(MEMORY_INTER) %{
5984 base(0x4); // RSP
5985 index(0x4); // No Index
5986 scale(0x0); // No Scale
5987 disp($reg); // Stack Offset
5988 %}
5989 %}
5990
5991 operand stackSlotF(sRegF reg)
5992 %{
5993 constraint(ALLOC_IN_RC(stack_slots));
5994 // No match rule because this operand is only generated in matching
5995
5996 format %{ "[$reg]" %}
5997 interface(MEMORY_INTER) %{
5998 base(0x4); // RSP
5999 index(0x4); // No Index
6000 scale(0x0); // No Scale
6001 disp($reg); // Stack Offset
6002 %}
6003 %}
6004
6005 operand stackSlotD(sRegD reg)
6006 %{
6007 constraint(ALLOC_IN_RC(stack_slots));
6008 // No match rule because this operand is only generated in matching
6009
6010 format %{ "[$reg]" %}
6011 interface(MEMORY_INTER) %{
6012 base(0x4); // RSP
6013 index(0x4); // No Index
6014 scale(0x0); // No Scale
6015 disp($reg); // Stack Offset
6016 %}
6017 %}
6018 operand stackSlotL(sRegL reg)
6019 %{
6020 constraint(ALLOC_IN_RC(stack_slots));
6021 // No match rule because this operand is only generated in matching
6022
6023 format %{ "[$reg]" %}
6024 interface(MEMORY_INTER) %{
6025 base(0x4); // RSP
6026 index(0x4); // No Index
6027 scale(0x0); // No Scale
6028 disp($reg); // Stack Offset
6029 %}
6030 %}
6031
6032 //----------Conditional Branch Operands----------------------------------------
6033 // Comparison Op - This is the operation of the comparison, and is limited to
6034 // the following set of codes:
6035 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6036 //
6037 // Other attributes of the comparison, such as unsignedness, are specified
6038 // by the comparison instruction that sets a condition code flags register.
6039 // That result is represented by a flags operand whose subtype is appropriate
6040 // to the unsignedness (etc.) of the comparison.
6041 //
6042 // Later, the instruction which matches both the Comparison Op (a Bool) and
6043 // the flags (produced by the Cmp) specifies the coding of the comparison op
6044 // by matching a specific subtype of Bool operand below, such as cmpOpU.
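// The hex codes below are the x86 condition-code encodings used by Jcc/SETcc/
// CMOVcc, e.g. 0x4 selects "e" (equal) and 0xf selects "g" (signed greater).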
6045
6046 // Comparison Code
6047 operand cmpOp()
6048 %{
6049 match(Bool);
6050
6051 format %{ "" %}
6052 interface(COND_INTER) %{
6053 equal(0x4, "e");
6054 not_equal(0x5, "ne");
6055 less(0xc, "l");
6056 greater_equal(0xd, "ge");
6057 less_equal(0xe, "le");
6058 greater(0xf, "g");
6059 overflow(0x0, "o");
6060 no_overflow(0x1, "no");
6061 %}
6062 %}
6063
6064 // Comparison Code, unsigned compare. Used by FP also, with
6065 // C2 (unordered) turned into GT or LT already. The other bits
6066 // C0 and C3 are turned into Carry & Zero flags.
6067 operand cmpOpU()
6068 %{
6069 match(Bool);
6070
6071 format %{ "" %}
6072 interface(COND_INTER) %{
6073 equal(0x4, "e");
6074 not_equal(0x5, "ne");
6075 less(0x2, "b");
6076 greater_equal(0x3, "ae");
6077 less_equal(0x6, "be");
6078 greater(0x7, "a");
6079 overflow(0x0, "o");
6080 no_overflow(0x1, "no");
6081 %}
6082 %}
6083
6084
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6088 operand cmpOpUCF() %{
6089 match(Bool);
6090 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6091 (n->as_Bool()->_test._test == BoolTest::lt ||
6092 n->as_Bool()->_test._test == BoolTest::ge ||
6093 n->as_Bool()->_test._test == BoolTest::le ||
6094 n->as_Bool()->_test._test == BoolTest::gt ||
6095 n->in(1)->in(1) == n->in(1)->in(2)));
6096 format %{ "" %}
6097 interface(COND_INTER) %{
6098 equal(0xb, "np");
6099 not_equal(0xa, "p");
6100 less(0x2, "b");
6101 greater_equal(0x3, "ae");
6102 less_equal(0x6, "be");
6103 greater(0x7, "a");
6104 overflow(0x0, "o");
6105 no_overflow(0x1, "no");
6106 %}
6107 %}
6108
6109
6110 // Floating comparisons that can be fixed up with extra conditional jumps
6111 operand cmpOpUCF2() %{
6112 match(Bool);
6113 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6114 (n->as_Bool()->_test._test == BoolTest::ne ||
6115 n->as_Bool()->_test._test == BoolTest::eq) &&
6116 n->in(1)->in(1) != n->in(1)->in(2));
6117 format %{ "" %}
6118 interface(COND_INTER) %{
6119 equal(0x4, "e");
6120 not_equal(0x5, "ne");
6121 less(0x2, "b");
6122 greater_equal(0x3, "ae");
6123 less_equal(0x6, "be");
6124 greater(0x7, "a");
6125 overflow(0x0, "o");
6126 no_overflow(0x1, "no");
6127 %}
6128 %}
6129
6130
// Floating point comparisons that set condition flags to test more directly.
6132 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6133 // are used for L (<) and LE (<=) conditions. It's important to convert these
6134 // latter conditions to ones that use unsigned tests before passing into an
6135 // instruction because the preceding comparison might be based on a three way
6136 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6137 operand cmpOpUCFE()
6138 %{
6139 match(Bool);
6140 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6141 (n->as_Bool()->_test._test == BoolTest::ne ||
6142 n->as_Bool()->_test._test == BoolTest::eq ||
6143 n->as_Bool()->_test._test == BoolTest::lt ||
6144 n->as_Bool()->_test._test == BoolTest::ge ||
6145 n->as_Bool()->_test._test == BoolTest::le ||
6146 n->as_Bool()->_test._test == BoolTest::gt));
6147
6148 format %{ "" %}
6149 interface(COND_INTER) %{
6150 equal(0x4, "e");
6151 not_equal(0x5, "ne");
6152 less(0x2, "b");
6153 greater_equal(0x3, "ae");
6154 less_equal(0x6, "be");
6155 greater(0x7, "a");
6156 overflow(0x0, "o");
6157 no_overflow(0x1, "no");
6158 %}
6159 %}
6160
// Operands for bound floating point register arguments
6162 operand rxmm0() %{
6163 constraint(ALLOC_IN_RC(xmm0_reg));
6164 match(VecX);
format %{ %}
6166 interface(REG_INTER);
6167 %}
6168
6169 // Vectors
6170
6171 // Dummy generic vector class. Should be used for all vector operands.
6172 // Replaced with vec[SDXYZ] during post-selection pass.
6173 operand vec() %{
6174 constraint(ALLOC_IN_RC(dynamic));
6175 match(VecX);
6176 match(VecY);
6177 match(VecZ);
6178 match(VecS);
6179 match(VecD);
6180
6181 format %{ %}
6182 interface(REG_INTER);
6183 %}
6184
6185 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6186 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6187 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6188 // runtime code generation via reg_class_dynamic.
6189 operand legVec() %{
6190 constraint(ALLOC_IN_RC(dynamic));
6191 match(VecX);
6192 match(VecY);
6193 match(VecZ);
6194 match(VecS);
6195 match(VecD);
6196
6197 format %{ %}
6198 interface(REG_INTER);
6199 %}
6200
6201 // Replaces vec during post-selection cleanup. See above.
6202 operand vecS() %{
6203 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6204 match(VecS);
6205
6206 format %{ %}
6207 interface(REG_INTER);
6208 %}
6209
6210 // Replaces legVec during post-selection cleanup. See above.
6211 operand legVecS() %{
6212 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6213 match(VecS);
6214
6215 format %{ %}
6216 interface(REG_INTER);
6217 %}
6218
6219 // Replaces vec during post-selection cleanup. See above.
6220 operand vecD() %{
6221 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6222 match(VecD);
6223
6224 format %{ %}
6225 interface(REG_INTER);
6226 %}
6227
6228 // Replaces legVec during post-selection cleanup. See above.
6229 operand legVecD() %{
6230 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6231 match(VecD);
6232
6233 format %{ %}
6234 interface(REG_INTER);
6235 %}
6236
6237 // Replaces vec during post-selection cleanup. See above.
6238 operand vecX() %{
6239 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6240 match(VecX);
6241
6242 format %{ %}
6243 interface(REG_INTER);
6244 %}
6245
6246 // Replaces legVec during post-selection cleanup. See above.
6247 operand legVecX() %{
6248 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6249 match(VecX);
6250
6251 format %{ %}
6252 interface(REG_INTER);
6253 %}
6254
6255 // Replaces vec during post-selection cleanup. See above.
6256 operand vecY() %{
6257 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6258 match(VecY);
6259
6260 format %{ %}
6261 interface(REG_INTER);
6262 %}
6263
6264 // Replaces legVec during post-selection cleanup. See above.
6265 operand legVecY() %{
6266 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6267 match(VecY);
6268
6269 format %{ %}
6270 interface(REG_INTER);
6271 %}
6272
6273 // Replaces vec during post-selection cleanup. See above.
6274 operand vecZ() %{
6275 constraint(ALLOC_IN_RC(vectorz_reg));
6276 match(VecZ);
6277
6278 format %{ %}
6279 interface(REG_INTER);
6280 %}
6281
6282 // Replaces legVec during post-selection cleanup. See above.
6283 operand legVecZ() %{
6284 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6285 match(VecZ);
6286
6287 format %{ %}
6288 interface(REG_INTER);
6289 %}
6290
6291 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6293 // instruction definitions by not requiring the AD writer to specify separate
6294 // instructions for every form of operand when the instruction accepts
6295 // multiple operand types with the same basic encoding and format. The classic
6296 // case of this is memory operands.
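// For example, the 'memory' opclass below lets a single load or store rule
// match any of the addressing-mode operands it lists.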
6297
6298 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6299 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6300 indCompressedOopOffset,
6301 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6302 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6303 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6304
6305 //----------PIPELINE-----------------------------------------------------------
6306 // Rules which define the behavior of the target architectures pipeline.
6307 pipeline %{
6308
6309 //----------ATTRIBUTES---------------------------------------------------------
6310 attributes %{
variable_size_instructions; // Variable-size instructions
6312 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
instruction_unit_size = 1; // An instruction is 1 byte long
6314 instruction_fetch_unit_size = 16; // The processor fetches one line
6315 instruction_fetch_units = 1; // of 16 bytes
6316 %}
6317
6318 //----------RESOURCES----------------------------------------------------------
6319 // Resources are the functional units available to the machine
6320
6321 // Generic P2/P3 pipeline
6322 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6323 // 3 instructions decoded per cycle.
6324 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6325 // 3 ALU op, only ALU0 handles mul instructions.
6326 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6327 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6328 BR, FPU,
6329 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6330
6331 //----------PIPELINE DESCRIPTION-----------------------------------------------
6332 // Pipeline Description specifies the stages in the machine's pipeline
6333
6334 // Generic P2/P3 pipeline
6335 pipe_desc(S0, S1, S2, S3, S4, S5);
6336
6337 //----------PIPELINE CLASSES---------------------------------------------------
6338 // Pipeline Classes describe the stages in which input and output are
6339 // referenced by the hardware pipeline.
6340
6341 // Naming convention: ialu or fpu
6342 // Then: _reg
6343 // Then: _reg if there is a 2nd register
6344 // Then: _long if it's a pair of instructions implementing a long
6345 // Then: _fat if it requires the big decoder
6346 // Or: _mem if it requires the big decoder and a memory unit.
6347
6348 // Integer ALU reg operation
6349 pipe_class ialu_reg(rRegI dst)
6350 %{
6351 single_instruction;
6352 dst : S4(write);
6353 dst : S3(read);
6354 DECODE : S0; // any decoder
6355 ALU : S3; // any alu
6356 %}
6357
6358 // Long ALU reg operation
6359 pipe_class ialu_reg_long(rRegL dst)
6360 %{
6361 instruction_count(2);
6362 dst : S4(write);
6363 dst : S3(read);
6364 DECODE : S0(2); // any 2 decoders
6365 ALU : S3(2); // both alus
6366 %}
6367
6368 // Integer ALU reg operation using big decoder
6369 pipe_class ialu_reg_fat(rRegI dst)
6370 %{
6371 single_instruction;
6372 dst : S4(write);
6373 dst : S3(read);
6374 D0 : S0; // big decoder only
6375 ALU : S3; // any alu
6376 %}
6377
6378 // Integer ALU reg-reg operation
6379 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6380 %{
6381 single_instruction;
6382 dst : S4(write);
6383 src : S3(read);
6384 DECODE : S0; // any decoder
6385 ALU : S3; // any alu
6386 %}
6387
6388 // Integer ALU reg-reg operation
6389 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6390 %{
6391 single_instruction;
6392 dst : S4(write);
6393 src : S3(read);
6394 D0 : S0; // big decoder only
6395 ALU : S3; // any alu
6396 %}
6397
6398 // Integer ALU reg-mem operation
6399 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6400 %{
6401 single_instruction;
6402 dst : S5(write);
6403 mem : S3(read);
6404 D0 : S0; // big decoder only
6405 ALU : S4; // any alu
6406 MEM : S3; // any mem
6407 %}
6408
6409 // Integer mem operation (prefetch)
6410 pipe_class ialu_mem(memory mem)
6411 %{
6412 single_instruction;
6413 mem : S3(read);
6414 D0 : S0; // big decoder only
6415 MEM : S3; // any mem
6416 %}
6417
6418 // Integer Store to Memory
6419 pipe_class ialu_mem_reg(memory mem, rRegI src)
6420 %{
6421 single_instruction;
6422 mem : S3(read);
6423 src : S5(read);
6424 D0 : S0; // big decoder only
6425 ALU : S4; // any alu
6426 MEM : S3;
6427 %}
6428
6429 // // Long Store to Memory
6430 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6431 // %{
6432 // instruction_count(2);
6433 // mem : S3(read);
6434 // src : S5(read);
6435 // D0 : S0(2); // big decoder only; twice
6436 // ALU : S4(2); // any 2 alus
6437 // MEM : S3(2); // Both mems
6438 // %}
6439
6440 // Integer Store to Memory
6441 pipe_class ialu_mem_imm(memory mem)
6442 %{
6443 single_instruction;
6444 mem : S3(read);
6445 D0 : S0; // big decoder only
6446 ALU : S4; // any alu
6447 MEM : S3;
6448 %}
6449
6450 // Integer ALU0 reg-reg operation
6451 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6452 %{
6453 single_instruction;
6454 dst : S4(write);
6455 src : S3(read);
6456 D0 : S0; // Big decoder only
6457 ALU0 : S3; // only alu0
6458 %}
6459
6460 // Integer ALU0 reg-mem operation
6461 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6462 %{
6463 single_instruction;
6464 dst : S5(write);
6465 mem : S3(read);
6466 D0 : S0; // big decoder only
6467 ALU0 : S4; // ALU0 only
6468 MEM : S3; // any mem
6469 %}
6470
6471 // Integer ALU reg-reg operation
6472 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6473 %{
6474 single_instruction;
6475 cr : S4(write);
6476 src1 : S3(read);
6477 src2 : S3(read);
6478 DECODE : S0; // any decoder
6479 ALU : S3; // any alu
6480 %}
6481
6482 // Integer ALU reg-imm operation
6483 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6484 %{
6485 single_instruction;
6486 cr : S4(write);
6487 src1 : S3(read);
6488 DECODE : S0; // any decoder
6489 ALU : S3; // any alu
6490 %}
6491
6492 // Integer ALU reg-mem operation
6493 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6494 %{
6495 single_instruction;
6496 cr : S4(write);
6497 src1 : S3(read);
6498 src2 : S3(read);
6499 D0 : S0; // big decoder only
6500 ALU : S4; // any alu
6501 MEM : S3;
6502 %}
6503
6504 // Conditional move reg-reg
6505 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6506 %{
6507 instruction_count(4);
6508 y : S4(read);
6509 q : S3(read);
6510 p : S3(read);
6511 DECODE : S0(4); // any decoder
6512 %}
6513
6514 // Conditional move reg-reg
6515 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6516 %{
6517 single_instruction;
6518 dst : S4(write);
6519 src : S3(read);
6520 cr : S3(read);
6521 DECODE : S0; // any decoder
6522 %}
6523
6524 // Conditional move reg-mem
6525 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6526 %{
6527 single_instruction;
6528 dst : S4(write);
6529 src : S3(read);
6530 cr : S3(read);
6531 DECODE : S0; // any decoder
6532 MEM : S3;
6533 %}
6534
6535 // Conditional move reg-reg long
6536 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6537 %{
6538 single_instruction;
6539 dst : S4(write);
6540 src : S3(read);
6541 cr : S3(read);
6542 DECODE : S0(2); // any 2 decoders
6543 %}
6544
6545 // Float reg-reg operation
6546 pipe_class fpu_reg(regD dst)
6547 %{
6548 instruction_count(2);
6549 dst : S3(read);
6550 DECODE : S0(2); // any 2 decoders
6551 FPU : S3;
6552 %}
6553
6554 // Float reg-reg operation
6555 pipe_class fpu_reg_reg(regD dst, regD src)
6556 %{
6557 instruction_count(2);
6558 dst : S4(write);
6559 src : S3(read);
6560 DECODE : S0(2); // any 2 decoders
6561 FPU : S3;
6562 %}
6563
6564 // Float reg-reg operation
6565 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6566 %{
6567 instruction_count(3);
6568 dst : S4(write);
6569 src1 : S3(read);
6570 src2 : S3(read);
6571 DECODE : S0(3); // any 3 decoders
6572 FPU : S3(2);
6573 %}
6574
6575 // Float reg-reg operation
6576 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6577 %{
6578 instruction_count(4);
6579 dst : S4(write);
6580 src1 : S3(read);
6581 src2 : S3(read);
6582 src3 : S3(read);
6583   DECODE : S0(4); // any 4 decoders
6584 FPU : S3(2);
6585 %}
6586
6587 // Float reg-reg operation
6588 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6589 %{
6590 instruction_count(4);
6591 dst : S4(write);
6592 src1 : S3(read);
6593 src2 : S3(read);
6594 src3 : S3(read);
6595 DECODE : S1(3); // any 3 decoders
6596 D0 : S0; // Big decoder only
6597 FPU : S3(2);
6598 MEM : S3;
6599 %}
6600
6601 // Float reg-mem operation
6602 pipe_class fpu_reg_mem(regD dst, memory mem)
6603 %{
6604 instruction_count(2);
6605 dst : S5(write);
6606 mem : S3(read);
6607 D0 : S0; // big decoder only
6608 DECODE : S1; // any decoder for FPU POP
6609 FPU : S4;
6610 MEM : S3; // any mem
6611 %}
6612
6613 // Float reg-mem operation
6614 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6615 %{
6616 instruction_count(3);
6617 dst : S5(write);
6618 src1 : S3(read);
6619 mem : S3(read);
6620 D0 : S0; // big decoder only
6621 DECODE : S1(2); // any decoder for FPU POP
6622 FPU : S4;
6623 MEM : S3; // any mem
6624 %}
6625
6626 // Float mem-reg operation
6627 pipe_class fpu_mem_reg(memory mem, regD src)
6628 %{
6629 instruction_count(2);
6630 src : S5(read);
6631 mem : S3(read);
6632 DECODE : S0; // any decoder for FPU PUSH
6633 D0 : S1; // big decoder only
6634 FPU : S4;
6635 MEM : S3; // any mem
6636 %}
6637
6638 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6639 %{
6640 instruction_count(3);
6641 src1 : S3(read);
6642 src2 : S3(read);
6643 mem : S3(read);
6644 DECODE : S0(2); // any decoder for FPU PUSH
6645 D0 : S1; // big decoder only
6646 FPU : S4;
6647 MEM : S3; // any mem
6648 %}
6649
6650 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6651 %{
6652 instruction_count(3);
6653 src1 : S3(read);
6654 src2 : S3(read);
6655 mem : S4(read);
6656 DECODE : S0; // any decoder for FPU PUSH
6657 D0 : S0(2); // big decoder only
6658 FPU : S4;
6659 MEM : S3(2); // any mem
6660 %}
6661
6662 pipe_class fpu_mem_mem(memory dst, memory src1)
6663 %{
6664 instruction_count(2);
6665 src1 : S3(read);
6666 dst : S4(read);
6667 D0 : S0(2); // big decoder only
6668 MEM : S3(2); // any mem
6669 %}
6670
6671 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6672 %{
6673 instruction_count(3);
6674 src1 : S3(read);
6675 src2 : S3(read);
6676 dst : S4(read);
6677 D0 : S0(3); // big decoder only
6678 FPU : S4;
6679 MEM : S3(3); // any mem
6680 %}
6681
6682 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6683 %{
6684 instruction_count(3);
6685 src1 : S4(read);
6686 mem : S4(read);
6687 DECODE : S0; // any decoder for FPU PUSH
6688 D0 : S0(2); // big decoder only
6689 FPU : S4;
6690 MEM : S3(2); // any mem
6691 %}
6692
6693 // Float load constant
6694 pipe_class fpu_reg_con(regD dst)
6695 %{
6696 instruction_count(2);
6697 dst : S5(write);
6698 D0 : S0; // big decoder only for the load
6699 DECODE : S1; // any decoder for FPU POP
6700 FPU : S4;
6701 MEM : S3; // any mem
6702 %}
6703
6704 // Float load constant
6705 pipe_class fpu_reg_reg_con(regD dst, regD src)
6706 %{
6707 instruction_count(3);
6708 dst : S5(write);
6709 src : S3(read);
6710 D0 : S0; // big decoder only for the load
6711 DECODE : S1(2); // any decoder for FPU POP
6712 FPU : S4;
6713 MEM : S3; // any mem
6714 %}
6715
6716 // Unconditional branch
6717 pipe_class pipe_jmp(label labl)
6718 %{
6719 single_instruction;
6720 BR : S3;
6721 %}
6722
6723 // Conditional branch
6724 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6725 %{
6726 single_instruction;
6727 cr : S1(read);
6728 BR : S3;
6729 %}
6730
6731 // Allocation idiom
6732 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6733 %{
6734 instruction_count(1); force_serialization;
6735 fixed_latency(6);
6736 heap_ptr : S3(read);
6737 DECODE : S0(3);
6738 D0 : S2;
6739 MEM : S3;
6740 ALU : S3(2);
6741 dst : S5(write);
6742 BR : S5;
6743 %}
6744
6745 // Generic big/slow expanded idiom
6746 pipe_class pipe_slow()
6747 %{
6748 instruction_count(10); multiple_bundles; force_serialization;
6749 fixed_latency(100);
6750 D0 : S0(2);
6751 MEM : S3(2);
6752 %}
6753
6754 // The real do-nothing guy
6755 pipe_class empty()
6756 %{
6757 instruction_count(0);
6758 %}
6759
6760 // Define the class for the Nop node
6761 define
6762 %{
6763 MachNop = empty;
6764 %}
6765
6766 %}
6767
6768 //----------INSTRUCTIONS-------------------------------------------------------
6769 //
6770 // match -- States which machine-independent subtree may be replaced
6771 // by this instruction.
6772 // ins_cost -- The estimated cost of this instruction is used by instruction
6773 // selection to identify a minimum cost tree of machine
6774 // instructions that matches a tree of machine-independent
6775 // instructions.
6776 // format -- A string providing the disassembly for this instruction.
6777 // The value of an instruction's operand may be inserted
6778 // by referring to it with a '$' prefix.
6779 // opcode -- Three instruction opcodes may be provided. These are referred
6780 // to within an encode class as $primary, $secondary, and $tertiary
6781 //              respectively. The primary opcode is commonly used to
6782 // indicate the type of machine instruction, while secondary
6783 // and tertiary are often used for prefix options or addressing
6784 // modes.
6785 // ins_encode -- A list of encode classes with parameters. The encode class
6786 // name must have been defined in an 'enc_class' specification
6787 // in the encode section of the architecture description.
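//
// As a purely illustrative sketch (not an instruction definition used by the
// matcher -- the real definitions follow below), the pieces above combine
// roughly as follows for a hypothetical integer add:
//
//   instruct exampleAddI(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));   // replaces the ideal AddI subtree
//     effect(KILL cr);                 // addl clobbers the condition codes
//     ins_cost(150);                   // relative cost for instruction selection
//     format %{ "addl    $dst, $src\t# int" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);          // one of the pipe classes defined above
//   %}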
6788
6789 // ============================================================================
6790
6791 instruct ShouldNotReachHere() %{
6792 match(Halt);
6793 format %{ "stop\t# ShouldNotReachHere" %}
6794 ins_encode %{
6795 if (is_reachable()) {
6796 const char* str = __ code_string(_halt_reason);
6797 __ stop(str);
6798 }
6799 %}
6800 ins_pipe(pipe_slow);
6801 %}
6802
6803 // ============================================================================
6804
6805 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6806 // Load Float
6807 instruct MoveF2VL(vlRegF dst, regF src) %{
6808 match(Set dst src);
6809 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6810 ins_encode %{
6811 ShouldNotReachHere();
6812 %}
6813 ins_pipe( fpu_reg_reg );
6814 %}
6815
6816 // Load Float
6817 instruct MoveF2LEG(legRegF dst, regF src) %{
6818 match(Set dst src);
6819 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6820 ins_encode %{
6821 ShouldNotReachHere();
6822 %}
6823 ins_pipe( fpu_reg_reg );
6824 %}
6825
6826 // Load Float
6827 instruct MoveVL2F(regF dst, vlRegF src) %{
6828 match(Set dst src);
6829 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6830 ins_encode %{
6831 ShouldNotReachHere();
6832 %}
6833 ins_pipe( fpu_reg_reg );
6834 %}
6835
6836 // Load Float
6837 instruct MoveLEG2F(regF dst, legRegF src) %{
6838 match(Set dst src);
6839 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6840 ins_encode %{
6841 ShouldNotReachHere();
6842 %}
6843 ins_pipe( fpu_reg_reg );
6844 %}
6845
6846 // Load Double
6847 instruct MoveD2VL(vlRegD dst, regD src) %{
6848 match(Set dst src);
6849 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6850 ins_encode %{
6851 ShouldNotReachHere();
6852 %}
6853 ins_pipe( fpu_reg_reg );
6854 %}
6855
6856 // Load Double
6857 instruct MoveD2LEG(legRegD dst, regD src) %{
6858 match(Set dst src);
6859 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6860 ins_encode %{
6861 ShouldNotReachHere();
6862 %}
6863 ins_pipe( fpu_reg_reg );
6864 %}
6865
6866 // Load Double
6867 instruct MoveVL2D(regD dst, vlRegD src) %{
6868 match(Set dst src);
6869 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6870 ins_encode %{
6871 ShouldNotReachHere();
6872 %}
6873 ins_pipe( fpu_reg_reg );
6874 %}
6875
6876 // Load Double
6877 instruct MoveLEG2D(regD dst, legRegD src) %{
6878 match(Set dst src);
6879 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6880 ins_encode %{
6881 ShouldNotReachHere();
6882 %}
6883 ins_pipe( fpu_reg_reg );
6884 %}
6885
6886 //----------Load/Store/Move Instructions---------------------------------------
6887 //----------Load Instructions--------------------------------------------------
6888
6889 // Load Byte (8 bit signed)
6890 instruct loadB(rRegI dst, memory mem)
6891 %{
6892 match(Set dst (LoadB mem));
6893
6894 ins_cost(125);
6895 format %{ "movsbl $dst, $mem\t# byte" %}
6896
6897 ins_encode %{
6898 __ movsbl($dst$$Register, $mem$$Address);
6899 %}
6900
6901 ins_pipe(ialu_reg_mem);
6902 %}
6903
6904 // Load Byte (8 bit signed) into Long Register
6905 instruct loadB2L(rRegL dst, memory mem)
6906 %{
6907 match(Set dst (ConvI2L (LoadB mem)));
6908
6909 ins_cost(125);
6910 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6911
6912 ins_encode %{
6913 __ movsbq($dst$$Register, $mem$$Address);
6914 %}
6915
6916 ins_pipe(ialu_reg_mem);
6917 %}
6918
6919 // Load Unsigned Byte (8 bit UNsigned)
6920 instruct loadUB(rRegI dst, memory mem)
6921 %{
6922 match(Set dst (LoadUB mem));
6923
6924 ins_cost(125);
6925 format %{ "movzbl $dst, $mem\t# ubyte" %}
6926
6927 ins_encode %{
6928 __ movzbl($dst$$Register, $mem$$Address);
6929 %}
6930
6931 ins_pipe(ialu_reg_mem);
6932 %}
6933
6934 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6935 instruct loadUB2L(rRegL dst, memory mem)
6936 %{
6937 match(Set dst (ConvI2L (LoadUB mem)));
6938
6939 ins_cost(125);
6940 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6941
6942 ins_encode %{
6943 __ movzbq($dst$$Register, $mem$$Address);
6944 %}
6945
6946 ins_pipe(ialu_reg_mem);
6947 %}
6948
6949 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6950 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6951 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6952 effect(KILL cr);
6953
6954 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6955 "andl $dst, right_n_bits($mask, 8)" %}
6956 ins_encode %{
6957 Register Rdst = $dst$$Register;
6958 __ movzbq(Rdst, $mem$$Address);
6959 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6960 %}
6961 ins_pipe(ialu_reg_mem);
6962 %}
6963
6964 // Load Short (16 bit signed)
6965 instruct loadS(rRegI dst, memory mem)
6966 %{
6967 match(Set dst (LoadS mem));
6968
6969 ins_cost(125);
6970 format %{ "movswl $dst, $mem\t# short" %}
6971
6972 ins_encode %{
6973 __ movswl($dst$$Register, $mem$$Address);
6974 %}
6975
6976 ins_pipe(ialu_reg_mem);
6977 %}
6978
6979 // Load Short (16 bit signed) to Byte (8 bit signed)
6980 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6981 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6982
6983 ins_cost(125);
6984 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6985 ins_encode %{
6986 __ movsbl($dst$$Register, $mem$$Address);
6987 %}
6988 ins_pipe(ialu_reg_mem);
6989 %}
6990
6991 // Load Short (16 bit signed) into Long Register
6992 instruct loadS2L(rRegL dst, memory mem)
6993 %{
6994 match(Set dst (ConvI2L (LoadS mem)));
6995
6996 ins_cost(125);
6997 format %{ "movswq $dst, $mem\t# short -> long" %}
6998
6999 ins_encode %{
7000 __ movswq($dst$$Register, $mem$$Address);
7001 %}
7002
7003 ins_pipe(ialu_reg_mem);
7004 %}
7005
7006 // Load Unsigned Short/Char (16 bit UNsigned)
7007 instruct loadUS(rRegI dst, memory mem)
7008 %{
7009 match(Set dst (LoadUS mem));
7010
7011 ins_cost(125);
7012 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7013
7014 ins_encode %{
7015 __ movzwl($dst$$Register, $mem$$Address);
7016 %}
7017
7018 ins_pipe(ialu_reg_mem);
7019 %}
7020
7021 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7022 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7023 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7024
7025 ins_cost(125);
7026 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7027 ins_encode %{
7028 __ movsbl($dst$$Register, $mem$$Address);
7029 %}
7030 ins_pipe(ialu_reg_mem);
7031 %}
7032
7033 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7034 instruct loadUS2L(rRegL dst, memory mem)
7035 %{
7036 match(Set dst (ConvI2L (LoadUS mem)));
7037
7038 ins_cost(125);
7039 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7040
7041 ins_encode %{
7042 __ movzwq($dst$$Register, $mem$$Address);
7043 %}
7044
7045 ins_pipe(ialu_reg_mem);
7046 %}
7047
7048 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7049 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7050 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7051
7052 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7053 ins_encode %{
7054 __ movzbq($dst$$Register, $mem$$Address);
7055 %}
7056 ins_pipe(ialu_reg_mem);
7057 %}
7058
7059 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7060 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7061 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7062 effect(KILL cr);
7063
7064 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7065 "andl $dst, right_n_bits($mask, 16)" %}
7066 ins_encode %{
7067 Register Rdst = $dst$$Register;
7068 __ movzwq(Rdst, $mem$$Address);
7069 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7070 %}
7071 ins_pipe(ialu_reg_mem);
7072 %}
7073
7074 // Load Integer
7075 instruct loadI(rRegI dst, memory mem)
7076 %{
7077 match(Set dst (LoadI mem));
7078
7079 ins_cost(125);
7080 format %{ "movl $dst, $mem\t# int" %}
7081
7082 ins_encode %{
7083 __ movl($dst$$Register, $mem$$Address);
7084 %}
7085
7086 ins_pipe(ialu_reg_mem);
7087 %}
7088
7089 // Load Integer (32 bit signed) to Byte (8 bit signed)
7090 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7091 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7092
7093 ins_cost(125);
7094 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7095 ins_encode %{
7096 __ movsbl($dst$$Register, $mem$$Address);
7097 %}
7098 ins_pipe(ialu_reg_mem);
7099 %}
7100
7101 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7102 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7103 match(Set dst (AndI (LoadI mem) mask));
7104
7105 ins_cost(125);
7106 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7107 ins_encode %{
7108 __ movzbl($dst$$Register, $mem$$Address);
7109 %}
7110 ins_pipe(ialu_reg_mem);
7111 %}
7112
7113 // Load Integer (32 bit signed) to Short (16 bit signed)
7114 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7115 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7116
7117 ins_cost(125);
7118 format %{ "movswl $dst, $mem\t# int -> short" %}
7119 ins_encode %{
7120 __ movswl($dst$$Register, $mem$$Address);
7121 %}
7122 ins_pipe(ialu_reg_mem);
7123 %}
7124
7125 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7126 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7127 match(Set dst (AndI (LoadI mem) mask));
7128
7129 ins_cost(125);
7130 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7131 ins_encode %{
7132 __ movzwl($dst$$Register, $mem$$Address);
7133 %}
7134 ins_pipe(ialu_reg_mem);
7135 %}
7136
7137 // Load Integer into Long Register
7138 instruct loadI2L(rRegL dst, memory mem)
7139 %{
7140 match(Set dst (ConvI2L (LoadI mem)));
7141
7142 ins_cost(125);
7143 format %{ "movslq $dst, $mem\t# int -> long" %}
7144
7145 ins_encode %{
7146 __ movslq($dst$$Register, $mem$$Address);
7147 %}
7148
7149 ins_pipe(ialu_reg_mem);
7150 %}
7151
7152 // Load Integer with mask 0xFF into Long Register
7153 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7154 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7155
7156 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7157 ins_encode %{
7158 __ movzbq($dst$$Register, $mem$$Address);
7159 %}
7160 ins_pipe(ialu_reg_mem);
7161 %}
7162
7163 // Load Integer with mask 0xFFFF into Long Register
7164 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7165 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7166
7167 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7168 ins_encode %{
7169 __ movzwq($dst$$Register, $mem$$Address);
7170 %}
7171 ins_pipe(ialu_reg_mem);
7172 %}
7173
7174 // Load Integer with a 31-bit mask into Long Register
7175 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7176 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7177 effect(KILL cr);
7178
7179 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7180 "andl $dst, $mask" %}
7181 ins_encode %{
7182 Register Rdst = $dst$$Register;
7183 __ movl(Rdst, $mem$$Address);
7184 __ andl(Rdst, $mask$$constant);
7185 %}
7186 ins_pipe(ialu_reg_mem);
7187 %}
7188
7189 // Load Unsigned Integer into Long Register
7190 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7191 %{
7192 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7193
7194 ins_cost(125);
7195 format %{ "movl $dst, $mem\t# uint -> long" %}
7196
7197 ins_encode %{
7198 __ movl($dst$$Register, $mem$$Address);
7199 %}
7200
7201 ins_pipe(ialu_reg_mem);
7202 %}
7203
7204 // Load Long
7205 instruct loadL(rRegL dst, memory mem)
7206 %{
7207 match(Set dst (LoadL mem));
7208
7209 ins_cost(125);
7210 format %{ "movq $dst, $mem\t# long" %}
7211
7212 ins_encode %{
7213 __ movq($dst$$Register, $mem$$Address);
7214 %}
7215
7216 ins_pipe(ialu_reg_mem); // XXX
7217 %}
7218
7219 // Load Range
7220 instruct loadRange(rRegI dst, memory mem)
7221 %{
7222 match(Set dst (LoadRange mem));
7223
7224 ins_cost(125); // XXX
7225 format %{ "movl $dst, $mem\t# range" %}
7226 ins_encode %{
7227 __ movl($dst$$Register, $mem$$Address);
7228 %}
7229 ins_pipe(ialu_reg_mem);
7230 %}
7231
7232 // Load Pointer
7233 instruct loadP(rRegP dst, memory mem)
7234 %{
7235 match(Set dst (LoadP mem));
7236 predicate(n->as_Load()->barrier_data() == 0);
7237
7238 ins_cost(125); // XXX
7239 format %{ "movq $dst, $mem\t# ptr" %}
7240 ins_encode %{
7241 __ movq($dst$$Register, $mem$$Address);
7242 %}
7243 ins_pipe(ialu_reg_mem); // XXX
7244 %}
7245
7246 // Load Compressed Pointer
7247 instruct loadN(rRegN dst, memory mem)
7248 %{
7249 predicate(n->as_Load()->barrier_data() == 0);
7250 match(Set dst (LoadN mem));
7251
7252 ins_cost(125); // XXX
7253 format %{ "movl $dst, $mem\t# compressed ptr" %}
7254 ins_encode %{
7255 __ movl($dst$$Register, $mem$$Address);
7256 %}
7257 ins_pipe(ialu_reg_mem); // XXX
7258 %}
7259
7260
7261 // Load Klass Pointer
7262 instruct loadKlass(rRegP dst, memory mem)
7263 %{
7264 match(Set dst (LoadKlass mem));
7265
7266 ins_cost(125); // XXX
7267 format %{ "movq $dst, $mem\t# class" %}
7268 ins_encode %{
7269 __ movq($dst$$Register, $mem$$Address);
7270 %}
7271 ins_pipe(ialu_reg_mem); // XXX
7272 %}
7273
7274 // Load narrow Klass Pointer
7275 instruct loadNKlass(rRegN dst, memory mem)
7276 %{
7277 predicate(!UseCompactObjectHeaders);
7278 match(Set dst (LoadNKlass mem));
7279
7280 ins_cost(125); // XXX
7281 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7282 ins_encode %{
7283 __ movl($dst$$Register, $mem$$Address);
7284 %}
7285 ins_pipe(ialu_reg_mem); // XXX
7286 %}
7287
7288 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7289 %{
7290 predicate(UseCompactObjectHeaders);
7291 match(Set dst (LoadNKlass mem));
7292 effect(KILL cr);
7293 ins_cost(125);
7294 format %{
7295 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7296 "shrl $dst, markWord::klass_shift_at_offset"
7297 %}
7298 ins_encode %{
7299 if (UseAPX) {
7300 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
7301 }
7302 else {
7303 __ movl($dst$$Register, $mem$$Address);
7304 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7305 }
7306 %}
7307 ins_pipe(ialu_reg_mem);
7308 %}
7309
7310 // Load Float
7311 instruct loadF(regF dst, memory mem)
7312 %{
7313 match(Set dst (LoadF mem));
7314
7315 ins_cost(145); // XXX
7316 format %{ "movss $dst, $mem\t# float" %}
7317 ins_encode %{
7318 __ movflt($dst$$XMMRegister, $mem$$Address);
7319 %}
7320 ins_pipe(pipe_slow); // XXX
7321 %}
7322
7323 // Load Double
7324 instruct loadD_partial(regD dst, memory mem)
7325 %{
7326 predicate(!UseXmmLoadAndClearUpper);
7327 match(Set dst (LoadD mem));
7328
7329 ins_cost(145); // XXX
7330 format %{ "movlpd $dst, $mem\t# double" %}
7331 ins_encode %{
7332 __ movdbl($dst$$XMMRegister, $mem$$Address);
7333 %}
7334 ins_pipe(pipe_slow); // XXX
7335 %}
7336
7337 instruct loadD(regD dst, memory mem)
7338 %{
7339 predicate(UseXmmLoadAndClearUpper);
7340 match(Set dst (LoadD mem));
7341
7342 ins_cost(145); // XXX
7343 format %{ "movsd $dst, $mem\t# double" %}
7344 ins_encode %{
7345 __ movdbl($dst$$XMMRegister, $mem$$Address);
7346 %}
7347 ins_pipe(pipe_slow); // XXX
7348 %}
7349
7350 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7351 %{
7352 match(Set dst con);
7353
7354 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7355
7356 ins_encode %{
7357 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7358 %}
7359
7360 ins_pipe(ialu_reg_fat);
7361 %}
7362
7363 // min = java.lang.Math.min(float a, float b)
7364 // max = java.lang.Math.max(float a, float b)
7365 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
7366 %{
7367 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7368 match(Set dst (MaxF a b));
7369 match(Set dst (MinF a b));
7370
7371 format %{ "minmaxF $dst, $a, $b" %}
7372 ins_encode %{
7373 int opcode = this->ideal_Opcode();
7374 __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7375 %}
7376 ins_pipe( pipe_slow );
7377 %}
7378
7379 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
7380 %{
7381 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7382 match(Set dst (MaxF a b));
7383 match(Set dst (MinF a b));
7384 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7385
7386 format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7387 ins_encode %{
7388 int opcode = this->ideal_Opcode();
7389     bool min = (opcode == Op_MinF);
7390 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7391 min, fp_prec_flt /*pt*/);
7392 %}
7393 ins_pipe( pipe_slow );
7394 %}
7395
7396 // min = java.lang.Math.min(float a, float b)
7397 // max = java.lang.Math.max(float a, float b)
7398 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
7399 %{
7400 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7401 match(Set dst (MaxF a b));
7402 match(Set dst (MinF a b));
7403 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7404
7405 format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7406 ins_encode %{
7407 int opcode = this->ideal_Opcode();
7408 int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
7409 __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7410 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7411 %}
7412 ins_pipe( pipe_slow );
7413 %}
7414
7415 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
7416 %{
7417 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7418 match(Set dst (MaxF a b));
7419 match(Set dst (MinF a b));
7420 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7421
7422   format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7423 ins_encode %{
7424 int opcode = this->ideal_Opcode();
7425     bool min = (opcode == Op_MinF);
7426 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7427 min, fp_prec_flt /*pt*/);
7428 %}
7429 ins_pipe( pipe_slow );
7430 %}
7431
7432 // min = java.lang.Math.min(double a, double b)
7433 // max = java.lang.Math.max(double a, double b)
7434 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
7435 %{
7436 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7437 match(Set dst (MaxD a b));
7438 match(Set dst (MinD a b));
7439
7440 format %{ "minmaxD $dst, $a, $b" %}
7441 ins_encode %{
7442 int opcode = this->ideal_Opcode();
7443 __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7444 %}
7445 ins_pipe( pipe_slow );
7446 %}
7447
7448 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
7449 %{
7450 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7451 match(Set dst (MaxD a b));
7452 match(Set dst (MinD a b));
7453 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7454
7455 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7456 ins_encode %{
7457 int opcode = this->ideal_Opcode();
7458     bool min = (opcode == Op_MinD);
7459 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7460 min, fp_prec_dbl /*pt*/);
7461 %}
7462 ins_pipe( pipe_slow );
7463 %}
7464
7465 // min = java.lang.Math.min(double a, double b)
7466 // max = java.lang.Math.max(double a, double b)
7467 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
7468 %{
7469 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7470 match(Set dst (MaxD a b));
7471 match(Set dst (MinD a b));
7472 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7473
7474 format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7475 ins_encode %{
7476 int opcode = this->ideal_Opcode();
7477 int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
7478 __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7479 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7480 %}
7481 ins_pipe( pipe_slow );
7482 %}
7483
7484 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
7485 %{
7486 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7487 match(Set dst (MaxD a b));
7488 match(Set dst (MinD a b));
7489 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7490
7491 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7492 ins_encode %{
7493 int opcode = this->ideal_Opcode();
7494     bool min = (opcode == Op_MinD);
7495 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7496 min, fp_prec_dbl /*pt*/);
7497 %}
7498 ins_pipe( pipe_slow );
7499 %}
7500
7501 // Load Effective Address
7502 instruct leaP8(rRegP dst, indOffset8 mem)
7503 %{
7504 match(Set dst mem);
7505
7506 ins_cost(110); // XXX
7507 format %{ "leaq $dst, $mem\t# ptr 8" %}
7508 ins_encode %{
7509 __ leaq($dst$$Register, $mem$$Address);
7510 %}
7511 ins_pipe(ialu_reg_reg_fat);
7512 %}
7513
7514 instruct leaP32(rRegP dst, indOffset32 mem)
7515 %{
7516 match(Set dst mem);
7517
7518 ins_cost(110);
7519 format %{ "leaq $dst, $mem\t# ptr 32" %}
7520 ins_encode %{
7521 __ leaq($dst$$Register, $mem$$Address);
7522 %}
7523 ins_pipe(ialu_reg_reg_fat);
7524 %}
7525
7526 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7527 %{
7528 match(Set dst mem);
7529
7530 ins_cost(110);
7531 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7532 ins_encode %{
7533 __ leaq($dst$$Register, $mem$$Address);
7534 %}
7535 ins_pipe(ialu_reg_reg_fat);
7536 %}
7537
7538 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7539 %{
7540 match(Set dst mem);
7541
7542 ins_cost(110);
7543 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7544 ins_encode %{
7545 __ leaq($dst$$Register, $mem$$Address);
7546 %}
7547 ins_pipe(ialu_reg_reg_fat);
7548 %}
7549
7550 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7551 %{
7552 match(Set dst mem);
7553
7554 ins_cost(110);
7555 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7556 ins_encode %{
7557 __ leaq($dst$$Register, $mem$$Address);
7558 %}
7559 ins_pipe(ialu_reg_reg_fat);
7560 %}
7561
7562 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7563 %{
7564 match(Set dst mem);
7565
7566 ins_cost(110);
7567 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7568 ins_encode %{
7569 __ leaq($dst$$Register, $mem$$Address);
7570 %}
7571 ins_pipe(ialu_reg_reg_fat);
7572 %}
7573
7574 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7575 %{
7576 match(Set dst mem);
7577
7578 ins_cost(110);
7579 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7580 ins_encode %{
7581 __ leaq($dst$$Register, $mem$$Address);
7582 %}
7583 ins_pipe(ialu_reg_reg_fat);
7584 %}
7585
7586 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7587 %{
7588 match(Set dst mem);
7589
7590 ins_cost(110);
7591 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7592 ins_encode %{
7593 __ leaq($dst$$Register, $mem$$Address);
7594 %}
7595 ins_pipe(ialu_reg_reg_fat);
7596 %}
7597
7598 // Load Effective Address which uses Narrow (32-bits) oop
7599 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7600 %{
7601 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7602 match(Set dst mem);
7603
7604 ins_cost(110);
7605 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7606 ins_encode %{
7607 __ leaq($dst$$Register, $mem$$Address);
7608 %}
7609 ins_pipe(ialu_reg_reg_fat);
7610 %}
7611
7612 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7613 %{
7614 predicate(CompressedOops::shift() == 0);
7615 match(Set dst mem);
7616
7617 ins_cost(110); // XXX
7618 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7619 ins_encode %{
7620 __ leaq($dst$$Register, $mem$$Address);
7621 %}
7622 ins_pipe(ialu_reg_reg_fat);
7623 %}
7624
7625 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7626 %{
7627 predicate(CompressedOops::shift() == 0);
7628 match(Set dst mem);
7629
7630 ins_cost(110);
7631 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7632 ins_encode %{
7633 __ leaq($dst$$Register, $mem$$Address);
7634 %}
7635 ins_pipe(ialu_reg_reg_fat);
7636 %}
7637
7638 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7639 %{
7640 predicate(CompressedOops::shift() == 0);
7641 match(Set dst mem);
7642
7643 ins_cost(110);
7644 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7645 ins_encode %{
7646 __ leaq($dst$$Register, $mem$$Address);
7647 %}
7648 ins_pipe(ialu_reg_reg_fat);
7649 %}
7650
7651 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7652 %{
7653 predicate(CompressedOops::shift() == 0);
7654 match(Set dst mem);
7655
7656 ins_cost(110);
7657 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7658 ins_encode %{
7659 __ leaq($dst$$Register, $mem$$Address);
7660 %}
7661 ins_pipe(ialu_reg_reg_fat);
7662 %}
7663
7664 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7665 %{
7666 predicate(CompressedOops::shift() == 0);
7667 match(Set dst mem);
7668
7669 ins_cost(110);
7670 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7671 ins_encode %{
7672 __ leaq($dst$$Register, $mem$$Address);
7673 %}
7674 ins_pipe(ialu_reg_reg_fat);
7675 %}
7676
7677 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7678 %{
7679 predicate(CompressedOops::shift() == 0);
7680 match(Set dst mem);
7681
7682 ins_cost(110);
7683 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7684 ins_encode %{
7685 __ leaq($dst$$Register, $mem$$Address);
7686 %}
7687 ins_pipe(ialu_reg_reg_fat);
7688 %}
7689
7690 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7691 %{
7692 predicate(CompressedOops::shift() == 0);
7693 match(Set dst mem);
7694
7695 ins_cost(110);
7696 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7697 ins_encode %{
7698 __ leaq($dst$$Register, $mem$$Address);
7699 %}
7700 ins_pipe(ialu_reg_reg_fat);
7701 %}
7702
7703 instruct loadConI(rRegI dst, immI src)
7704 %{
7705 match(Set dst src);
7706
7707 format %{ "movl $dst, $src\t# int" %}
7708 ins_encode %{
7709 __ movl($dst$$Register, $src$$constant);
7710 %}
7711 ins_pipe(ialu_reg_fat); // XXX
7712 %}
7713
7714 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7715 %{
7716 match(Set dst src);
7717 effect(KILL cr);
7718
7719 ins_cost(50);
7720 format %{ "xorl $dst, $dst\t# int" %}
7721 ins_encode %{
7722 __ xorl($dst$$Register, $dst$$Register);
7723 %}
7724 ins_pipe(ialu_reg);
7725 %}
7726
7727 instruct loadConL(rRegL dst, immL src)
7728 %{
7729 match(Set dst src);
7730
7731 ins_cost(150);
7732 format %{ "movq $dst, $src\t# long" %}
7733 ins_encode %{
7734 __ mov64($dst$$Register, $src$$constant);
7735 %}
7736 ins_pipe(ialu_reg);
7737 %}
7738
7739 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7740 %{
7741 match(Set dst src);
7742 effect(KILL cr);
7743
7744 ins_cost(50);
7745 format %{ "xorl $dst, $dst\t# long" %}
7746 ins_encode %{
7747 __ xorl($dst$$Register, $dst$$Register);
7748 %}
7749 ins_pipe(ialu_reg); // XXX
7750 %}
7751
7752 instruct loadConUL32(rRegL dst, immUL32 src)
7753 %{
7754 match(Set dst src);
7755
7756 ins_cost(60);
7757 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7758 ins_encode %{
7759 __ movl($dst$$Register, $src$$constant);
7760 %}
7761 ins_pipe(ialu_reg);
7762 %}
7763
7764 instruct loadConL32(rRegL dst, immL32 src)
7765 %{
7766 match(Set dst src);
7767
7768 ins_cost(70);
7769 format %{ "movq $dst, $src\t# long (32-bit)" %}
7770 ins_encode %{
7771 __ movq($dst$$Register, $src$$constant);
7772 %}
7773 ins_pipe(ialu_reg);
7774 %}
7775
7776 instruct loadConP(rRegP dst, immP con) %{
7777 match(Set dst con);
7778
7779 format %{ "movq $dst, $con\t# ptr" %}
7780 ins_encode %{
7781 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7782 %}
7783 ins_pipe(ialu_reg_fat); // XXX
7784 %}
7785
7786 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7787 %{
7788 match(Set dst src);
7789 effect(KILL cr);
7790
7791 ins_cost(50);
7792 format %{ "xorl $dst, $dst\t# ptr" %}
7793 ins_encode %{
7794 __ xorl($dst$$Register, $dst$$Register);
7795 %}
7796 ins_pipe(ialu_reg);
7797 %}
7798
7799 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7800 %{
7801 match(Set dst src);
7802 effect(KILL cr);
7803
7804 ins_cost(60);
7805 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7806 ins_encode %{
7807 __ movl($dst$$Register, $src$$constant);
7808 %}
7809 ins_pipe(ialu_reg);
7810 %}
7811
7812 instruct loadConF(regF dst, immF con) %{
7813 match(Set dst con);
7814 ins_cost(125);
7815 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7816 ins_encode %{
7817 __ movflt($dst$$XMMRegister, $constantaddress($con));
7818 %}
7819 ins_pipe(pipe_slow);
7820 %}
7821
7822 instruct loadConH(regF dst, immH con) %{
7823 match(Set dst con);
7824 ins_cost(125);
7825 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7826 ins_encode %{
7827 __ movflt($dst$$XMMRegister, $constantaddress($con));
7828 %}
7829 ins_pipe(pipe_slow);
7830 %}
7831
7832 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7833 match(Set dst src);
7834 effect(KILL cr);
7835 format %{ "xorq $dst, $src\t# compressed null pointer" %}
7836 ins_encode %{
7837 __ xorq($dst$$Register, $dst$$Register);
7838 %}
7839 ins_pipe(ialu_reg);
7840 %}
7841
7842 instruct loadConN(rRegN dst, immN src) %{
7843 match(Set dst src);
7844
7845 ins_cost(125);
7846 format %{ "movl $dst, $src\t# compressed ptr" %}
7847 ins_encode %{
7848 address con = (address)$src$$constant;
7849 if (con == nullptr) {
7850 ShouldNotReachHere();
7851 } else {
7852 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7853 }
7854 %}
7855 ins_pipe(ialu_reg_fat); // XXX
7856 %}
7857
7858 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7859 match(Set dst src);
7860
7861 ins_cost(125);
7862 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7863 ins_encode %{
7864 address con = (address)$src$$constant;
7865 if (con == nullptr) {
7866 ShouldNotReachHere();
7867 } else {
7868 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7869 }
7870 %}
7871 ins_pipe(ialu_reg_fat); // XXX
7872 %}
7873
7874 instruct loadConF0(regF dst, immF0 src)
7875 %{
7876 match(Set dst src);
7877 ins_cost(100);
7878
7879 format %{ "xorps $dst, $dst\t# float 0.0" %}
7880 ins_encode %{
7881 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7882 %}
7883 ins_pipe(pipe_slow);
7884 %}
7885
7886 // Use the same format since predicate() cannot be used here.
7887 instruct loadConD(regD dst, immD con) %{
7888 match(Set dst con);
7889 ins_cost(125);
7890 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7891 ins_encode %{
7892 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7893 %}
7894 ins_pipe(pipe_slow);
7895 %}
7896
7897 instruct loadConD0(regD dst, immD0 src)
7898 %{
7899 match(Set dst src);
7900 ins_cost(100);
7901
7902 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7903 ins_encode %{
7904 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7905 %}
7906 ins_pipe(pipe_slow);
7907 %}
7908
7909 instruct loadSSI(rRegI dst, stackSlotI src)
7910 %{
7911 match(Set dst src);
7912
7913 ins_cost(125);
7914 format %{ "movl $dst, $src\t# int stk" %}
7915 ins_encode %{
7916 __ movl($dst$$Register, $src$$Address);
7917 %}
7918 ins_pipe(ialu_reg_mem);
7919 %}
7920
7921 instruct loadSSL(rRegL dst, stackSlotL src)
7922 %{
7923 match(Set dst src);
7924
7925 ins_cost(125);
7926 format %{ "movq $dst, $src\t# long stk" %}
7927 ins_encode %{
7928 __ movq($dst$$Register, $src$$Address);
7929 %}
7930 ins_pipe(ialu_reg_mem);
7931 %}
7932
7933 instruct loadSSP(rRegP dst, stackSlotP src)
7934 %{
7935 match(Set dst src);
7936
7937 ins_cost(125);
7938 format %{ "movq $dst, $src\t# ptr stk" %}
7939 ins_encode %{
7940 __ movq($dst$$Register, $src$$Address);
7941 %}
7942 ins_pipe(ialu_reg_mem);
7943 %}
7944
7945 instruct loadSSF(regF dst, stackSlotF src)
7946 %{
7947 match(Set dst src);
7948
7949 ins_cost(125);
7950 format %{ "movss $dst, $src\t# float stk" %}
7951 ins_encode %{
7952 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7953 %}
7954 ins_pipe(pipe_slow); // XXX
7955 %}
7956
7957 // Use the same format since predicate() cannot be used here.
7958 instruct loadSSD(regD dst, stackSlotD src)
7959 %{
7960 match(Set dst src);
7961
7962 ins_cost(125);
7963 format %{ "movsd $dst, $src\t# double stk" %}
7964 ins_encode %{
7965 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7966 %}
7967 ins_pipe(pipe_slow); // XXX
7968 %}
7969
7970 // Prefetch instructions for allocation.
7971 // Must be safe to execute with invalid address (cannot fault).
7972
7973 instruct prefetchAlloc( memory mem ) %{
7974 predicate(AllocatePrefetchInstr==3);
7975 match(PrefetchAllocation mem);
7976 ins_cost(125);
7977
7978 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7979 ins_encode %{
7980 __ prefetchw($mem$$Address);
7981 %}
7982 ins_pipe(ialu_mem);
7983 %}
7984
7985 instruct prefetchAllocNTA( memory mem ) %{
7986 predicate(AllocatePrefetchInstr==0);
7987 match(PrefetchAllocation mem);
7988 ins_cost(125);
7989
7990 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7991 ins_encode %{
7992 __ prefetchnta($mem$$Address);
7993 %}
7994 ins_pipe(ialu_mem);
7995 %}
7996
7997 instruct prefetchAllocT0( memory mem ) %{
7998 predicate(AllocatePrefetchInstr==1);
7999 match(PrefetchAllocation mem);
8000 ins_cost(125);
8001
8002 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
8003 ins_encode %{
8004 __ prefetcht0($mem$$Address);
8005 %}
8006 ins_pipe(ialu_mem);
8007 %}
8008
8009 instruct prefetchAllocT2( memory mem ) %{
8010 predicate(AllocatePrefetchInstr==2);
8011 match(PrefetchAllocation mem);
8012 ins_cost(125);
8013
8014 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8015 ins_encode %{
8016 __ prefetcht2($mem$$Address);
8017 %}
8018 ins_pipe(ialu_mem);
8019 %}
8020
8021 //----------Store Instructions-------------------------------------------------
8022
8023 // Store Byte
8024 instruct storeB(memory mem, rRegI src)
8025 %{
8026 match(Set mem (StoreB mem src));
8027
8028 ins_cost(125); // XXX
8029 format %{ "movb $mem, $src\t# byte" %}
8030 ins_encode %{
8031 __ movb($mem$$Address, $src$$Register);
8032 %}
8033 ins_pipe(ialu_mem_reg);
8034 %}
8035
8036 // Store Char/Short
8037 instruct storeC(memory mem, rRegI src)
8038 %{
8039 match(Set mem (StoreC mem src));
8040
8041 ins_cost(125); // XXX
8042 format %{ "movw $mem, $src\t# char/short" %}
8043 ins_encode %{
8044 __ movw($mem$$Address, $src$$Register);
8045 %}
8046 ins_pipe(ialu_mem_reg);
8047 %}
8048
8049 // Store Integer
8050 instruct storeI(memory mem, rRegI src)
8051 %{
8052 match(Set mem (StoreI mem src));
8053
8054 ins_cost(125); // XXX
8055 format %{ "movl $mem, $src\t# int" %}
8056 ins_encode %{
8057 __ movl($mem$$Address, $src$$Register);
8058 %}
8059 ins_pipe(ialu_mem_reg);
8060 %}
8061
8062 // Store Long
8063 instruct storeL(memory mem, rRegL src)
8064 %{
8065 match(Set mem (StoreL mem src));
8066
8067 ins_cost(125); // XXX
8068 format %{ "movq $mem, $src\t# long" %}
8069 ins_encode %{
8070 __ movq($mem$$Address, $src$$Register);
8071 %}
8072 ins_pipe(ialu_mem_reg); // XXX
8073 %}
8074
8075 // Store Pointer
8076 instruct storeP(memory mem, any_RegP src)
8077 %{
8078 predicate(n->as_Store()->barrier_data() == 0);
8079 match(Set mem (StoreP mem src));
8080
8081 ins_cost(125); // XXX
8082 format %{ "movq $mem, $src\t# ptr" %}
8083 ins_encode %{
8084 __ movq($mem$$Address, $src$$Register);
8085 %}
8086 ins_pipe(ialu_mem_reg);
8087 %}
8088
8089 instruct storeImmP0(memory mem, immP0 zero)
8090 %{
8091 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8092 match(Set mem (StoreP mem zero));
8093
8094 ins_cost(125); // XXX
8095 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8096 ins_encode %{
8097 __ movq($mem$$Address, r12);
8098 %}
8099 ins_pipe(ialu_mem_reg);
8100 %}
8101
8102 // Store Null Pointer, mark word, or other simple pointer constant.
8103 instruct storeImmP(memory mem, immP31 src)
8104 %{
8105 predicate(n->as_Store()->barrier_data() == 0);
8106 match(Set mem (StoreP mem src));
8107
8108 ins_cost(150); // XXX
8109 format %{ "movq $mem, $src\t# ptr" %}
8110 ins_encode %{
8111 __ movq($mem$$Address, $src$$constant);
8112 %}
8113 ins_pipe(ialu_mem_imm);
8114 %}
8115
8116 // Store Compressed Pointer
8117 instruct storeN(memory mem, rRegN src)
8118 %{
8119 predicate(n->as_Store()->barrier_data() == 0);
8120 match(Set mem (StoreN mem src));
8121
8122 ins_cost(125); // XXX
8123 format %{ "movl $mem, $src\t# compressed ptr" %}
8124 ins_encode %{
8125 __ movl($mem$$Address, $src$$Register);
8126 %}
8127 ins_pipe(ialu_mem_reg);
8128 %}
8129
8130 instruct storeNKlass(memory mem, rRegN src)
8131 %{
8132 match(Set mem (StoreNKlass mem src));
8133
8134 ins_cost(125); // XXX
8135 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8136 ins_encode %{
8137 __ movl($mem$$Address, $src$$Register);
8138 %}
8139 ins_pipe(ialu_mem_reg);
8140 %}
8141
8142 instruct storeImmN0(memory mem, immN0 zero)
8143 %{
8144 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8145 match(Set mem (StoreN mem zero));
8146
8147 ins_cost(125); // XXX
8148 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8149 ins_encode %{
8150 __ movl($mem$$Address, r12);
8151 %}
8152 ins_pipe(ialu_mem_reg);
8153 %}
8154
8155 instruct storeImmN(memory mem, immN src)
8156 %{
8157 predicate(n->as_Store()->barrier_data() == 0);
8158 match(Set mem (StoreN mem src));
8159
8160 ins_cost(150); // XXX
8161 format %{ "movl $mem, $src\t# compressed ptr" %}
8162 ins_encode %{
8163 address con = (address)$src$$constant;
8164 if (con == nullptr) {
8165 __ movl($mem$$Address, 0);
8166 } else {
8167 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8168 }
8169 %}
8170 ins_pipe(ialu_mem_imm);
8171 %}
8172
8173 instruct storeImmNKlass(memory mem, immNKlass src)
8174 %{
8175 match(Set mem (StoreNKlass mem src));
8176
8177 ins_cost(150); // XXX
8178 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8179 ins_encode %{
8180 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8181 %}
8182 ins_pipe(ialu_mem_imm);
8183 %}
8184
8185 // Store Integer Immediate
8186 instruct storeImmI0(memory mem, immI_0 zero)
8187 %{
8188 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8189 match(Set mem (StoreI mem zero));
8190
8191 ins_cost(125); // XXX
8192 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8193 ins_encode %{
8194 __ movl($mem$$Address, r12);
8195 %}
8196 ins_pipe(ialu_mem_reg);
8197 %}
8198
8199 instruct storeImmI(memory mem, immI src)
8200 %{
8201 match(Set mem (StoreI mem src));
8202
8203 ins_cost(150);
8204 format %{ "movl $mem, $src\t# int" %}
8205 ins_encode %{
8206 __ movl($mem$$Address, $src$$constant);
8207 %}
8208 ins_pipe(ialu_mem_imm);
8209 %}
8210
8211 // Store Long Immediate
8212 instruct storeImmL0(memory mem, immL0 zero)
8213 %{
8214 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8215 match(Set mem (StoreL mem zero));
8216
8217 ins_cost(125); // XXX
8218 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8219 ins_encode %{
8220 __ movq($mem$$Address, r12);
8221 %}
8222 ins_pipe(ialu_mem_reg);
8223 %}
8224
8225 instruct storeImmL(memory mem, immL32 src)
8226 %{
8227 match(Set mem (StoreL mem src));
8228
8229 ins_cost(150);
8230 format %{ "movq $mem, $src\t# long" %}
8231 ins_encode %{
8232 __ movq($mem$$Address, $src$$constant);
8233 %}
8234 ins_pipe(ialu_mem_imm);
8235 %}
8236
8237 // Store Short/Char Immediate
8238 instruct storeImmC0(memory mem, immI_0 zero)
8239 %{
8240 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8241 match(Set mem (StoreC mem zero));
8242
8243 ins_cost(125); // XXX
8244 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8245 ins_encode %{
8246 __ movw($mem$$Address, r12);
8247 %}
8248 ins_pipe(ialu_mem_reg);
8249 %}
8250
8251 instruct storeImmI16(memory mem, immI16 src)
8252 %{
8253 predicate(UseStoreImmI16);
8254 match(Set mem (StoreC mem src));
8255
8256 ins_cost(150);
8257 format %{ "movw $mem, $src\t# short/char" %}
8258 ins_encode %{
8259 __ movw($mem$$Address, $src$$constant);
8260 %}
8261 ins_pipe(ialu_mem_imm);
8262 %}
8263
8264 // Store Byte Immediate
8265 instruct storeImmB0(memory mem, immI_0 zero)
8266 %{
8267 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8268 match(Set mem (StoreB mem zero));
8269
8270 ins_cost(125); // XXX
8271   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
8272 ins_encode %{
8273 __ movb($mem$$Address, r12);
8274 %}
8275 ins_pipe(ialu_mem_reg);
8276 %}
8277
8278 instruct storeImmB(memory mem, immI8 src)
8279 %{
8280 match(Set mem (StoreB mem src));
8281
8282 ins_cost(150); // XXX
8283 format %{ "movb $mem, $src\t# byte" %}
8284 ins_encode %{
8285 __ movb($mem$$Address, $src$$constant);
8286 %}
8287 ins_pipe(ialu_mem_imm);
8288 %}
8289
8290 // Store Float
8291 instruct storeF(memory mem, regF src)
8292 %{
8293 match(Set mem (StoreF mem src));
8294
8295 ins_cost(95); // XXX
8296 format %{ "movss $mem, $src\t# float" %}
8297 ins_encode %{
8298 __ movflt($mem$$Address, $src$$XMMRegister);
8299 %}
8300 ins_pipe(pipe_slow); // XXX
8301 %}
8302
8303 // Store immediate Float value (it is faster than store from XMM register)
8304 instruct storeF0(memory mem, immF0 zero)
8305 %{
8306 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8307 match(Set mem (StoreF mem zero));
8308
8309 ins_cost(25); // XXX
8310 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8311 ins_encode %{
8312 __ movl($mem$$Address, r12);
8313 %}
8314 ins_pipe(ialu_mem_reg);
8315 %}
8316
8317 instruct storeF_imm(memory mem, immF src)
8318 %{
8319 match(Set mem (StoreF mem src));
8320
8321 ins_cost(50);
8322 format %{ "movl $mem, $src\t# float" %}
8323 ins_encode %{
8324 __ movl($mem$$Address, jint_cast($src$$constant));
8325 %}
8326 ins_pipe(ialu_mem_imm);
8327 %}
8328
8329 // Store Double
8330 instruct storeD(memory mem, regD src)
8331 %{
8332 match(Set mem (StoreD mem src));
8333
8334 ins_cost(95); // XXX
8335 format %{ "movsd $mem, $src\t# double" %}
8336 ins_encode %{
8337 __ movdbl($mem$$Address, $src$$XMMRegister);
8338 %}
8339 ins_pipe(pipe_slow); // XXX
8340 %}
8341
8342 // Store immediate double 0.0 (it is faster than store from XMM register)
8343 instruct storeD0_imm(memory mem, immD0 src)
8344 %{
8345 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8346 match(Set mem (StoreD mem src));
8347
8348 ins_cost(50);
8349 format %{ "movq $mem, $src\t# double 0." %}
8350 ins_encode %{
8351 __ movq($mem$$Address, $src$$constant);
8352 %}
8353 ins_pipe(ialu_mem_imm);
8354 %}
8355
8356 instruct storeD0(memory mem, immD0 zero)
8357 %{
8358 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8359 match(Set mem (StoreD mem zero));
8360
8361 ins_cost(25); // XXX
8362 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8363 ins_encode %{
8364 __ movq($mem$$Address, r12);
8365 %}
8366 ins_pipe(ialu_mem_reg);
8367 %}
8368
8369 instruct storeSSI(stackSlotI dst, rRegI src)
8370 %{
8371 match(Set dst src);
8372
8373 ins_cost(100);
8374 format %{ "movl $dst, $src\t# int stk" %}
8375 ins_encode %{
8376 __ movl($dst$$Address, $src$$Register);
8377 %}
8378 ins_pipe( ialu_mem_reg );
8379 %}
8380
8381 instruct storeSSL(stackSlotL dst, rRegL src)
8382 %{
8383 match(Set dst src);
8384
8385 ins_cost(100);
8386 format %{ "movq $dst, $src\t# long stk" %}
8387 ins_encode %{
8388 __ movq($dst$$Address, $src$$Register);
8389 %}
8390 ins_pipe(ialu_mem_reg);
8391 %}
8392
8393 instruct storeSSP(stackSlotP dst, rRegP src)
8394 %{
8395 match(Set dst src);
8396
8397 ins_cost(100);
8398 format %{ "movq $dst, $src\t# ptr stk" %}
8399 ins_encode %{
8400 __ movq($dst$$Address, $src$$Register);
8401 %}
8402 ins_pipe(ialu_mem_reg);
8403 %}
8404
8405 instruct storeSSF(stackSlotF dst, regF src)
8406 %{
8407 match(Set dst src);
8408
8409 ins_cost(95); // XXX
8410 format %{ "movss $dst, $src\t# float stk" %}
8411 ins_encode %{
8412 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8413 %}
8414 ins_pipe(pipe_slow); // XXX
8415 %}
8416
8417 instruct storeSSD(stackSlotD dst, regD src)
8418 %{
8419 match(Set dst src);
8420
8421 ins_cost(95); // XXX
8422 format %{ "movsd $dst, $src\t# double stk" %}
8423 ins_encode %{
8424 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8425 %}
8426 ins_pipe(pipe_slow); // XXX
8427 %}
8428
8429 instruct cacheWB(indirect addr)
8430 %{
8431 predicate(VM_Version::supports_data_cache_line_flush());
8432 match(CacheWB addr);
8433
8434 ins_cost(100);
8435 format %{"cache wb $addr" %}
8436 ins_encode %{
8437 assert($addr->index_position() < 0, "should be");
8438 assert($addr$$disp == 0, "should be");
8439 __ cache_wb(Address($addr$$base$$Register, 0));
8440 %}
8441 ins_pipe(pipe_slow); // XXX
8442 %}
8443
8444 instruct cacheWBPreSync()
8445 %{
8446 predicate(VM_Version::supports_data_cache_line_flush());
8447 match(CacheWBPreSync);
8448
8449 ins_cost(100);
8450 format %{"cache wb presync" %}
8451 ins_encode %{
8452 __ cache_wbsync(true);
8453 %}
8454 ins_pipe(pipe_slow); // XXX
8455 %}
8456
8457 instruct cacheWBPostSync()
8458 %{
8459 predicate(VM_Version::supports_data_cache_line_flush());
8460 match(CacheWBPostSync);
8461
8462 ins_cost(100);
8463 format %{"cache wb postsync" %}
8464 ins_encode %{
8465 __ cache_wbsync(false);
8466 %}
8467 ins_pipe(pipe_slow); // XXX
8468 %}
8469
8470 //----------BSWAP Instructions-------------------------------------------------
8471 instruct bytes_reverse_int(rRegI dst) %{
8472 match(Set dst (ReverseBytesI dst));
8473
8474 format %{ "bswapl $dst" %}
8475 ins_encode %{
8476 __ bswapl($dst$$Register);
8477 %}
8478 ins_pipe( ialu_reg );
8479 %}
8480
8481 instruct bytes_reverse_long(rRegL dst) %{
8482 match(Set dst (ReverseBytesL dst));
8483
8484 format %{ "bswapq $dst" %}
8485 ins_encode %{
8486 __ bswapq($dst$$Register);
8487 %}
8488 ins_pipe( ialu_reg);
8489 %}
8490
8491 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8492 match(Set dst (ReverseBytesUS dst));
8493 effect(KILL cr);
8494
8495 format %{ "bswapl $dst\n\t"
8496 "shrl $dst,16\n\t" %}
8497 ins_encode %{
8498 __ bswapl($dst$$Register);
8499 __ shrl($dst$$Register, 16);
8500 %}
8501 ins_pipe( ialu_reg );
8502 %}
8503
8504 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8505 match(Set dst (ReverseBytesS dst));
8506 effect(KILL cr);
8507
8508 format %{ "bswapl $dst\n\t"
8509             "sarl    $dst,16\n\t" %}
8510 ins_encode %{
8511 __ bswapl($dst$$Register);
8512 __ sarl($dst$$Register, 16);
8513 %}
8514 ins_pipe( ialu_reg );
8515 %}
8516
8517 //---------- Zeros Count Instructions ------------------------------------------
8518
8519 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8520 predicate(UseCountLeadingZerosInstruction);
8521 match(Set dst (CountLeadingZerosI src));
8522 effect(KILL cr);
8523
8524 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8525 ins_encode %{
8526 __ lzcntl($dst$$Register, $src$$Register);
8527 %}
8528 ins_pipe(ialu_reg);
8529 %}
8530
8531 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8532 predicate(UseCountLeadingZerosInstruction);
8533 match(Set dst (CountLeadingZerosI (LoadI src)));
8534 effect(KILL cr);
8535 ins_cost(175);
8536 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8537 ins_encode %{
8538 __ lzcntl($dst$$Register, $src$$Address);
8539 %}
8540 ins_pipe(ialu_reg_mem);
8541 %}
8542
8543 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8544 predicate(!UseCountLeadingZerosInstruction);
8545 match(Set dst (CountLeadingZerosI src));
8546 effect(KILL cr);
8547
8548 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8549 "jnz skip\n\t"
8550 "movl $dst, -1\n"
8551 "skip:\n\t"
8552 "negl $dst\n\t"
8553 "addl $dst, 31" %}
8554 ins_encode %{
8555 Register Rdst = $dst$$Register;
8556 Register Rsrc = $src$$Register;
8557 Label skip;
8558 __ bsrl(Rdst, Rsrc);
8559 __ jccb(Assembler::notZero, skip);
8560 __ movl(Rdst, -1);
8561 __ bind(skip);
8562 __ negl(Rdst);
8563 __ addl(Rdst, BitsPerInt - 1);
8564 %}
8565 ins_pipe(ialu_reg);
8566 %}
8567
8568 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8569 predicate(UseCountLeadingZerosInstruction);
8570 match(Set dst (CountLeadingZerosL src));
8571 effect(KILL cr);
8572
8573 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8574 ins_encode %{
8575 __ lzcntq($dst$$Register, $src$$Register);
8576 %}
8577 ins_pipe(ialu_reg);
8578 %}
8579
8580 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8581 predicate(UseCountLeadingZerosInstruction);
8582 match(Set dst (CountLeadingZerosL (LoadL src)));
8583 effect(KILL cr);
8584 ins_cost(175);
8585 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8586 ins_encode %{
8587 __ lzcntq($dst$$Register, $src$$Address);
8588 %}
8589 ins_pipe(ialu_reg_mem);
8590 %}
8591
8592 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8593 predicate(!UseCountLeadingZerosInstruction);
8594 match(Set dst (CountLeadingZerosL src));
8595 effect(KILL cr);
8596
8597 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8598 "jnz skip\n\t"
8599 "movl $dst, -1\n"
8600 "skip:\n\t"
8601 "negl $dst\n\t"
8602 "addl $dst, 63" %}
8603 ins_encode %{
8604 Register Rdst = $dst$$Register;
8605 Register Rsrc = $src$$Register;
8606 Label skip;
8607 __ bsrq(Rdst, Rsrc);
8608 __ jccb(Assembler::notZero, skip);
8609 __ movl(Rdst, -1);
8610 __ bind(skip);
8611 __ negl(Rdst);
8612 __ addl(Rdst, BitsPerLong - 1);
8613 %}
8614 ins_pipe(ialu_reg);
8615 %}
8616
8617 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8618 predicate(UseCountTrailingZerosInstruction);
8619 match(Set dst (CountTrailingZerosI src));
8620 effect(KILL cr);
8621
8622 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8623 ins_encode %{
8624 __ tzcntl($dst$$Register, $src$$Register);
8625 %}
8626 ins_pipe(ialu_reg);
8627 %}
8628
8629 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8630 predicate(UseCountTrailingZerosInstruction);
8631 match(Set dst (CountTrailingZerosI (LoadI src)));
8632 effect(KILL cr);
8633 ins_cost(175);
8634 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8635 ins_encode %{
8636 __ tzcntl($dst$$Register, $src$$Address);
8637 %}
8638 ins_pipe(ialu_reg_mem);
8639 %}
8640
8641 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8642 predicate(!UseCountTrailingZerosInstruction);
8643 match(Set dst (CountTrailingZerosI src));
8644 effect(KILL cr);
8645
8646 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8647 "jnz done\n\t"
8648 "movl $dst, 32\n"
8649 "done:" %}
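  // For a non-zero src, bsfl returns the index of the lowest set bit, which is
  // exactly the trailing-zero count; a zero src sets Z and dst is forced to
  // BitsPerInt (32).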
8650 ins_encode %{
8651 Register Rdst = $dst$$Register;
8652 Label done;
8653 __ bsfl(Rdst, $src$$Register);
8654 __ jccb(Assembler::notZero, done);
8655 __ movl(Rdst, BitsPerInt);
8656 __ bind(done);
8657 %}
8658 ins_pipe(ialu_reg);
8659 %}
8660
8661 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8662 predicate(UseCountTrailingZerosInstruction);
8663 match(Set dst (CountTrailingZerosL src));
8664 effect(KILL cr);
8665
8666 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8667 ins_encode %{
8668 __ tzcntq($dst$$Register, $src$$Register);
8669 %}
8670 ins_pipe(ialu_reg);
8671 %}
8672
8673 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8674 predicate(UseCountTrailingZerosInstruction);
8675 match(Set dst (CountTrailingZerosL (LoadL src)));
8676 effect(KILL cr);
8677 ins_cost(175);
8678 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8679 ins_encode %{
8680 __ tzcntq($dst$$Register, $src$$Address);
8681 %}
8682 ins_pipe(ialu_reg_mem);
8683 %}
8684
8685 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8686 predicate(!UseCountTrailingZerosInstruction);
8687 match(Set dst (CountTrailingZerosL src));
8688 effect(KILL cr);
8689
8690 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8691 "jnz done\n\t"
8692 "movl $dst, 64\n"
8693 "done:" %}
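  // 64-bit version of the BSF fallback above; a zero src yields BitsPerLong (64).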
8694 ins_encode %{
8695 Register Rdst = $dst$$Register;
8696 Label done;
8697 __ bsfq(Rdst, $src$$Register);
8698 __ jccb(Assembler::notZero, done);
8699 __ movl(Rdst, BitsPerLong);
8700 __ bind(done);
8701 %}
8702 ins_pipe(ialu_reg);
8703 %}
8704
8705 //--------------- Reverse Operation Instructions ----------------
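// The _gfni variants accelerate bit reversal with the Galois-field affine
// instruction and therefore need XMM temporaries; the non-GFNI fallbacks pass
// xnoreg and work entirely in general-purpose registers.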
8706 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8707 predicate(!VM_Version::supports_gfni());
8708 match(Set dst (ReverseI src));
8709 effect(TEMP dst, TEMP rtmp, KILL cr);
8710 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8711 ins_encode %{
8712 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8713 %}
8714 ins_pipe( ialu_reg );
8715 %}
8716
8717 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8718 predicate(VM_Version::supports_gfni());
8719 match(Set dst (ReverseI src));
8720 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8721 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8722 ins_encode %{
8723 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8724 %}
8725 ins_pipe( ialu_reg );
8726 %}
8727
8728 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8729 predicate(!VM_Version::supports_gfni());
8730 match(Set dst (ReverseL src));
8731 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8732 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8733 ins_encode %{
8734 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8735 %}
8736 ins_pipe( ialu_reg );
8737 %}
8738
8739 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8740 predicate(VM_Version::supports_gfni());
8741 match(Set dst (ReverseL src));
8742 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8743 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8744 ins_encode %{
8745 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8746 %}
8747 ins_pipe( ialu_reg );
8748 %}
8749
8750 //---------- Population Count Instructions -------------------------------------
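// popcnt has its own CPUID feature bit; these forms are only used when
// UsePopCountInstruction is set. When it is not, the PopCount nodes are simply
// not matched, so no software fallback pattern is needed here.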
8751
8752 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8753 predicate(UsePopCountInstruction);
8754 match(Set dst (PopCountI src));
8755 effect(KILL cr);
8756
8757 format %{ "popcnt $dst, $src" %}
8758 ins_encode %{
8759 __ popcntl($dst$$Register, $src$$Register);
8760 %}
8761 ins_pipe(ialu_reg);
8762 %}
8763
8764 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8765 predicate(UsePopCountInstruction);
8766 match(Set dst (PopCountI (LoadI mem)));
8767 effect(KILL cr);
8768
8769 format %{ "popcnt $dst, $mem" %}
8770 ins_encode %{
8771 __ popcntl($dst$$Register, $mem$$Address);
8772 %}
8773 ins_pipe(ialu_reg);
8774 %}
8775
8776 // Note: Long.bitCount(long) returns an int.
8777 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8778 predicate(UsePopCountInstruction);
8779 match(Set dst (PopCountL src));
8780 effect(KILL cr);
8781
8782 format %{ "popcnt $dst, $src" %}
8783 ins_encode %{
8784 __ popcntq($dst$$Register, $src$$Register);
8785 %}
8786 ins_pipe(ialu_reg);
8787 %}
8788
8789 // Note: Long.bitCount(long) returns an int.
8790 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8791 predicate(UsePopCountInstruction);
8792 match(Set dst (PopCountL (LoadL mem)));
8793 effect(KILL cr);
8794
8795 format %{ "popcnt $dst, $mem" %}
8796 ins_encode %{
8797 __ popcntq($dst$$Register, $mem$$Address);
8798 %}
8799 ins_pipe(ialu_reg);
8800 %}
8801
8802
8803 //----------MemBar Instructions-----------------------------------------------
8804 // Memory barrier flavors
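// x86 is TSO: loads are not reordered with older loads and stores are not
// reordered with older stores, so acquire, release and storestore barriers only
// constrain the compiler and emit no code (size(0)). Only StoreLoad needs a
// real instruction, emitted as a locked add below.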
8805
8806 instruct membar_acquire()
8807 %{
8808 match(MemBarAcquire);
8809 match(LoadFence);
8810 ins_cost(0);
8811
8812 size(0);
8813 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8814 ins_encode();
8815 ins_pipe(empty);
8816 %}
8817
8818 instruct membar_acquire_lock()
8819 %{
8820 match(MemBarAcquireLock);
8821 ins_cost(0);
8822
8823 size(0);
8824 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8825 ins_encode();
8826 ins_pipe(empty);
8827 %}
8828
8829 instruct membar_release()
8830 %{
8831 match(MemBarRelease);
8832 match(StoreFence);
8833 ins_cost(0);
8834
8835 size(0);
8836 format %{ "MEMBAR-release ! (empty encoding)" %}
8837 ins_encode();
8838 ins_pipe(empty);
8839 %}
8840
8841 instruct membar_release_lock()
8842 %{
8843 match(MemBarReleaseLock);
8844 ins_cost(0);
8845
8846 size(0);
8847 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8848 ins_encode();
8849 ins_pipe(empty);
8850 %}
8851
8852 instruct membar_storeload(rFlagsReg cr) %{
8853 match(MemBarStoreLoad);
8854 effect(KILL cr);
8855 ins_cost(400);
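  // A locked add of zero to the top of the stack provides the required
  // StoreLoad ordering and is generally cheaper than mfence.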
8856
8857 format %{
8858 $$template
8859 $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
8860 %}
8861 ins_encode %{
8862 __ membar(Assembler::StoreLoad);
8863 %}
8864 ins_pipe(pipe_slow);
8865 %}
8866
8867 instruct membar_volatile(rFlagsReg cr) %{
8868 match(MemBarVolatile);
8869 effect(KILL cr);
8870 ins_cost(400);
8871
8872 format %{
8873 $$template
8874 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8875 %}
8876 ins_encode %{
8877 __ membar(Assembler::StoreLoad);
8878 %}
8879 ins_pipe(pipe_slow);
8880 %}
8881
8882 instruct unnecessary_membar_volatile()
8883 %{
8884 match(MemBarVolatile);
8885 predicate(Matcher::post_store_load_barrier(n));
8886 ins_cost(0);
8887
8888 size(0);
8889 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8890 ins_encode();
8891 ins_pipe(empty);
8892 %}
8893
8894 instruct membar_full(rFlagsReg cr) %{
8895 match(MemBarFull);
8896 effect(KILL cr);
8897 ins_cost(400);
8898
8899 format %{
8900 $$template
8901 $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
8902 %}
8903 ins_encode %{
8904 __ membar(Assembler::StoreLoad);
8905 %}
8906 ins_pipe(pipe_slow);
8907 %}
8908
8909 instruct membar_storestore() %{
8910 match(MemBarStoreStore);
8911 match(StoreStoreFence);
8912 ins_cost(0);
8913
8914 size(0);
8915 format %{ "MEMBAR-storestore (empty encoding)" %}
8916 ins_encode( );
8917 ins_pipe(empty);
8918 %}
8919
8920 //----------Move Instructions--------------------------------------------------
8921
8922 instruct castX2P(rRegP dst, rRegL src)
8923 %{
8924 match(Set dst (CastX2P src));
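  // On x86_64 a pointer and a long share the same 64-bit representation, so the
  // cast is at most a register-to-register move and is elided when dst == src.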
8925
8926 format %{ "movq $dst, $src\t# long->ptr" %}
8927 ins_encode %{
8928 if ($dst$$reg != $src$$reg) {
8929 __ movptr($dst$$Register, $src$$Register);
8930 }
8931 %}
8932 ins_pipe(ialu_reg_reg); // XXX
8933 %}
8934
8935 instruct castP2X(rRegL dst, rRegP src)
8936 %{
8937 match(Set dst (CastP2X src));
8938
8939 format %{ "movq $dst, $src\t# ptr -> long" %}
8940 ins_encode %{
8941 if ($dst$$reg != $src$$reg) {
8942 __ movptr($dst$$Register, $src$$Register);
8943 }
8944 %}
8945 ins_pipe(ialu_reg_reg); // XXX
8946 %}
8947
// Convert oop into int for vector alignment masking
8949 instruct convP2I(rRegI dst, rRegP src)
8950 %{
8951 match(Set dst (ConvL2I (CastP2X src)));
8952
8953 format %{ "movl $dst, $src\t# ptr -> int" %}
8954 ins_encode %{
8955 __ movl($dst$$Register, $src$$Register);
8956 %}
8957 ins_pipe(ialu_reg_reg); // XXX
8958 %}
8959
// Convert compressed oop into int for vector alignment masking
// in case of 32-bit oops (heap < 4GB).
8962 instruct convN2I(rRegI dst, rRegN src)
8963 %{
8964 predicate(CompressedOops::shift() == 0);
8965 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8966
8967 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8968 ins_encode %{
8969 __ movl($dst$$Register, $src$$Register);
8970 %}
8971 ins_pipe(ialu_reg_reg); // XXX
8972 %}
8973
8974 // Convert oop pointer into compressed form
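// The predicates below split on the node's pointer type: oops statically known
// to be non-null (or constant) go to the *_not_null forms, which can skip the
// null check inside encode/decode.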
8975 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8976 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8977 match(Set dst (EncodeP src));
8978 effect(KILL cr);
8979 format %{ "encode_heap_oop $dst,$src" %}
8980 ins_encode %{
8981 Register s = $src$$Register;
8982 Register d = $dst$$Register;
8983 if (s != d) {
8984 __ movq(d, s);
8985 }
8986 __ encode_heap_oop(d);
8987 %}
8988 ins_pipe(ialu_reg_long);
8989 %}
8990
8991 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8992 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8993 match(Set dst (EncodeP src));
8994 effect(KILL cr);
8995 format %{ "encode_heap_oop_not_null $dst,$src" %}
8996 ins_encode %{
8997 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8998 %}
8999 ins_pipe(ialu_reg_long);
9000 %}
9001
9002 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
9003 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9004 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9005 match(Set dst (DecodeN src));
9006 effect(KILL cr);
9007 format %{ "decode_heap_oop $dst,$src" %}
9008 ins_encode %{
9009 Register s = $src$$Register;
9010 Register d = $dst$$Register;
9011 if (s != d) {
9012 __ movq(d, s);
9013 }
9014 __ decode_heap_oop(d);
9015 %}
9016 ins_pipe(ialu_reg_long);
9017 %}
9018
9019 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9020 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9021 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9022 match(Set dst (DecodeN src));
9023 effect(KILL cr);
9024 format %{ "decode_heap_oop_not_null $dst,$src" %}
9025 ins_encode %{
9026 Register s = $src$$Register;
9027 Register d = $dst$$Register;
9028 if (s != d) {
9029 __ decode_heap_oop_not_null(d, s);
9030 } else {
9031 __ decode_heap_oop_not_null(d);
9032 }
9033 %}
9034 ins_pipe(ialu_reg_long);
9035 %}
9036
9037 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9038 match(Set dst (EncodePKlass src));
9039 effect(TEMP dst, KILL cr);
9040 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9041 ins_encode %{
9042 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9043 %}
9044 ins_pipe(ialu_reg_long);
9045 %}
9046
9047 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9048 match(Set dst (DecodeNKlass src));
9049 effect(TEMP dst, KILL cr);
9050 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9051 ins_encode %{
9052 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9053 %}
9054 ins_pipe(ialu_reg_long);
9055 %}
9056
9057 //----------Conditional Move---------------------------------------------------
9058 // Jump
9059 // dummy instruction for generating temp registers
9060 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9061 match(Jump (LShiftL switch_val shift));
9062 ins_cost(350);
9063 predicate(false);
9064 effect(TEMP dest);
9065
9066 format %{ "leaq $dest, [$constantaddress]\n\t"
9067 "jmp [$dest + $switch_val << $shift]\n\t" %}
9068 ins_encode %{
9069 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9070 // to do that and the compiler is using that register as one it can allocate.
9071 // So we build it all by hand.
9072 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9073 // ArrayAddress dispatch(table, index);
9074 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9075 __ lea($dest$$Register, $constantaddress);
9076 __ jmp(dispatch);
9077 %}
9078 ins_pipe(pipe_jmp);
9079 %}
9080
9081 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9082 match(Jump (AddL (LShiftL switch_val shift) offset));
9083 ins_cost(350);
9084 effect(TEMP dest);
9085
9086 format %{ "leaq $dest, [$constantaddress]\n\t"
9087 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9088 ins_encode %{
9089 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9090 // to do that and the compiler is using that register as one it can allocate.
9091 // So we build it all by hand.
9092 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9093 // ArrayAddress dispatch(table, index);
9094 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9095 __ lea($dest$$Register, $constantaddress);
9096 __ jmp(dispatch);
9097 %}
9098 ins_pipe(pipe_jmp);
9099 %}
9100
9101 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9102 match(Jump switch_val);
9103 ins_cost(350);
9104 effect(TEMP dest);
9105
9106 format %{ "leaq $dest, [$constantaddress]\n\t"
9107 "jmp [$dest + $switch_val]\n\t" %}
9108 ins_encode %{
9109 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9110 // to do that and the compiler is using that register as one it can allocate.
9111 // So we build it all by hand.
9112 // Address index(noreg, switch_reg, Address::times_1);
9113 // ArrayAddress dispatch(table, index);
9114 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9115 __ lea($dest$$Register, $constantaddress);
9116 __ jmp(dispatch);
9117 %}
9118 ins_pipe(pipe_jmp);
9119 %}
9120
9121 // Conditional move
9122 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9123 %{
9124 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9125 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9126
9127 ins_cost(100); // XXX
9128 format %{ "setbn$cop $dst\t# signed, int" %}
9129 ins_encode %{
9130 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9131 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9132 %}
9133 ins_pipe(ialu_reg);
9134 %}
9135
9136 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9137 %{
9138 predicate(!UseAPX);
9139 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9140
9141 ins_cost(200); // XXX
9142 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9143 ins_encode %{
9144 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9145 %}
9146 ins_pipe(pipe_cmov_reg);
9147 %}
9148
9149 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9150 %{
9151 predicate(UseAPX);
9152 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9153
9154 ins_cost(200);
9155 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9156 ins_encode %{
9157 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9158 %}
9159 ins_pipe(pipe_cmov_reg);
9160 %}
9161
9162 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9163 %{
9164 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9165 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9166
9167 ins_cost(100); // XXX
9168 format %{ "setbn$cop $dst\t# unsigned, int" %}
9169 ins_encode %{
9170 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9171 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9172 %}
9173 ins_pipe(ialu_reg);
9174 %}
9175
9176 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9177 predicate(!UseAPX);
9178 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9179
9180 ins_cost(200); // XXX
9181 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9182 ins_encode %{
9183 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9184 %}
9185 ins_pipe(pipe_cmov_reg);
9186 %}
9187
9188 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9189 predicate(UseAPX);
9190 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9191
9192 ins_cost(200);
9193 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9194 ins_encode %{
9195 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9196 %}
9197 ins_pipe(pipe_cmov_reg);
9198 %}
9199
9200 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9201 %{
9202 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9203 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9204
9205 ins_cost(100); // XXX
9206 format %{ "setbn$cop $dst\t# unsigned, int" %}
9207 ins_encode %{
9208 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9209 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9210 %}
9211 ins_pipe(ialu_reg);
9212 %}
9213
9214 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9215 %{
9216 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9217 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9218
9219 ins_cost(100); // XXX
9220 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9221 ins_encode %{
9222 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9223 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9224 %}
9225 ins_pipe(ialu_reg);
9226 %}
9227
9228 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9229 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9230
9231 ins_cost(200);
9232 expand %{
9233 cmovI_regU(cop, cr, dst, src);
9234 %}
9235 %}
9236
9237 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9238 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9239
9240 ins_cost(200);
9241 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9242 ins_encode %{
9243 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9244 %}
9245 ins_pipe(pipe_cmov_reg);
9246 %}
9247
9248 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9249 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9250 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9251
9252 ins_cost(200); // XXX
9253 format %{ "cmovpl $dst, $src\n\t"
9254 "cmovnel $dst, $src" %}
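  // After an unordered FP compare the parity flag is set when either input is
  // NaN; the extra cmovp makes the 'ne' move also be taken in that case.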
9255 ins_encode %{
9256 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9257 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9258 %}
9259 ins_pipe(pipe_cmov_reg);
9260 %}
9261
9262 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9263 // inputs of the CMove
9264 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9265 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9266 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9267 effect(TEMP dst);
9268
9269 ins_cost(200); // XXX
9270 format %{ "cmovpl $dst, $src\n\t"
9271 "cmovnel $dst, $src" %}
9272 ins_encode %{
9273 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9274 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9275 %}
9276 ins_pipe(pipe_cmov_reg);
9277 %}
9278
9279 // Conditional move
9280 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9281 predicate(!UseAPX);
9282 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9283
9284 ins_cost(250); // XXX
9285 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9286 ins_encode %{
9287 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9288 %}
9289 ins_pipe(pipe_cmov_mem);
9290 %}
9291
9292 // Conditional move
9293 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9294 %{
9295 predicate(UseAPX);
9296 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9297
9298 ins_cost(250);
9299 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9300 ins_encode %{
9301 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9302 %}
9303 ins_pipe(pipe_cmov_mem);
9304 %}
9305
9306 // Conditional move
9307 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9308 %{
9309 predicate(!UseAPX);
9310 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9311
9312 ins_cost(250); // XXX
9313 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9314 ins_encode %{
9315 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9316 %}
9317 ins_pipe(pipe_cmov_mem);
9318 %}
9319
9320 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9321 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9322
9323 ins_cost(250);
9324 expand %{
9325 cmovI_memU(cop, cr, dst, src);
9326 %}
9327 %}
9328
9329 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9330 %{
9331 predicate(UseAPX);
9332 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9333
9334 ins_cost(250);
9335 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9336 ins_encode %{
9337 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9338 %}
9339 ins_pipe(pipe_cmov_mem);
9340 %}
9341
9342 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9343 %{
9344 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9345
9346 ins_cost(250);
9347 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9348 ins_encode %{
9349 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9350 %}
9351 ins_pipe(pipe_cmov_mem);
9352 %}
9353
9354 // Conditional move
9355 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9356 %{
9357 predicate(!UseAPX);
9358 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9359
9360 ins_cost(200); // XXX
9361 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9362 ins_encode %{
9363 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9364 %}
9365 ins_pipe(pipe_cmov_reg);
9366 %}
9367
9368 // Conditional move ndd
9369 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9370 %{
9371 predicate(UseAPX);
9372 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9373
9374 ins_cost(200);
9375 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9376 ins_encode %{
9377 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9378 %}
9379 ins_pipe(pipe_cmov_reg);
9380 %}
9381
9382 // Conditional move
9383 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9384 %{
9385 predicate(!UseAPX);
9386 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9387
9388 ins_cost(200); // XXX
9389 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9390 ins_encode %{
9391 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9392 %}
9393 ins_pipe(pipe_cmov_reg);
9394 %}
9395
9396 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9397 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9398
9399 ins_cost(200);
9400 expand %{
9401 cmovN_regU(cop, cr, dst, src);
9402 %}
9403 %}
9404
9405 // Conditional move ndd
9406 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9407 %{
9408 predicate(UseAPX);
9409 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9410
9411 ins_cost(200);
9412 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9413 ins_encode %{
9414 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9415 %}
9416 ins_pipe(pipe_cmov_reg);
9417 %}
9418
9419 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9420 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9421
9422 ins_cost(200);
9423 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9424 ins_encode %{
9425 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9426 %}
9427 ins_pipe(pipe_cmov_reg);
9428 %}
9429
9430 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9431 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9432 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9433
9434 ins_cost(200); // XXX
9435 format %{ "cmovpl $dst, $src\n\t"
9436 "cmovnel $dst, $src" %}
9437 ins_encode %{
9438 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9439 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9440 %}
9441 ins_pipe(pipe_cmov_reg);
9442 %}
9443
9444 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9445 // inputs of the CMove
9446 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9447 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9448 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9449
9450 ins_cost(200); // XXX
9451 format %{ "cmovpl $dst, $src\n\t"
9452 "cmovnel $dst, $src" %}
9453 ins_encode %{
9454 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9455 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9456 %}
9457 ins_pipe(pipe_cmov_reg);
9458 %}
9459
9460 // Conditional move
9461 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9462 %{
9463 predicate(!UseAPX);
9464 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9465
9466 ins_cost(200); // XXX
9467 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9468 ins_encode %{
9469 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9470 %}
9471 ins_pipe(pipe_cmov_reg); // XXX
9472 %}
9473
9474 // Conditional move ndd
9475 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9476 %{
9477 predicate(UseAPX);
9478 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9479
9480 ins_cost(200);
9481 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9482 ins_encode %{
9483 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9484 %}
9485 ins_pipe(pipe_cmov_reg);
9486 %}
9487
9488 // Conditional move
9489 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9490 %{
9491 predicate(!UseAPX);
9492 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9493
9494 ins_cost(200); // XXX
9495 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9496 ins_encode %{
9497 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9498 %}
9499 ins_pipe(pipe_cmov_reg); // XXX
9500 %}
9501
9502 // Conditional move ndd
9503 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9504 %{
9505 predicate(UseAPX);
9506 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9507
9508 ins_cost(200);
9509 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9510 ins_encode %{
9511 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9512 %}
9513 ins_pipe(pipe_cmov_reg);
9514 %}
9515
9516 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9517 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9518
9519 ins_cost(200);
9520 expand %{
9521 cmovP_regU(cop, cr, dst, src);
9522 %}
9523 %}
9524
9525 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9526 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9527
9528 ins_cost(200);
9529 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9530 ins_encode %{
9531 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9532 %}
9533 ins_pipe(pipe_cmov_reg);
9534 %}
9535
9536 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9537 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9538 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9539
9540 ins_cost(200); // XXX
9541 format %{ "cmovpq $dst, $src\n\t"
9542 "cmovneq $dst, $src" %}
9543 ins_encode %{
9544 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9545 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9546 %}
9547 ins_pipe(pipe_cmov_reg);
9548 %}
9549
9550 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9551 // inputs of the CMove
9552 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9553 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9554 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9555
9556 ins_cost(200); // XXX
9557 format %{ "cmovpq $dst, $src\n\t"
9558 "cmovneq $dst, $src" %}
9559 ins_encode %{
9560 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9561 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9562 %}
9563 ins_pipe(pipe_cmov_reg);
9564 %}
9565
9566 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9567 %{
9568 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9569 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9570
9571 ins_cost(100); // XXX
9572 format %{ "setbn$cop $dst\t# signed, long" %}
9573 ins_encode %{
9574 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9575 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9576 %}
9577 ins_pipe(ialu_reg);
9578 %}
9579
9580 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9581 %{
9582 predicate(!UseAPX);
9583 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9584
9585 ins_cost(200); // XXX
9586 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9587 ins_encode %{
9588 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9589 %}
9590 ins_pipe(pipe_cmov_reg); // XXX
9591 %}
9592
9593 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9594 %{
9595 predicate(UseAPX);
9596 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9597
9598 ins_cost(200);
9599 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9600 ins_encode %{
9601 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9602 %}
9603 ins_pipe(pipe_cmov_reg);
9604 %}
9605
9606 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9607 %{
9608 predicate(!UseAPX);
9609 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9610
9611 ins_cost(200); // XXX
9612 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9613 ins_encode %{
9614 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9615 %}
9616 ins_pipe(pipe_cmov_mem); // XXX
9617 %}
9618
9619 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9620 %{
9621 predicate(UseAPX);
9622 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9623
9624 ins_cost(200);
9625 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9626 ins_encode %{
9627 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9628 %}
9629 ins_pipe(pipe_cmov_mem);
9630 %}
9631
9632 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9633 %{
9634 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9635 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9636
9637 ins_cost(100); // XXX
9638 format %{ "setbn$cop $dst\t# unsigned, long" %}
9639 ins_encode %{
9640 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9641 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9642 %}
9643 ins_pipe(ialu_reg);
9644 %}
9645
9646 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9647 %{
9648 predicate(!UseAPX);
9649 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9650
9651 ins_cost(200); // XXX
9652 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9653 ins_encode %{
9654 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9655 %}
9656 ins_pipe(pipe_cmov_reg); // XXX
9657 %}
9658
9659 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9660 %{
9661 predicate(UseAPX);
9662 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9663
9664 ins_cost(200);
9665 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9666 ins_encode %{
9667 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9668 %}
9669 ins_pipe(pipe_cmov_reg);
9670 %}
9671
9672 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9673 %{
9674 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9675 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9676
9677 ins_cost(100); // XXX
9678 format %{ "setbn$cop $dst\t# unsigned, long" %}
9679 ins_encode %{
9680 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9681 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9682 %}
9683 ins_pipe(ialu_reg);
9684 %}
9685
9686 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9687 %{
9688 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9689 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9690
9691 ins_cost(100); // XXX
9692 format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
9693 ins_encode %{
9694 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9695 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9696 %}
9697 ins_pipe(ialu_reg);
9698 %}
9699
9700 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9701 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9702
9703 ins_cost(200);
9704 expand %{
9705 cmovL_regU(cop, cr, dst, src);
9706 %}
9707 %}
9708
9709 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
9710 %{
9711 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9712
9713 ins_cost(200);
9714 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9715 ins_encode %{
9716 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9717 %}
9718 ins_pipe(pipe_cmov_reg);
9719 %}
9720
9721 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9722 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9723 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9724
9725 ins_cost(200); // XXX
9726 format %{ "cmovpq $dst, $src\n\t"
9727 "cmovneq $dst, $src" %}
9728 ins_encode %{
9729 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9730 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9731 %}
9732 ins_pipe(pipe_cmov_reg);
9733 %}
9734
9735 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9736 // inputs of the CMove
9737 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9738 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9739 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9740
9741 ins_cost(200); // XXX
9742 format %{ "cmovpq $dst, $src\n\t"
9743 "cmovneq $dst, $src" %}
9744 ins_encode %{
9745 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9746 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9747 %}
9748 ins_pipe(pipe_cmov_reg);
9749 %}
9750
9751 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9752 %{
9753 predicate(!UseAPX);
9754 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9755
9756 ins_cost(200); // XXX
9757 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9758 ins_encode %{
9759 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9760 %}
9761 ins_pipe(pipe_cmov_mem); // XXX
9762 %}
9763
9764 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9765 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9766
9767 ins_cost(200);
9768 expand %{
9769 cmovL_memU(cop, cr, dst, src);
9770 %}
9771 %}
9772
9773 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9774 %{
9775 predicate(UseAPX);
9776 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9777
9778 ins_cost(200);
9779 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9780 ins_encode %{
9781 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9782 %}
9783 ins_pipe(pipe_cmov_mem);
9784 %}
9785
9786 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
9787 %{
9788 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9789
9790 ins_cost(200);
9791 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9792 ins_encode %{
9793 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9794 %}
9795 ins_pipe(pipe_cmov_mem);
9796 %}
9797
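// There is no conditional move between XMM registers, so the FP CMove forms
// below branch around a movss/movsd using the inverted condition instead.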
9798 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9799 %{
9800 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9801
9802 ins_cost(200); // XXX
9803 format %{ "jn$cop skip\t# signed cmove float\n\t"
9804 "movss $dst, $src\n"
9805 "skip:" %}
9806 ins_encode %{
9807 Label Lskip;
9808 // Invert sense of branch from sense of CMOV
9809 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9810 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9811 __ bind(Lskip);
9812 %}
9813 ins_pipe(pipe_slow);
9814 %}
9815
9816 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9817 %{
9818 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9819
9820 ins_cost(200); // XXX
9821 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9822 "movss $dst, $src\n"
9823 "skip:" %}
9824 ins_encode %{
9825 Label Lskip;
9826 // Invert sense of branch from sense of CMOV
9827 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9828 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9829 __ bind(Lskip);
9830 %}
9831 ins_pipe(pipe_slow);
9832 %}
9833
9834 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9835 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9836
9837 ins_cost(200);
9838 expand %{
9839 cmovF_regU(cop, cr, dst, src);
9840 %}
9841 %}
9842
9843 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
9844 %{
9845 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9846
9847 ins_cost(200); // XXX
9848 format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
9849 "movss $dst, $src\n"
9850 "skip:" %}
9851 ins_encode %{
9852 Label Lskip;
9853 // Invert sense of branch from sense of CMOV
9854 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9855 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9856 __ bind(Lskip);
9857 %}
9858 ins_pipe(pipe_slow);
9859 %}
9860
9861 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9862 %{
9863 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9864
9865 ins_cost(200); // XXX
9866 format %{ "jn$cop skip\t# signed cmove double\n\t"
9867 "movsd $dst, $src\n"
9868 "skip:" %}
9869 ins_encode %{
9870 Label Lskip;
9871 // Invert sense of branch from sense of CMOV
9872 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9873 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9874 __ bind(Lskip);
9875 %}
9876 ins_pipe(pipe_slow);
9877 %}
9878
9879 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9880 %{
9881 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9882
9883 ins_cost(200); // XXX
9884 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9885 "movsd $dst, $src\n"
9886 "skip:" %}
9887 ins_encode %{
9888 Label Lskip;
9889 // Invert sense of branch from sense of CMOV
9890 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9891 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9892 __ bind(Lskip);
9893 %}
9894 ins_pipe(pipe_slow);
9895 %}
9896
9897 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9898 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9899
9900 ins_cost(200);
9901 expand %{
9902 cmovD_regU(cop, cr, dst, src);
9903 %}
9904 %}
9905
9906 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
9907 %{
9908 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9909
9910 ins_cost(200); // XXX
9911 format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
9912 "movsd $dst, $src\n"
9913 "skip:" %}
9914 ins_encode %{
9915 Label Lskip;
9916 // Invert sense of branch from sense of CMOV
9917 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9918 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9919 __ bind(Lskip);
9920 %}
9921 ins_pipe(pipe_slow);
9922 %}
9923
9924 //----------Arithmetic Instructions--------------------------------------------
9925 //----------Addition Instructions----------------------------------------------
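// The *_ndd forms use the APX "new data destination" encodings, selected under
// UseAPX: the result is written to a separate destination register instead of
// overwriting a source. The Flag_ndd_demotable_opr* hints mark source operands
// for which the NDD form can be demoted to the shorter legacy two-operand
// encoding when the register allocator assigns the destination to the same
// register.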
9926
9927 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9928 %{
9929 predicate(!UseAPX);
9930 match(Set dst (AddI dst src));
9931 effect(KILL cr);
9932 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9933 format %{ "addl $dst, $src\t# int" %}
9934 ins_encode %{
9935 __ addl($dst$$Register, $src$$Register);
9936 %}
9937 ins_pipe(ialu_reg_reg);
9938 %}
9939
9940 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9941 %{
9942 predicate(UseAPX);
9943 match(Set dst (AddI src1 src2));
9944 effect(KILL cr);
9945 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9946
9947 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9948 ins_encode %{
9949 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9950 %}
9951 ins_pipe(ialu_reg_reg);
9952 %}
9953
9954 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9955 %{
9956 predicate(!UseAPX);
9957 match(Set dst (AddI dst src));
9958 effect(KILL cr);
9959 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9960
9961 format %{ "addl $dst, $src\t# int" %}
9962 ins_encode %{
9963 __ addl($dst$$Register, $src$$constant);
9964 %}
9965 ins_pipe( ialu_reg );
9966 %}
9967
9968 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9969 %{
9970 predicate(UseAPX);
9971 match(Set dst (AddI src1 src2));
9972 effect(KILL cr);
9973 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
9974
9975 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9976 ins_encode %{
9977 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9978 %}
9979 ins_pipe( ialu_reg );
9980 %}
9981
9982 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9983 %{
9984 predicate(UseAPX);
9985 match(Set dst (AddI (LoadI src1) src2));
9986 effect(KILL cr);
9987 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9988
9989 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9990 ins_encode %{
9991 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9992 %}
9993 ins_pipe( ialu_reg );
9994 %}
9995
9996 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9997 %{
9998 predicate(!UseAPX);
9999 match(Set dst (AddI dst (LoadI src)));
10000 effect(KILL cr);
10001 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10002
10003 ins_cost(150); // XXX
10004 format %{ "addl $dst, $src\t# int" %}
10005 ins_encode %{
10006 __ addl($dst$$Register, $src$$Address);
10007 %}
10008 ins_pipe(ialu_reg_mem);
10009 %}
10010
10011 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10012 %{
10013 predicate(UseAPX);
10014 match(Set dst (AddI src1 (LoadI src2)));
10015 effect(KILL cr);
10016 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10017
10018 ins_cost(150);
10019 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10020 ins_encode %{
10021 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10022 %}
10023 ins_pipe(ialu_reg_mem);
10024 %}
10025
10026 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10027 %{
10028 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10029 effect(KILL cr);
10030 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10031
10032 ins_cost(150); // XXX
10033 format %{ "addl $dst, $src\t# int" %}
10034 ins_encode %{
10035 __ addl($dst$$Address, $src$$Register);
10036 %}
10037 ins_pipe(ialu_mem_reg);
10038 %}
10039
10040 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10041 %{
10042 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10043 effect(KILL cr);
10044 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10045
10047 ins_cost(125); // XXX
10048 format %{ "addl $dst, $src\t# int" %}
10049 ins_encode %{
10050 __ addl($dst$$Address, $src$$constant);
10051 %}
10052 ins_pipe(ialu_mem_imm);
10053 %}
10054
10055 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10056 %{
10057 predicate(!UseAPX && UseIncDec);
10058 match(Set dst (AddI dst src));
10059 effect(KILL cr);
10060
10061 format %{ "incl $dst\t# int" %}
10062 ins_encode %{
10063 __ incrementl($dst$$Register);
10064 %}
10065 ins_pipe(ialu_reg);
10066 %}
10067
10068 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10069 %{
10070 predicate(UseAPX && UseIncDec);
10071 match(Set dst (AddI src val));
10072 effect(KILL cr);
10073 flag(PD::Flag_ndd_demotable_opr1);
10074
10075 format %{ "eincl $dst, $src\t# int ndd" %}
10076 ins_encode %{
10077 __ eincl($dst$$Register, $src$$Register, false);
10078 %}
10079 ins_pipe(ialu_reg);
10080 %}
10081
10082 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10083 %{
10084 predicate(UseAPX && UseIncDec);
10085 match(Set dst (AddI (LoadI src) val));
10086 effect(KILL cr);
10087
10088 format %{ "eincl $dst, $src\t# int ndd" %}
10089 ins_encode %{
10090 __ eincl($dst$$Register, $src$$Address, false);
10091 %}
10092 ins_pipe(ialu_reg);
10093 %}
10094
10095 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10096 %{
10097 predicate(UseIncDec);
10098 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10099 effect(KILL cr);
10100
10101 ins_cost(125); // XXX
10102 format %{ "incl $dst\t# int" %}
10103 ins_encode %{
10104 __ incrementl($dst$$Address);
10105 %}
10106 ins_pipe(ialu_mem_imm);
10107 %}
10108
10109 // XXX why does that use AddI
10110 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10111 %{
10112 predicate(!UseAPX && UseIncDec);
10113 match(Set dst (AddI dst src));
10114 effect(KILL cr);
10115
10116 format %{ "decl $dst\t# int" %}
10117 ins_encode %{
10118 __ decrementl($dst$$Register);
10119 %}
10120 ins_pipe(ialu_reg);
10121 %}
10122
10123 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10124 %{
10125 predicate(UseAPX && UseIncDec);
10126 match(Set dst (AddI src val));
10127 effect(KILL cr);
10128 flag(PD::Flag_ndd_demotable_opr1);
10129
10130 format %{ "edecl $dst, $src\t# int ndd" %}
10131 ins_encode %{
10132 __ edecl($dst$$Register, $src$$Register, false);
10133 %}
10134 ins_pipe(ialu_reg);
10135 %}
10136
10137 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10138 %{
10139 predicate(UseAPX && UseIncDec);
10140 match(Set dst (AddI (LoadI src) val));
10141 effect(KILL cr);
10142
10143 format %{ "edecl $dst, $src\t# int ndd" %}
10144 ins_encode %{
10145 __ edecl($dst$$Register, $src$$Address, false);
10146 %}
10147 ins_pipe(ialu_reg);
10148 %}
10149
10150 // XXX why does that use AddI
10151 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10152 %{
10153 predicate(UseIncDec);
10154 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10155 effect(KILL cr);
10156
10157 ins_cost(125); // XXX
10158 format %{ "decl $dst\t# int" %}
10159 ins_encode %{
10160 __ decrementl($dst$$Address);
10161 %}
10162 ins_pipe(ialu_mem_imm);
10163 %}
10164
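// The lea-based forms fold shift-and-add address arithmetic into a single
// leal; unlike addl they leave the flags untouched (no KILL cr) and are only
// selected when VM_Version reports a fast 2- or 3-operand LEA.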
10165 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10166 %{
10167 predicate(VM_Version::supports_fast_2op_lea());
10168 match(Set dst (AddI (LShiftI index scale) disp));
10169
10170 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10171 ins_encode %{
10172 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10173 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10174 %}
10175 ins_pipe(ialu_reg_reg);
10176 %}
10177
10178 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10179 %{
10180 predicate(VM_Version::supports_fast_3op_lea());
10181 match(Set dst (AddI (AddI base index) disp));
10182
10183 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10184 ins_encode %{
10185 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10186 %}
10187 ins_pipe(ialu_reg_reg);
10188 %}
10189
10190 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10191 %{
10192 predicate(VM_Version::supports_fast_2op_lea());
10193 match(Set dst (AddI base (LShiftI index scale)));
10194
10195 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10196 ins_encode %{
10197 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10198 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10199 %}
10200 ins_pipe(ialu_reg_reg);
10201 %}
10202
10203 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10204 %{
10205 predicate(VM_Version::supports_fast_3op_lea());
10206 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10207
10208 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10209 ins_encode %{
10210 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10211 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10212 %}
10213 ins_pipe(ialu_reg_reg);
10214 %}
10215
10216 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10217 %{
10218 predicate(!UseAPX);
10219 match(Set dst (AddL dst src));
10220 effect(KILL cr);
10221 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10222
10223 format %{ "addq $dst, $src\t# long" %}
10224 ins_encode %{
10225 __ addq($dst$$Register, $src$$Register);
10226 %}
10227 ins_pipe(ialu_reg_reg);
10228 %}
10229
10230 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10231 %{
10232 predicate(UseAPX);
10233 match(Set dst (AddL src1 src2));
10234 effect(KILL cr);
10235 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10236
10237 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10238 ins_encode %{
10239 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10240 %}
10241 ins_pipe(ialu_reg_reg);
10242 %}
10243
10244 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10245 %{
10246 predicate(!UseAPX);
10247 match(Set dst (AddL dst src));
10248 effect(KILL cr);
10249 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10250
10251 format %{ "addq $dst, $src\t# long" %}
10252 ins_encode %{
10253 __ addq($dst$$Register, $src$$constant);
10254 %}
10255 ins_pipe( ialu_reg );
10256 %}
10257
10258 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10259 %{
10260 predicate(UseAPX);
10261 match(Set dst (AddL src1 src2));
10262 effect(KILL cr);
10263 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10264
10265 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10266 ins_encode %{
10267 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10268 %}
10269 ins_pipe( ialu_reg );
10270 %}
10271
10272 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10273 %{
10274 predicate(UseAPX);
10275 match(Set dst (AddL (LoadL src1) src2));
10276 effect(KILL cr);
10277 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10278
10279 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10280 ins_encode %{
10281 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10282 %}
10283 ins_pipe( ialu_reg );
10284 %}
10285
10286 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10287 %{
10288 predicate(!UseAPX);
10289 match(Set dst (AddL dst (LoadL src)));
10290 effect(KILL cr);
10291 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10292
10293 ins_cost(150); // XXX
10294 format %{ "addq $dst, $src\t# long" %}
10295 ins_encode %{
10296 __ addq($dst$$Register, $src$$Address);
10297 %}
10298 ins_pipe(ialu_reg_mem);
10299 %}
10300
10301 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10302 %{
10303 predicate(UseAPX);
10304 match(Set dst (AddL src1 (LoadL src2)));
10305 effect(KILL cr);
10306 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10307
10308 ins_cost(150);
10309 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10310 ins_encode %{
10311 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10312 %}
10313 ins_pipe(ialu_reg_mem);
10314 %}
10315
10316 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10317 %{
10318 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10319 effect(KILL cr);
10320 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10321
10322 ins_cost(150); // XXX
10323 format %{ "addq $dst, $src\t# long" %}
10324 ins_encode %{
10325 __ addq($dst$$Address, $src$$Register);
10326 %}
10327 ins_pipe(ialu_mem_reg);
10328 %}
10329
10330 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10331 %{
10332 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10333 effect(KILL cr);
10334 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10335
10336 ins_cost(125); // XXX
10337 format %{ "addq $dst, $src\t# long" %}
10338 ins_encode %{
10339 __ addq($dst$$Address, $src$$constant);
10340 %}
10341 ins_pipe(ialu_mem_imm);
10342 %}
10343
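// Increment/decrement of a long by one. These forms are guarded by UseIncDec
// because inc/dec leave the carry flag untouched, which can cause partial-flags
// stalls on some CPUs; with UseIncDec off, the add/sub-immediate forms above
// are matched instead.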
10344 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10345 %{
10346 predicate(!UseAPX && UseIncDec);
10347 match(Set dst (AddL dst src));
10348 effect(KILL cr);
10349
10350 format %{ "incq $dst\t# long" %}
10351 ins_encode %{
10352 __ incrementq($dst$$Register);
10353 %}
10354 ins_pipe(ialu_reg);
10355 %}
10356
10357 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10358 %{
10359 predicate(UseAPX && UseIncDec);
10360 match(Set dst (AddL src val));
10361 effect(KILL cr);
10362 flag(PD::Flag_ndd_demotable_opr1);
10363
10364 format %{ "eincq $dst, $src\t# long ndd" %}
10365 ins_encode %{
10366 __ eincq($dst$$Register, $src$$Register, false);
10367 %}
10368 ins_pipe(ialu_reg);
10369 %}
10370
10371 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10372 %{
10373 predicate(UseAPX && UseIncDec);
10374 match(Set dst (AddL (LoadL src) val));
10375 effect(KILL cr);
10376
10377 format %{ "eincq $dst, $src\t# long ndd" %}
10378 ins_encode %{
10379 __ eincq($dst$$Register, $src$$Address, false);
10380 %}
10381 ins_pipe(ialu_reg);
10382 %}
10383
10384 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10385 %{
10386 predicate(UseIncDec);
10387 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10388 effect(KILL cr);
10389
10390 ins_cost(125); // XXX
10391 format %{ "incq $dst\t# long" %}
10392 ins_encode %{
10393 __ incrementq($dst$$Address);
10394 %}
10395 ins_pipe(ialu_mem_imm);
10396 %}
10397
10398 // XXX why does that use AddL
10399 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10400 %{
10401 predicate(!UseAPX && UseIncDec);
10402 match(Set dst (AddL dst src));
10403 effect(KILL cr);
10404
10405 format %{ "decq $dst\t# long" %}
10406 ins_encode %{
10407 __ decrementq($dst$$Register);
10408 %}
10409 ins_pipe(ialu_reg);
10410 %}
10411
10412 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10413 %{
10414 predicate(UseAPX && UseIncDec);
10415 match(Set dst (AddL src val));
10416 effect(KILL cr);
10417 flag(PD::Flag_ndd_demotable_opr1);
10418
10419 format %{ "edecq $dst, $src\t# long ndd" %}
10420 ins_encode %{
10421 __ edecq($dst$$Register, $src$$Register, false);
10422 %}
10423 ins_pipe(ialu_reg);
10424 %}
10425
10426 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10427 %{
10428 predicate(UseAPX && UseIncDec);
10429 match(Set dst (AddL (LoadL src) val));
10430 effect(KILL cr);
10431
10432 format %{ "edecq $dst, $src\t# long ndd" %}
10433 ins_encode %{
10434 __ edecq($dst$$Register, $src$$Address, false);
10435 %}
10436 ins_pipe(ialu_reg);
10437 %}
10438
10439 // XXX why does that use AddL
10440 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10441 %{
10442 predicate(UseIncDec);
10443 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10444 effect(KILL cr);
10445
10446 ins_cost(125); // XXX
10447 format %{ "decq $dst\t# long" %}
10448 ins_encode %{
10449 __ decrementq($dst$$Address);
10450 %}
10451 ins_pipe(ialu_mem_imm);
10452 %}
10453
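// Address-computation forms: fold shift-plus-add and add-plus-add chains into a
// single leaq. They are only selected when the CPU reports fast two- or
// three-operand lea (supports_fast_2op_lea / supports_fast_3op_lea), since
// complex lea is slow on some microarchitectures.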
10454 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10455 %{
10456 predicate(VM_Version::supports_fast_2op_lea());
10457 match(Set dst (AddL (LShiftL index scale) disp));
10458
10459 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10460 ins_encode %{
10461 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10462 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10463 %}
10464 ins_pipe(ialu_reg_reg);
10465 %}
10466
10467 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10468 %{
10469 predicate(VM_Version::supports_fast_3op_lea());
10470 match(Set dst (AddL (AddL base index) disp));
10471
10472 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10473 ins_encode %{
10474 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10475 %}
10476 ins_pipe(ialu_reg_reg);
10477 %}
10478
10479 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10480 %{
10481 predicate(VM_Version::supports_fast_2op_lea());
10482 match(Set dst (AddL base (LShiftL index scale)));
10483
10484 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10485 ins_encode %{
10486 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10487 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10488 %}
10489 ins_pipe(ialu_reg_reg);
10490 %}
10491
10492 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10493 %{
10494 predicate(VM_Version::supports_fast_3op_lea());
10495 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10496
10497 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10498 ins_encode %{
10499 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10500 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10501 %}
10502 ins_pipe(ialu_reg_reg);
10503 %}
10504
10505 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10506 %{
10507 match(Set dst (AddP dst src));
10508 effect(KILL cr);
10509 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10510
10511 format %{ "addq $dst, $src\t# ptr" %}
10512 ins_encode %{
10513 __ addq($dst$$Register, $src$$Register);
10514 %}
10515 ins_pipe(ialu_reg_reg);
10516 %}
10517
10518 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10519 %{
10520 match(Set dst (AddP dst src));
10521 effect(KILL cr);
10522 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10523
10524 format %{ "addq $dst, $src\t# ptr" %}
10525 ins_encode %{
10526 __ addq($dst$$Register, $src$$constant);
10527 %}
10528 ins_pipe( ialu_reg );
10529 %}
10530
10531 // XXX addP mem ops ????
10532
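// The cast nodes below are compile-time type assertions. They normally emit no
// code (size 0); only when VerifyConstraintCasts is enabled do the checked
// variants emit a runtime range check via verify_int_in_range /
// verify_long_in_range.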
10533 instruct checkCastPP(rRegP dst)
10534 %{
10535 match(Set dst (CheckCastPP dst));
10536
10537 size(0);
10538 format %{ "# checkcastPP of $dst" %}
10539 ins_encode(/* empty encoding */);
10540 ins_pipe(empty);
10541 %}
10542
10543 instruct castPP(rRegP dst)
10544 %{
10545 match(Set dst (CastPP dst));
10546
10547 size(0);
10548 format %{ "# castPP of $dst" %}
10549 ins_encode(/* empty encoding */);
10550 ins_pipe(empty);
10551 %}
10552
10553 instruct castII(rRegI dst)
10554 %{
10555 predicate(VerifyConstraintCasts == 0);
10556 match(Set dst (CastII dst));
10557
10558 size(0);
10559 format %{ "# castII of $dst" %}
10560 ins_encode(/* empty encoding */);
10561 ins_cost(0);
10562 ins_pipe(empty);
10563 %}
10564
10565 instruct castII_checked(rRegI dst, rFlagsReg cr)
10566 %{
10567 predicate(VerifyConstraintCasts > 0);
10568 match(Set dst (CastII dst));
10569
10570 effect(KILL cr);
10571 format %{ "# cast_checked_II $dst" %}
10572 ins_encode %{
10573 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10574 %}
10575 ins_pipe(pipe_slow);
10576 %}
10577
10578 instruct castLL(rRegL dst)
10579 %{
10580 predicate(VerifyConstraintCasts == 0);
10581 match(Set dst (CastLL dst));
10582
10583 size(0);
10584 format %{ "# castLL of $dst" %}
10585 ins_encode(/* empty encoding */);
10586 ins_cost(0);
10587 ins_pipe(empty);
10588 %}
10589
10590 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10591 %{
10592 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10593 match(Set dst (CastLL dst));
10594
10595 effect(KILL cr);
10596 format %{ "# cast_checked_LL $dst" %}
10597 ins_encode %{
10598 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10599 %}
10600 ins_pipe(pipe_slow);
10601 %}
10602
10603 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10604 %{
10605 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10606 match(Set dst (CastLL dst));
10607
10608 effect(KILL cr, TEMP tmp);
10609 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10610 ins_encode %{
10611 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10612 %}
10613 ins_pipe(pipe_slow);
10614 %}
10615
10616 instruct castFF(regF dst)
10617 %{
10618 match(Set dst (CastFF dst));
10619
10620 size(0);
10621 format %{ "# castFF of $dst" %}
10622 ins_encode(/* empty encoding */);
10623 ins_cost(0);
10624 ins_pipe(empty);
10625 %}
10626
10627 instruct castHH(regF dst)
10628 %{
10629 match(Set dst (CastHH dst));
10630
10631 size(0);
10632 format %{ "# castHH of $dst" %}
10633 ins_encode(/* empty encoding */);
10634 ins_cost(0);
10635 ins_pipe(empty);
10636 %}
10637
10638 instruct castDD(regD dst)
10639 %{
10640 match(Set dst (CastDD dst));
10641
10642 size(0);
10643 format %{ "# castDD of $dst" %}
10644 ins_encode(/* empty encoding */);
10645 ins_cost(0);
10646 ins_pipe(empty);
10647 %}
10648
10649 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
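// lock cmpxchg compares rax with the memory operand; if they are equal it stores
// the new value and sets ZF, otherwise it loads the current memory value into
// rax. setcc then materializes the boolean result from ZF.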
10650 instruct compareAndSwapP(rRegI res,
10651 memory mem_ptr,
10652 rax_RegP oldval, rRegP newval,
10653 rFlagsReg cr)
10654 %{
10655 predicate(n->as_LoadStore()->barrier_data() == 0);
10656 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10657 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10658 effect(KILL cr, KILL oldval);
10659
10660 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10661 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10662 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10663 ins_encode %{
10664 __ lock();
10665 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10666 __ setcc(Assembler::equal, $res$$Register);
10667 %}
10668 ins_pipe( pipe_cmpxchg );
10669 %}
10670
10671 instruct compareAndSwapL(rRegI res,
10672 memory mem_ptr,
10673 rax_RegL oldval, rRegL newval,
10674 rFlagsReg cr)
10675 %{
10676 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10677 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10678 effect(KILL cr, KILL oldval);
10679
10680 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10681 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10682 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10683 ins_encode %{
10684 __ lock();
10685 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10686 __ setcc(Assembler::equal, $res$$Register);
10687 %}
10688 ins_pipe( pipe_cmpxchg );
10689 %}
10690
10691 instruct compareAndSwapI(rRegI res,
10692 memory mem_ptr,
10693 rax_RegI oldval, rRegI newval,
10694 rFlagsReg cr)
10695 %{
10696 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10697 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10698 effect(KILL cr, KILL oldval);
10699
10700 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10701 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10702 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10703 ins_encode %{
10704 __ lock();
10705 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10706 __ setcc(Assembler::equal, $res$$Register);
10707 %}
10708 ins_pipe( pipe_cmpxchg );
10709 %}
10710
10711 instruct compareAndSwapB(rRegI res,
10712 memory mem_ptr,
10713 rax_RegI oldval, rRegI newval,
10714 rFlagsReg cr)
10715 %{
10716 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10717 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10718 effect(KILL cr, KILL oldval);
10719
10720 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10721 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10722 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10723 ins_encode %{
10724 __ lock();
10725 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10726 __ setcc(Assembler::equal, $res$$Register);
10727 %}
10728 ins_pipe( pipe_cmpxchg );
10729 %}
10730
10731 instruct compareAndSwapS(rRegI res,
10732 memory mem_ptr,
10733 rax_RegI oldval, rRegI newval,
10734 rFlagsReg cr)
10735 %{
10736 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10737 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10738 effect(KILL cr, KILL oldval);
10739
10740 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10741 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10742 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10743 ins_encode %{
10744 __ lock();
10745 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10746 __ setcc(Assembler::equal, $res$$Register);
10747 %}
10748 ins_pipe( pipe_cmpxchg );
10749 %}
10750
10751 instruct compareAndSwapN(rRegI res,
10752 memory mem_ptr,
10753 rax_RegN oldval, rRegN newval,
10754 rFlagsReg cr) %{
10755 predicate(n->as_LoadStore()->barrier_data() == 0);
10756 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10757 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10758 effect(KILL cr, KILL oldval);
10759
10760 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10761 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10762 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10763 ins_encode %{
10764 __ lock();
10765 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10766 __ setcc(Assembler::equal, $res$$Register);
10767 %}
10768 ins_pipe( pipe_cmpxchg );
10769 %}
10770
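// CompareAndExchange returns the previous memory value rather than a boolean.
// cmpxchg already leaves that value in rax (on failure it is loaded there, on
// success rax still holds it), so the result is simply the rax operand and no
// setcc is needed.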
10771 instruct compareAndExchangeB(
10772 memory mem_ptr,
10773 rax_RegI oldval, rRegI newval,
10774 rFlagsReg cr)
10775 %{
10776 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10777 effect(KILL cr);
10778
10779 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10780 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10781 ins_encode %{
10782 __ lock();
10783 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10784 %}
10785 ins_pipe( pipe_cmpxchg );
10786 %}
10787
10788 instruct compareAndExchangeS(
10789 memory mem_ptr,
10790 rax_RegI oldval, rRegI newval,
10791 rFlagsReg cr)
10792 %{
10793 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10794 effect(KILL cr);
10795
10796 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10797 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10798 ins_encode %{
10799 __ lock();
10800 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10801 %}
10802 ins_pipe( pipe_cmpxchg );
10803 %}
10804
10805 instruct compareAndExchangeI(
10806 memory mem_ptr,
10807 rax_RegI oldval, rRegI newval,
10808 rFlagsReg cr)
10809 %{
10810 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10811 effect(KILL cr);
10812
10813 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10814 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10815 ins_encode %{
10816 __ lock();
10817 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10818 %}
10819 ins_pipe( pipe_cmpxchg );
10820 %}
10821
10822 instruct compareAndExchangeL(
10823 memory mem_ptr,
10824 rax_RegL oldval, rRegL newval,
10825 rFlagsReg cr)
10826 %{
10827 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10828 effect(KILL cr);
10829
10830 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10831 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10832 ins_encode %{
10833 __ lock();
10834 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10835 %}
10836 ins_pipe( pipe_cmpxchg );
10837 %}
10838
10839 instruct compareAndExchangeN(
10840 memory mem_ptr,
10841 rax_RegN oldval, rRegN newval,
10842 rFlagsReg cr) %{
10843 predicate(n->as_LoadStore()->barrier_data() == 0);
10844 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10845 effect(KILL cr);
10846
10847 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10848 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10849 ins_encode %{
10850 __ lock();
10851 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10852 %}
10853 ins_pipe( pipe_cmpxchg );
10854 %}
10855
10856 instruct compareAndExchangeP(
10857 memory mem_ptr,
10858 rax_RegP oldval, rRegP newval,
10859 rFlagsReg cr)
10860 %{
10861 predicate(n->as_LoadStore()->barrier_data() == 0);
10862 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10863 effect(KILL cr);
10864
10865 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10866 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10867 ins_encode %{
10868 __ lock();
10869 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10870 %}
10871 ins_pipe( pipe_cmpxchg );
10872 %}
10873
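// GetAndAdd: when the fetched value is unused (result_not_used) a plain
// lock add/addb/addw/addl/addq is sufficient and cheaper; otherwise lock xadd
// is used so the old value is returned in the register operand.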
10874 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10875 predicate(n->as_LoadStore()->result_not_used());
10876 match(Set dummy (GetAndAddB mem add));
10877 effect(KILL cr);
10878 format %{ "addb_lock $mem, $add" %}
10879 ins_encode %{
10880 __ lock();
10881 __ addb($mem$$Address, $add$$Register);
10882 %}
10883 ins_pipe(pipe_cmpxchg);
10884 %}
10885
10886 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10887 predicate(n->as_LoadStore()->result_not_used());
10888 match(Set dummy (GetAndAddB mem add));
10889 effect(KILL cr);
10890 format %{ "addb_lock $mem, $add" %}
10891 ins_encode %{
10892 __ lock();
10893 __ addb($mem$$Address, $add$$constant);
10894 %}
10895 ins_pipe(pipe_cmpxchg);
10896 %}
10897
10898 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10899 predicate(!n->as_LoadStore()->result_not_used());
10900 match(Set newval (GetAndAddB mem newval));
10901 effect(KILL cr);
10902 format %{ "xaddb_lock $mem, $newval\t# $newval -> byte" %}
10903 ins_encode %{
10904 __ lock();
10905 __ xaddb($mem$$Address, $newval$$Register);
10906 __ narrow_subword_type($newval$$Register, T_BYTE);
10907 %}
10908 ins_pipe(pipe_cmpxchg);
10909 %}
10910
10911 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10912 predicate(n->as_LoadStore()->result_not_used());
10913 match(Set dummy (GetAndAddS mem add));
10914 effect(KILL cr);
10915 format %{ "addw_lock $mem, $add" %}
10916 ins_encode %{
10917 __ lock();
10918 __ addw($mem$$Address, $add$$Register);
10919 %}
10920 ins_pipe(pipe_cmpxchg);
10921 %}
10922
10923 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10924 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10925 match(Set dummy (GetAndAddS mem add));
10926 effect(KILL cr);
10927 format %{ "addw_lock $mem, $add" %}
10928 ins_encode %{
10929 __ lock();
10930 __ addw($mem$$Address, $add$$constant);
10931 %}
10932 ins_pipe(pipe_cmpxchg);
10933 %}
10934
10935 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10936 predicate(!n->as_LoadStore()->result_not_used());
10937 match(Set newval (GetAndAddS mem newval));
10938 effect(KILL cr);
10939 format %{ "xaddw_lock $mem, $newval\t# $newval -> short" %}
10940 ins_encode %{
10941 __ lock();
10942 __ xaddw($mem$$Address, $newval$$Register);
10943 __ narrow_subword_type($newval$$Register, T_SHORT);
10944 %}
10945 ins_pipe(pipe_cmpxchg);
10946 %}
10947
10948 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10949 predicate(n->as_LoadStore()->result_not_used());
10950 match(Set dummy (GetAndAddI mem add));
10951 effect(KILL cr);
10952 format %{ "addl_lock $mem, $add" %}
10953 ins_encode %{
10954 __ lock();
10955 __ addl($mem$$Address, $add$$Register);
10956 %}
10957 ins_pipe(pipe_cmpxchg);
10958 %}
10959
10960 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10961 predicate(n->as_LoadStore()->result_not_used());
10962 match(Set dummy (GetAndAddI mem add));
10963 effect(KILL cr);
10964 format %{ "addl_lock $mem, $add" %}
10965 ins_encode %{
10966 __ lock();
10967 __ addl($mem$$Address, $add$$constant);
10968 %}
10969 ins_pipe(pipe_cmpxchg);
10970 %}
10971
10972 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10973 predicate(!n->as_LoadStore()->result_not_used());
10974 match(Set newval (GetAndAddI mem newval));
10975 effect(KILL cr);
10976 format %{ "xaddl_lock $mem, $newval" %}
10977 ins_encode %{
10978 __ lock();
10979 __ xaddl($mem$$Address, $newval$$Register);
10980 %}
10981 ins_pipe(pipe_cmpxchg);
10982 %}
10983
10984 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10985 predicate(n->as_LoadStore()->result_not_used());
10986 match(Set dummy (GetAndAddL mem add));
10987 effect(KILL cr);
10988 format %{ "addq_lock $mem, $add" %}
10989 ins_encode %{
10990 __ lock();
10991 __ addq($mem$$Address, $add$$Register);
10992 %}
10993 ins_pipe(pipe_cmpxchg);
10994 %}
10995
10996 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10997 predicate(n->as_LoadStore()->result_not_used());
10998 match(Set dummy (GetAndAddL mem add));
10999 effect(KILL cr);
11000 format %{ "addq_lock $mem, $add" %}
11001 ins_encode %{
11002 __ lock();
11003 __ addq($mem$$Address, $add$$constant);
11004 %}
11005 ins_pipe(pipe_cmpxchg);
11006 %}
11007
11008 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11009 predicate(!n->as_LoadStore()->result_not_used());
11010 match(Set newval (GetAndAddL mem newval));
11011 effect(KILL cr);
11012 format %{ "xaddq_lock $mem, $newval" %}
11013 ins_encode %{
11014 __ lock();
11015 __ xaddq($mem$$Address, $newval$$Register);
11016 %}
11017 ins_pipe(pipe_cmpxchg);
11018 %}
11019
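// GetAndSet: xchg with a memory operand is implicitly locked on x86 and does
// not modify the flags, so no explicit lock prefix or rFlagsReg kill is needed.
// The sub-word forms re-normalize the returned byte/short value afterwards.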
11020 instruct xchgB( memory mem, rRegI newval) %{
11021 match(Set newval (GetAndSetB mem newval));
11022 format %{ "XCHGB $newval,[$mem]\t# $newval -> byte" %}
11023 ins_encode %{
11024 __ xchgb($newval$$Register, $mem$$Address);
11025 __ narrow_subword_type($newval$$Register, T_BYTE);
11026 %}
11027 ins_pipe( pipe_cmpxchg );
11028 %}
11029
11030 instruct xchgS( memory mem, rRegI newval) %{
11031 match(Set newval (GetAndSetS mem newval));
11032 format %{ "XCHGW $newval,[$mem]\t# $newval -> short" %}
11033 ins_encode %{
11034 __ xchgw($newval$$Register, $mem$$Address);
11035 __ narrow_subword_type($newval$$Register, T_SHORT);
11036 %}
11037 ins_pipe( pipe_cmpxchg );
11038 %}
11039
11040 instruct xchgI( memory mem, rRegI newval) %{
11041 match(Set newval (GetAndSetI mem newval));
11042 format %{ "XCHGL $newval,[$mem]" %}
11043 ins_encode %{
11044 __ xchgl($newval$$Register, $mem$$Address);
11045 %}
11046 ins_pipe( pipe_cmpxchg );
11047 %}
11048
11049 instruct xchgL( memory mem, rRegL newval) %{
11050 match(Set newval (GetAndSetL mem newval));
11051 format %{ "XCHGQ $newval,[$mem]" %}
11052 ins_encode %{
11053 __ xchgq($newval$$Register, $mem$$Address);
11054 %}
11055 ins_pipe( pipe_cmpxchg );
11056 %}
11057
11058 instruct xchgP( memory mem, rRegP newval) %{
11059 match(Set newval (GetAndSetP mem newval));
11060 predicate(n->as_LoadStore()->barrier_data() == 0);
11061 format %{ "XCHGQ $newval,[$mem]" %}
11062 ins_encode %{
11063 __ xchgq($newval$$Register, $mem$$Address);
11064 %}
11065 ins_pipe( pipe_cmpxchg );
11066 %}
11067
11068 instruct xchgN( memory mem, rRegN newval) %{
11069 predicate(n->as_LoadStore()->barrier_data() == 0);
11070 match(Set newval (GetAndSetN mem newval));
11071 format %{ "XCHGL $newval,[$mem]" %}
11072 ins_encode %{
11073 __ xchgl($newval$$Register, $mem$$Address);
11074 %}
11075 ins_pipe( pipe_cmpxchg );
11076 %}
11077
11078 //----------Abs Instructions-------------------------------------------
11079
11080 // Integer Absolute Instructions
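// Branchless absolute value: compute 0 - src into dst, then conditionally move
// src back into dst when the subtraction says src is positive (signed "less"),
// yielding |src| without a branch.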
11081 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11082 %{
11083 match(Set dst (AbsI src));
11084 effect(TEMP dst, KILL cr);
11085 format %{ "xorl $dst, $dst\t# abs int\n\t"
11086 "subl $dst, $src\n\t"
11087 "cmovll $dst, $src" %}
11088 ins_encode %{
11089 __ xorl($dst$$Register, $dst$$Register);
11090 __ subl($dst$$Register, $src$$Register);
11091 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11092 %}
11093
11094 ins_pipe(ialu_reg_reg);
11095 %}
11096
11097 // Long Absolute Instructions
11098 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11099 %{
11100 match(Set dst (AbsL src));
11101 effect(TEMP dst, KILL cr);
11102 format %{ "xorl $dst, $dst\t# abs long\n\t"
11103 "subq $dst, $src\n\t"
11104 "cmovlq $dst, $src" %}
11105 ins_encode %{
11106 __ xorl($dst$$Register, $dst$$Register);
11107 __ subq($dst$$Register, $src$$Register);
11108 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11109 %}
11110
11111 ins_pipe(ialu_reg_reg);
11112 %}
11113
11114 //----------Subtraction Instructions-------------------------------------------
11115
11116 // Integer Subtraction Instructions
11117 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11118 %{
11119 predicate(!UseAPX);
11120 match(Set dst (SubI dst src));
11121 effect(KILL cr);
11122 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11123
11124 format %{ "subl $dst, $src\t# int" %}
11125 ins_encode %{
11126 __ subl($dst$$Register, $src$$Register);
11127 %}
11128 ins_pipe(ialu_reg_reg);
11129 %}
11130
11131 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11132 %{
11133 predicate(UseAPX);
11134 match(Set dst (SubI src1 src2));
11135 effect(KILL cr);
11136 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11137
11138 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11139 ins_encode %{
11140 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11141 %}
11142 ins_pipe(ialu_reg_reg);
11143 %}
11144
11145 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11146 %{
11147 predicate(UseAPX);
11148 match(Set dst (SubI src1 src2));
11149 effect(KILL cr);
11150 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11151
11152 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11153 ins_encode %{
11154 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11155 %}
11156 ins_pipe(ialu_reg_reg);
11157 %}
11158
11159 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11160 %{
11161 predicate(UseAPX);
11162 match(Set dst (SubI (LoadI src1) src2));
11163 effect(KILL cr);
11164 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11165
11166 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11167 ins_encode %{
11168 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11169 %}
11170 ins_pipe(ialu_reg_reg);
11171 %}
11172
11173 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11174 %{
11175 predicate(!UseAPX);
11176 match(Set dst (SubI dst (LoadI src)));
11177 effect(KILL cr);
11178 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11179
11180 ins_cost(150);
11181 format %{ "subl $dst, $src\t# int" %}
11182 ins_encode %{
11183 __ subl($dst$$Register, $src$$Address);
11184 %}
11185 ins_pipe(ialu_reg_mem);
11186 %}
11187
11188 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11189 %{
11190 predicate(UseAPX);
11191 match(Set dst (SubI src1 (LoadI src2)));
11192 effect(KILL cr);
11193 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11194
11195 ins_cost(150);
11196 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11197 ins_encode %{
11198 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11199 %}
11200 ins_pipe(ialu_reg_mem);
11201 %}
11202
11203 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11204 %{
11205 predicate(UseAPX);
11206 match(Set dst (SubI (LoadI src1) src2));
11207 effect(KILL cr);
11208 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11209
11210 ins_cost(150);
11211 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11212 ins_encode %{
11213 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11214 %}
11215 ins_pipe(ialu_reg_mem);
11216 %}
11217
11218 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11219 %{
11220 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11221 effect(KILL cr);
11222 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11223
11224 ins_cost(150);
11225 format %{ "subl $dst, $src\t# int" %}
11226 ins_encode %{
11227 __ subl($dst$$Address, $src$$Register);
11228 %}
11229 ins_pipe(ialu_mem_reg);
11230 %}
11231
11232 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11233 %{
11234 predicate(!UseAPX);
11235 match(Set dst (SubL dst src));
11236 effect(KILL cr);
11237 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11238
11239 format %{ "subq $dst, $src\t# long" %}
11240 ins_encode %{
11241 __ subq($dst$$Register, $src$$Register);
11242 %}
11243 ins_pipe(ialu_reg_reg);
11244 %}
11245
11246 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11247 %{
11248 predicate(UseAPX);
11249 match(Set dst (SubL src1 src2));
11250 effect(KILL cr);
11251 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11252
11253 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11254 ins_encode %{
11255 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11256 %}
11257 ins_pipe(ialu_reg_reg);
11258 %}
11259
11260 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11261 %{
11262 predicate(UseAPX);
11263 match(Set dst (SubL src1 src2));
11264 effect(KILL cr);
11265 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11266
11267 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11268 ins_encode %{
11269 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11270 %}
11271 ins_pipe(ialu_reg_reg);
11272 %}
11273
11274 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11275 %{
11276 predicate(UseAPX);
11277 match(Set dst (SubL (LoadL src1) src2));
11278 effect(KILL cr);
11279 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11280
11281 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11282 ins_encode %{
11283 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11284 %}
11285 ins_pipe(ialu_reg_reg);
11286 %}
11287
11288 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11289 %{
11290 predicate(!UseAPX);
11291 match(Set dst (SubL dst (LoadL src)));
11292 effect(KILL cr);
11293 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11294
11295 ins_cost(150);
11296 format %{ "subq $dst, $src\t# long" %}
11297 ins_encode %{
11298 __ subq($dst$$Register, $src$$Address);
11299 %}
11300 ins_pipe(ialu_reg_mem);
11301 %}
11302
11303 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11304 %{
11305 predicate(UseAPX);
11306 match(Set dst (SubL src1 (LoadL src2)));
11307 effect(KILL cr);
11308 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11309
11310 ins_cost(150);
11311 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11312 ins_encode %{
11313 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11314 %}
11315 ins_pipe(ialu_reg_mem);
11316 %}
11317
11318 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11319 %{
11320 predicate(UseAPX);
11321 match(Set dst (SubL (LoadL src1) src2));
11322 effect(KILL cr);
11323 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11324
11325 ins_cost(150);
11326 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11327 ins_encode %{
11328 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11329 %}
11330 ins_pipe(ialu_reg_mem);
11331 %}
11332
11333 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11334 %{
11335 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11336 effect(KILL cr);
11337 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11338
11339 ins_cost(150);
11340 format %{ "subq $dst, $src\t# long" %}
11341 ins_encode %{
11342 __ subq($dst$$Address, $src$$Register);
11343 %}
11344 ins_pipe(ialu_mem_reg);
11345 %}
11346
11347 // Subtract from a pointer
11348 // XXX hmpf???
11349 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11350 %{
11351 match(Set dst (AddP dst (SubI zero src)));
11352 effect(KILL cr);
11353
11354 format %{ "subq $dst, $src\t# ptr - int" %}
11355 ins_encode %{
11356 __ subq($dst$$Register, $src$$Register);
11357 %}
11358 ins_pipe(ialu_reg_reg);
11359 %}
11360
11361 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11362 %{
11363 predicate(!UseAPX);
11364 match(Set dst (SubI zero dst));
11365 effect(KILL cr);
11366 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11367
11368 format %{ "negl $dst\t# int" %}
11369 ins_encode %{
11370 __ negl($dst$$Register);
11371 %}
11372 ins_pipe(ialu_reg);
11373 %}
11374
11375 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11376 %{
11377 predicate(UseAPX);
11378 match(Set dst (SubI zero src));
11379 effect(KILL cr);
11380 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11381
11382 format %{ "enegl $dst, $src\t# int ndd" %}
11383 ins_encode %{
11384 __ enegl($dst$$Register, $src$$Register, false);
11385 %}
11386 ins_pipe(ialu_reg);
11387 %}
11388
11389 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11390 %{
11391 predicate(!UseAPX);
11392 match(Set dst (NegI dst));
11393 effect(KILL cr);
11394 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11395
11396 format %{ "negl $dst\t# int" %}
11397 ins_encode %{
11398 __ negl($dst$$Register);
11399 %}
11400 ins_pipe(ialu_reg);
11401 %}
11402
11403 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11404 %{
11405 predicate(UseAPX);
11406 match(Set dst (NegI src));
11407 effect(KILL cr);
11408 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11409
11410 format %{ "enegl $dst, $src\t# int ndd" %}
11411 ins_encode %{
11412 __ enegl($dst$$Register, $src$$Register, false);
11413 %}
11414 ins_pipe(ialu_reg);
11415 %}
11416
11417 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11418 %{
11419 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11420 effect(KILL cr);
11421 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11422
11423 format %{ "negl $dst\t# int" %}
11424 ins_encode %{
11425 __ negl($dst$$Address);
11426 %}
11427 ins_pipe(ialu_reg);
11428 %}
11429
11430 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11431 %{
11432 predicate(!UseAPX);
11433 match(Set dst (SubL zero dst));
11434 effect(KILL cr);
11435 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11436
11437 format %{ "negq $dst\t# long" %}
11438 ins_encode %{
11439 __ negq($dst$$Register);
11440 %}
11441 ins_pipe(ialu_reg);
11442 %}
11443
11444 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11445 %{
11446 predicate(UseAPX);
11447 match(Set dst (SubL zero src));
11448 effect(KILL cr);
11449 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11450
11451 format %{ "enegq $dst, $src\t# long ndd" %}
11452 ins_encode %{
11453 __ enegq($dst$$Register, $src$$Register, false);
11454 %}
11455 ins_pipe(ialu_reg);
11456 %}
11457
11458 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11459 %{
11460 predicate(!UseAPX);
11461 match(Set dst (NegL dst));
11462 effect(KILL cr);
11463 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11464
11465 format %{ "negq $dst\t# long" %}
11466 ins_encode %{
11467 __ negq($dst$$Register);
11468 %}
11469 ins_pipe(ialu_reg);
11470 %}
11471
11472 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11473 %{
11474 predicate(UseAPX);
11475 match(Set dst (NegL src));
11476 effect(KILL cr);
11477 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11478
11479 format %{ "enegq $dst, $src\t# long ndd" %}
11480 ins_encode %{
11481 __ enegq($dst$$Register, $src$$Register, false);
11482 %}
11483 ins_pipe(ialu_reg);
11484 %}
11485
11486 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11487 %{
11488 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11489 effect(KILL cr);
11490 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11491
11492 format %{ "negq $dst\t# long" %}
11493 ins_encode %{
11494 __ negq($dst$$Address);
11495 %}
11496 ins_pipe(ialu_reg);
11497 %}
11498
11499 //----------Multiplication/Division Instructions-------------------------------
11500 // Integer Multiplication Instructions
11501 // Multiply Register
11502
11503 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11504 %{
11505 predicate(!UseAPX);
11506 match(Set dst (MulI dst src));
11507 effect(KILL cr);
11508
11509 ins_cost(300);
11510 format %{ "imull $dst, $src\t# int" %}
11511 ins_encode %{
11512 __ imull($dst$$Register, $src$$Register);
11513 %}
11514 ins_pipe(ialu_reg_reg_alu0);
11515 %}
11516
11517 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11518 %{
11519 predicate(UseAPX);
11520 match(Set dst (MulI src1 src2));
11521 effect(KILL cr);
11522 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11523
11524 ins_cost(300);
11525 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11526 ins_encode %{
11527 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11528 %}
11529 ins_pipe(ialu_reg_reg_alu0);
11530 %}
11531
11532 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11533 %{
11534 match(Set dst (MulI src imm));
11535 effect(KILL cr);
11536
11537 ins_cost(300);
11538 format %{ "imull $dst, $src, $imm\t# int" %}
11539 ins_encode %{
11540 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11541 %}
11542 ins_pipe(ialu_reg_reg_alu0);
11543 %}
11544
11545 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11546 %{
11547 predicate(!UseAPX);
11548 match(Set dst (MulI dst (LoadI src)));
11549 effect(KILL cr);
11550
11551 ins_cost(350);
11552 format %{ "imull $dst, $src\t# int" %}
11553 ins_encode %{
11554 __ imull($dst$$Register, $src$$Address);
11555 %}
11556 ins_pipe(ialu_reg_mem_alu0);
11557 %}
11558
11559 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11560 %{
11561 predicate(UseAPX);
11562 match(Set dst (MulI src1 (LoadI src2)));
11563 effect(KILL cr);
11564 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11565
11566 ins_cost(350);
11567 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11568 ins_encode %{
11569 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11570 %}
11571 ins_pipe(ialu_reg_mem_alu0);
11572 %}
11573
11574 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11575 %{
11576 match(Set dst (MulI (LoadI src) imm));
11577 effect(KILL cr);
11578
11579 ins_cost(300);
11580 format %{ "imull $dst, $src, $imm\t# int" %}
11581 ins_encode %{
11582 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11583 %}
11584 ins_pipe(ialu_reg_mem_alu0);
11585 %}
11586
11587 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11588 %{
11589 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11590 effect(KILL cr, KILL src2);
11591
11592 expand %{ mulI_rReg(dst, src1, cr);
11593 mulI_rReg(src2, src3, cr);
11594 addI_rReg(dst, src2, cr); %}
11595 %}
11596
11597 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11598 %{
11599 predicate(!UseAPX);
11600 match(Set dst (MulL dst src));
11601 effect(KILL cr);
11602
11603 ins_cost(300);
11604 format %{ "imulq $dst, $src\t# long" %}
11605 ins_encode %{
11606 __ imulq($dst$$Register, $src$$Register);
11607 %}
11608 ins_pipe(ialu_reg_reg_alu0);
11609 %}
11610
11611 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11612 %{
11613 predicate(UseAPX);
11614 match(Set dst (MulL src1 src2));
11615 effect(KILL cr);
11616 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11617
11618 ins_cost(300);
11619 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11620 ins_encode %{
11621 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11622 %}
11623 ins_pipe(ialu_reg_reg_alu0);
11624 %}
11625
11626 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11627 %{
11628 match(Set dst (MulL src imm));
11629 effect(KILL cr);
11630
11631 ins_cost(300);
11632 format %{ "imulq $dst, $src, $imm\t# long" %}
11633 ins_encode %{
11634 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11635 %}
11636 ins_pipe(ialu_reg_reg_alu0);
11637 %}
11638
11639 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11640 %{
11641 predicate(!UseAPX);
11642 match(Set dst (MulL dst (LoadL src)));
11643 effect(KILL cr);
11644
11645 ins_cost(350);
11646 format %{ "imulq $dst, $src\t# long" %}
11647 ins_encode %{
11648 __ imulq($dst$$Register, $src$$Address);
11649 %}
11650 ins_pipe(ialu_reg_mem_alu0);
11651 %}
11652
11653 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11654 %{
11655 predicate(UseAPX);
11656 match(Set dst (MulL src1 (LoadL src2)));
11657 effect(KILL cr);
11658 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11659
11660 ins_cost(350);
11661 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11662 ins_encode %{
11663 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11664 %}
11665 ins_pipe(ialu_reg_mem_alu0);
11666 %}
11667
11668 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11669 %{
11670 match(Set dst (MulL (LoadL src) imm));
11671 effect(KILL cr);
11672
11673 ins_cost(300);
11674 format %{ "imulq $dst, $src, $imm\t# long" %}
11675 ins_encode %{
11676 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11677 %}
11678 ins_pipe(ialu_reg_mem_alu0);
11679 %}
11680
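// High-half multiply: the one-operand imulq/mulq forms produce the full 128-bit
// product in RDX:RAX; only the high half in RDX is the result here, so RAX is
// an input that gets clobbered.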
11681 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11682 %{
11683 match(Set dst (MulHiL src rax));
11684 effect(USE_KILL rax, KILL cr);
11685
11686 ins_cost(300);
11687 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11688 ins_encode %{
11689 __ imulq($src$$Register);
11690 %}
11691 ins_pipe(ialu_reg_reg_alu0);
11692 %}
11693
11694 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11695 %{
11696 match(Set dst (UMulHiL src rax));
11697 effect(USE_KILL rax, KILL cr);
11698
11699 ins_cost(300);
11700 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11701 ins_encode %{
11702 __ mulq($src$$Register);
11703 %}
11704 ins_pipe(ialu_reg_reg_alu0);
11705 %}
11706
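// Signed divide/remainder must special-case MIN_VALUE / -1: idiv would raise an
// overflow fault for it, so the cdql_enc/cdqq_enc encodings test for that pair
// and branch around the divide, leaving quotient MIN_VALUE and remainder 0 as
// required by Java semantics.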
11707 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11708 rFlagsReg cr)
11709 %{
11710 match(Set rax (DivI rax div));
11711 effect(KILL rdx, KILL cr);
11712
11713 ins_cost(30*100+10*100); // XXX
11714 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11715 "jne,s normal\n\t"
11716 "xorl rdx, rdx\n\t"
11717 "cmpl $div, -1\n\t"
11718 "je,s done\n"
11719 "normal: cdql\n\t"
11720 "idivl $div\n"
11721 "done:" %}
11722 ins_encode(cdql_enc(div));
11723 ins_pipe(ialu_reg_reg_alu0);
11724 %}
11725
11726 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11727 rFlagsReg cr)
11728 %{
11729 match(Set rax (DivL rax div));
11730 effect(KILL rdx, KILL cr);
11731
11732 ins_cost(30*100+10*100); // XXX
11733 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11734 "cmpq rax, rdx\n\t"
11735 "jne,s normal\n\t"
11736 "xorl rdx, rdx\n\t"
11737 "cmpq $div, -1\n\t"
11738 "je,s done\n"
11739 "normal: cdqq\n\t"
11740 "idivq $div\n"
11741 "done:" %}
11742 ins_encode(cdqq_enc(div));
11743 ins_pipe(ialu_reg_reg_alu0);
11744 %}
11745
11746 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11747 %{
11748 match(Set rax (UDivI rax div));
11749 effect(KILL rdx, KILL cr);
11750
11751 ins_cost(300);
11752 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11753 ins_encode %{
11754 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11755 %}
11756 ins_pipe(ialu_reg_reg_alu0);
11757 %}
11758
11759 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11760 %{
11761 match(Set rax (UDivL rax div));
11762 effect(KILL rdx, KILL cr);
11763
11764 ins_cost(300);
11765 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11766 ins_encode %{
11767 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11768 %}
11769 ins_pipe(ialu_reg_reg_alu0);
11770 %}
11771
11772 // Integer DIVMOD with Register, both quotient and mod results
11773 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11774 rFlagsReg cr)
11775 %{
11776 match(DivModI rax div);
11777 effect(KILL cr);
11778
11779 ins_cost(30*100+10*100); // XXX
11780 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11781 "jne,s normal\n\t"
11782 "xorl rdx, rdx\n\t"
11783 "cmpl $div, -1\n\t"
11784 "je,s done\n"
11785 "normal: cdql\n\t"
11786 "idivl $div\n"
11787 "done:" %}
11788 ins_encode(cdql_enc(div));
11789 ins_pipe(pipe_slow);
11790 %}
11791
11792 // Long DIVMOD with Register, both quotient and mod results
11793 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11794 rFlagsReg cr)
11795 %{
11796 match(DivModL rax div);
11797 effect(KILL cr);
11798
11799 ins_cost(30*100+10*100); // XXX
11800 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11801 "cmpq rax, rdx\n\t"
11802 "jne,s normal\n\t"
11803 "xorl rdx, rdx\n\t"
11804 "cmpq $div, -1\n\t"
11805 "je,s done\n"
11806 "normal: cdqq\n\t"
11807 "idivq $div\n"
11808 "done:" %}
11809 ins_encode(cdqq_enc(div));
11810 ins_pipe(pipe_slow);
11811 %}
11812
11813 // Unsigned integer DIVMOD with Register, both quotient and mod results
11814 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11815 no_rax_rdx_RegI div, rFlagsReg cr)
11816 %{
11817 match(UDivModI rax div);
11818 effect(TEMP tmp, KILL cr);
11819
11820 ins_cost(300);
11821 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11822 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11823 %}
11824 ins_encode %{
11825 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11826 %}
11827 ins_pipe(pipe_slow);
11828 %}
11829
11830 // Unsigned long DIVMOD with Register, both quotient and mod results
11831 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11832 no_rax_rdx_RegL div, rFlagsReg cr)
11833 %{
11834 match(UDivModL rax div);
11835 effect(TEMP tmp, KILL cr);
11836
11837 ins_cost(300);
11838 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11839 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11840 %}
11841 ins_encode %{
11842 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11843 %}
11844 ins_pipe(pipe_slow);
11845 %}
11846
11847 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11848 rFlagsReg cr)
11849 %{
11850 match(Set rdx (ModI rax div));
11851 effect(KILL rax, KILL cr);
11852
11853 ins_cost(300); // XXX
11854 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11855 "jne,s normal\n\t"
11856 "xorl rdx, rdx\n\t"
11857 "cmpl $div, -1\n\t"
11858 "je,s done\n"
11859 "normal: cdql\n\t"
11860 "idivl $div\n"
11861 "done:" %}
11862 ins_encode(cdql_enc(div));
11863 ins_pipe(ialu_reg_reg_alu0);
11864 %}
11865
11866 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11867 rFlagsReg cr)
11868 %{
11869 match(Set rdx (ModL rax div));
11870 effect(KILL rax, KILL cr);
11871
11872 ins_cost(300); // XXX
11873 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11874 "cmpq rax, rdx\n\t"
11875 "jne,s normal\n\t"
11876 "xorl rdx, rdx\n\t"
11877 "cmpq $div, -1\n\t"
11878 "je,s done\n"
11879 "normal: cdqq\n\t"
11880 "idivq $div\n"
11881 "done:" %}
11882 ins_encode(cdqq_enc(div));
11883 ins_pipe(ialu_reg_reg_alu0);
11884 %}
11885
11886 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11887 %{
11888 match(Set rdx (UModI rax div));
11889 effect(KILL rax, KILL cr);
11890
11891 ins_cost(300);
11892 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11893 ins_encode %{
11894 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11895 %}
11896 ins_pipe(ialu_reg_reg_alu0);
11897 %}
11898
11899 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11900 %{
11901 match(Set rdx (UModL rax div));
11902 effect(KILL rax, KILL cr);
11903
11904 ins_cost(300);
11905 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11906 ins_encode %{
11907 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11908 %}
11909 ins_pipe(ialu_reg_reg_alu0);
11910 %}
11911
11912 // Integer Shift Instructions
11913 // Shift Left by one, two, three
11914 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11915 %{
11916 predicate(!UseAPX);
11917 match(Set dst (LShiftI dst shift));
11918 effect(KILL cr);
11919
11920 format %{ "sall $dst, $shift" %}
11921 ins_encode %{
11922 __ sall($dst$$Register, $shift$$constant);
11923 %}
11924 ins_pipe(ialu_reg);
11925 %}
11926
11927 // Shift Left by one, two, three
11928 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11929 %{
11930 predicate(UseAPX);
11931 match(Set dst (LShiftI src shift));
11932 effect(KILL cr);
11933 flag(PD::Flag_ndd_demotable_opr1);
11934
11935 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11936 ins_encode %{
11937 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11938 %}
11939 ins_pipe(ialu_reg);
11940 %}
11941
11942 // Shift Left by 8-bit immediate
11943 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11944 %{
11945 predicate(!UseAPX);
11946 match(Set dst (LShiftI dst shift));
11947 effect(KILL cr);
11948
11949 format %{ "sall $dst, $shift" %}
11950 ins_encode %{
11951 __ sall($dst$$Register, $shift$$constant);
11952 %}
11953 ins_pipe(ialu_reg);
11954 %}
11955
11956 // Shift Left by 8-bit immediate
11957 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11958 %{
11959 predicate(UseAPX);
11960 match(Set dst (LShiftI src shift));
11961 effect(KILL cr);
11962 flag(PD::Flag_ndd_demotable_opr1);
11963
11964 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11965 ins_encode %{
11966 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11967 %}
11968 ins_pipe(ialu_reg);
11969 %}
11970
11971 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11972 %{
11973 predicate(UseAPX);
11974 match(Set dst (LShiftI (LoadI src) shift));
11975 effect(KILL cr);
11976
11977 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11978 ins_encode %{
11979 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11980 %}
11981 ins_pipe(ialu_reg);
11982 %}
11983
11984 // Shift Left by 8-bit immediate
11985 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11986 %{
11987 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11988 effect(KILL cr);
11989
11990 format %{ "sall $dst, $shift" %}
11991 ins_encode %{
11992 __ sall($dst$$Address, $shift$$constant);
11993 %}
11994 ins_pipe(ialu_mem_imm);
11995 %}
11996
11997 // Shift Left by variable
11998 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11999 %{
12000 predicate(!VM_Version::supports_bmi2());
12001 match(Set dst (LShiftI dst shift));
12002 effect(KILL cr);
12003
12004 format %{ "sall $dst, $shift" %}
12005 ins_encode %{
12006 __ sall($dst$$Register);
12007 %}
12008 ins_pipe(ialu_reg_reg);
12009 %}
12010
12011 // Shift Left by variable
12012 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12013 %{
12014 predicate(!VM_Version::supports_bmi2());
12015 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12016 effect(KILL cr);
12017
12018 format %{ "sall $dst, $shift" %}
12019 ins_encode %{
12020 __ sall($dst$$Address);
12021 %}
12022 ins_pipe(ialu_mem_reg);
12023 %}
12024
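// With BMI2, shlx/sarx/shrx take the shift count in any register and do not
// modify the flags, so these forms need neither the RCX pinning nor the
// rFlagsReg kill required by the legacy shift instructions.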
12025 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12026 %{
12027 predicate(VM_Version::supports_bmi2());
12028 match(Set dst (LShiftI src shift));
12029
12030 format %{ "shlxl $dst, $src, $shift" %}
12031 ins_encode %{
12032 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12033 %}
12034 ins_pipe(ialu_reg_reg);
12035 %}
12036
12037 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12038 %{
12039 predicate(VM_Version::supports_bmi2());
12040 match(Set dst (LShiftI (LoadI src) shift));
12041 ins_cost(175);
12042 format %{ "shlxl $dst, $src, $shift" %}
12043 ins_encode %{
12044 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12045 %}
12046 ins_pipe(ialu_reg_mem);
12047 %}
12048
12049 // Arithmetic Shift Right by 8-bit immediate
12050 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12051 %{
12052 predicate(!UseAPX);
12053 match(Set dst (RShiftI dst shift));
12054 effect(KILL cr);
12055
12056 format %{ "sarl $dst, $shift" %}
12057 ins_encode %{
12058 __ sarl($dst$$Register, $shift$$constant);
12059 %}
12060 ins_pipe(ialu_mem_imm);
12061 %}
12062
12063 // Arithmetic Shift Right by 8-bit immediate
12064 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12065 %{
12066 predicate(UseAPX);
12067 match(Set dst (RShiftI src shift));
12068 effect(KILL cr);
12069 flag(PD::Flag_ndd_demotable_opr1);
12070
12071 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12072 ins_encode %{
12073 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12074 %}
12075 ins_pipe(ialu_mem_imm);
12076 %}
12077
12078 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12079 %{
12080 predicate(UseAPX);
12081 match(Set dst (RShiftI (LoadI src) shift));
12082 effect(KILL cr);
12083
12084 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12085 ins_encode %{
12086 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12087 %}
12088 ins_pipe(ialu_mem_imm);
12089 %}
12090
12091 // Arithmetic Shift Right by 8-bit immediate
12092 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12093 %{
12094 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12095 effect(KILL cr);
12096
12097 format %{ "sarl $dst, $shift" %}
12098 ins_encode %{
12099 __ sarl($dst$$Address, $shift$$constant);
12100 %}
12101 ins_pipe(ialu_mem_imm);
12102 %}
12103
12104 // Arithmetic Shift Right by variable
12105 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12106 %{
12107 predicate(!VM_Version::supports_bmi2());
12108 match(Set dst (RShiftI dst shift));
12109 effect(KILL cr);
12110
12111 format %{ "sarl $dst, $shift" %}
12112 ins_encode %{
12113 __ sarl($dst$$Register);
12114 %}
12115 ins_pipe(ialu_reg_reg);
12116 %}
12117
12118 // Arithmetic Shift Right by variable
12119 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12120 %{
12121 predicate(!VM_Version::supports_bmi2());
12122 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12123 effect(KILL cr);
12124
12125 format %{ "sarl $dst, $shift" %}
12126 ins_encode %{
12127 __ sarl($dst$$Address);
12128 %}
12129 ins_pipe(ialu_mem_reg);
12130 %}
12131
12132 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12133 %{
12134 predicate(VM_Version::supports_bmi2());
12135 match(Set dst (RShiftI src shift));
12136
12137 format %{ "sarxl $dst, $src, $shift" %}
12138 ins_encode %{
12139 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12140 %}
12141 ins_pipe(ialu_reg_reg);
12142 %}
12143
12144 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12145 %{
12146 predicate(VM_Version::supports_bmi2());
12147 match(Set dst (RShiftI (LoadI src) shift));
12148 ins_cost(175);
12149 format %{ "sarxl $dst, $src, $shift" %}
12150 ins_encode %{
12151 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12152 %}
12153 ins_pipe(ialu_reg_mem);
12154 %}
12155
12156 // Logical Shift Right by 8-bit immediate
12157 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12158 %{
12159 predicate(!UseAPX);
12160 match(Set dst (URShiftI dst shift));
12161 effect(KILL cr);
12162
12163 format %{ "shrl $dst, $shift" %}
12164 ins_encode %{
12165 __ shrl($dst$$Register, $shift$$constant);
12166 %}
12167 ins_pipe(ialu_reg);
12168 %}
12169
12170 // Logical Shift Right by 8-bit immediate
12171 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12172 %{
12173 predicate(UseAPX);
12174 match(Set dst (URShiftI src shift));
12175 effect(KILL cr);
12176 flag(PD::Flag_ndd_demotable_opr1);
12177
12178 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12179 ins_encode %{
12180 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12181 %}
12182 ins_pipe(ialu_reg);
12183 %}
12184
12185 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12186 %{
12187 predicate(UseAPX);
12188 match(Set dst (URShiftI (LoadI src) shift));
12189 effect(KILL cr);
12190
12191 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12192 ins_encode %{
12193 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12194 %}
12195 ins_pipe(ialu_reg);
12196 %}
12197
12198 // Logical Shift Right by 8-bit immediate
12199 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12200 %{
12201 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12202 effect(KILL cr);
12203
12204 format %{ "shrl $dst, $shift" %}
12205 ins_encode %{
12206 __ shrl($dst$$Address, $shift$$constant);
12207 %}
12208 ins_pipe(ialu_mem_imm);
12209 %}
12210
12211 // Logical Shift Right by variable
12212 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12213 %{
12214 predicate(!VM_Version::supports_bmi2());
12215 match(Set dst (URShiftI dst shift));
12216 effect(KILL cr);
12217
12218 format %{ "shrl $dst, $shift" %}
12219 ins_encode %{
12220 __ shrl($dst$$Register);
12221 %}
12222 ins_pipe(ialu_reg_reg);
12223 %}
12224
12225 // Logical Shift Right by variable
12226 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12227 %{
12228 predicate(!VM_Version::supports_bmi2());
12229 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12230 effect(KILL cr);
12231
12232 format %{ "shrl $dst, $shift" %}
12233 ins_encode %{
12234 __ shrl($dst$$Address);
12235 %}
12236 ins_pipe(ialu_mem_reg);
12237 %}
12238
12239 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12240 %{
12241 predicate(VM_Version::supports_bmi2());
12242 match(Set dst (URShiftI src shift));
12243
12244 format %{ "shrxl $dst, $src, $shift" %}
12245 ins_encode %{
12246 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12247 %}
12248 ins_pipe(ialu_reg_reg);
12249 %}
12250
12251 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12252 %{
12253 predicate(VM_Version::supports_bmi2());
12254 match(Set dst (URShiftI (LoadI src) shift));
12255 ins_cost(175);
12256 format %{ "shrxl $dst, $src, $shift" %}
12257 ins_encode %{
12258 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12259 %}
12260 ins_pipe(ialu_reg_mem);
12261 %}
12262
12263 // Long Shift Instructions
12264 // Shift Left by one, two, three
12265 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12266 %{
12267 predicate(!UseAPX);
12268 match(Set dst (LShiftL dst shift));
12269 effect(KILL cr);
12270
12271 format %{ "salq $dst, $shift" %}
12272 ins_encode %{
12273 __ salq($dst$$Register, $shift$$constant);
12274 %}
12275 ins_pipe(ialu_reg);
12276 %}
12277
12278 // Shift Left by one, two, three
12279 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12280 %{
12281 predicate(UseAPX);
12282 match(Set dst (LShiftL src shift));
12283 effect(KILL cr);
12284 flag(PD::Flag_ndd_demotable_opr1);
12285
12286 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12287 ins_encode %{
12288 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12289 %}
12290 ins_pipe(ialu_reg);
12291 %}
12292
12293 // Shift Left by 8-bit immediate
12294 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12295 %{
12296 predicate(!UseAPX);
12297 match(Set dst (LShiftL dst shift));
12298 effect(KILL cr);
12299
12300 format %{ "salq $dst, $shift" %}
12301 ins_encode %{
12302 __ salq($dst$$Register, $shift$$constant);
12303 %}
12304 ins_pipe(ialu_reg);
12305 %}
12306
12307 // Shift Left by 8-bit immediate
12308 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12309 %{
12310 predicate(UseAPX);
12311 match(Set dst (LShiftL src shift));
12312 effect(KILL cr);
12313 flag(PD::Flag_ndd_demotable_opr1);
12314
12315 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12316 ins_encode %{
12317 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12318 %}
12319 ins_pipe(ialu_reg);
12320 %}
12321
12322 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12323 %{
12324 predicate(UseAPX);
12325 match(Set dst (LShiftL (LoadL src) shift));
12326 effect(KILL cr);
12327
12328 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12329 ins_encode %{
12330 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12331 %}
12332 ins_pipe(ialu_reg);
12333 %}
12334
12335 // Shift Left by 8-bit immediate
12336 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12337 %{
12338 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12339 effect(KILL cr);
12340
12341 format %{ "salq $dst, $shift" %}
12342 ins_encode %{
12343 __ salq($dst$$Address, $shift$$constant);
12344 %}
12345 ins_pipe(ialu_mem_imm);
12346 %}
12347
12348 // Shift Left by variable
12349 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12350 %{
12351 predicate(!VM_Version::supports_bmi2());
12352 match(Set dst (LShiftL dst shift));
12353 effect(KILL cr);
12354
12355 format %{ "salq $dst, $shift" %}
12356 ins_encode %{
12357 __ salq($dst$$Register);
12358 %}
12359 ins_pipe(ialu_reg_reg);
12360 %}
12361
12362 // Shift Left by variable
12363 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12364 %{
12365 predicate(!VM_Version::supports_bmi2());
12366 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12367 effect(KILL cr);
12368
12369 format %{ "salq $dst, $shift" %}
12370 ins_encode %{
12371 __ salq($dst$$Address);
12372 %}
12373 ins_pipe(ialu_mem_reg);
12374 %}
12375
12376 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12377 %{
12378 predicate(VM_Version::supports_bmi2());
12379 match(Set dst (LShiftL src shift));
12380
12381 format %{ "shlxq $dst, $src, $shift" %}
12382 ins_encode %{
12383 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12384 %}
12385 ins_pipe(ialu_reg_reg);
12386 %}
12387
12388 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12389 %{
12390 predicate(VM_Version::supports_bmi2());
12391 match(Set dst (LShiftL (LoadL src) shift));
12392 ins_cost(175);
12393 format %{ "shlxq $dst, $src, $shift" %}
12394 ins_encode %{
12395 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12396 %}
12397 ins_pipe(ialu_reg_mem);
12398 %}
12399
12400 // Arithmetic Shift Right by 8-bit immediate
12401 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12402 %{
12403 predicate(!UseAPX);
12404 match(Set dst (RShiftL dst shift));
12405 effect(KILL cr);
12406
12407 format %{ "sarq $dst, $shift" %}
12408 ins_encode %{
12409 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12410 %}
12411 ins_pipe(ialu_mem_imm);
12412 %}
12413
12414 // Arithmetic Shift Right by 8-bit immediate
12415 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12416 %{
12417 predicate(UseAPX);
12418 match(Set dst (RShiftL src shift));
12419 effect(KILL cr);
12420 flag(PD::Flag_ndd_demotable_opr1);
12421
12422 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12423 ins_encode %{
12424 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12425 %}
12426 ins_pipe(ialu_mem_imm);
12427 %}
12428
12429 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12430 %{
12431 predicate(UseAPX);
12432 match(Set dst (RShiftL (LoadL src) shift));
12433 effect(KILL cr);
12434
12435 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12436 ins_encode %{
12437 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12438 %}
12439 ins_pipe(ialu_mem_imm);
12440 %}
12441
12442 // Arithmetic Shift Right by 8-bit immediate
12443 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12444 %{
12445 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12446 effect(KILL cr);
12447
12448 format %{ "sarq $dst, $shift" %}
12449 ins_encode %{
12450 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12451 %}
12452 ins_pipe(ialu_mem_imm);
12453 %}
12454
12455 // Arithmetic Shift Right by variable
12456 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12457 %{
12458 predicate(!VM_Version::supports_bmi2());
12459 match(Set dst (RShiftL dst shift));
12460 effect(KILL cr);
12461
12462 format %{ "sarq $dst, $shift" %}
12463 ins_encode %{
12464 __ sarq($dst$$Register);
12465 %}
12466 ins_pipe(ialu_reg_reg);
12467 %}
12468
12469 // Arithmetic Shift Right by variable
12470 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12471 %{
12472 predicate(!VM_Version::supports_bmi2());
12473 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12474 effect(KILL cr);
12475
12476 format %{ "sarq $dst, $shift" %}
12477 ins_encode %{
12478 __ sarq($dst$$Address);
12479 %}
12480 ins_pipe(ialu_mem_reg);
12481 %}
12482
12483 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12484 %{
12485 predicate(VM_Version::supports_bmi2());
12486 match(Set dst (RShiftL src shift));
12487
12488 format %{ "sarxq $dst, $src, $shift" %}
12489 ins_encode %{
12490 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12491 %}
12492 ins_pipe(ialu_reg_reg);
12493 %}
12494
12495 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12496 %{
12497 predicate(VM_Version::supports_bmi2());
12498 match(Set dst (RShiftL (LoadL src) shift));
12499 ins_cost(175);
12500 format %{ "sarxq $dst, $src, $shift" %}
12501 ins_encode %{
12502 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12503 %}
12504 ins_pipe(ialu_reg_mem);
12505 %}
12506
12507 // Logical Shift Right by 8-bit immediate
12508 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12509 %{
12510 predicate(!UseAPX);
12511 match(Set dst (URShiftL dst shift));
12512 effect(KILL cr);
12513
12514 format %{ "shrq $dst, $shift" %}
12515 ins_encode %{
12516 __ shrq($dst$$Register, $shift$$constant);
12517 %}
12518 ins_pipe(ialu_reg);
12519 %}
12520
12521 // Logical Shift Right by 8-bit immediate
12522 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12523 %{
12524 predicate(UseAPX);
12525 match(Set dst (URShiftL src shift));
12526 effect(KILL cr);
12527 flag(PD::Flag_ndd_demotable_opr1);
12528
12529 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12530 ins_encode %{
12531 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12532 %}
12533 ins_pipe(ialu_reg);
12534 %}
12535
12536 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12537 %{
12538 predicate(UseAPX);
12539 match(Set dst (URShiftL (LoadL src) shift));
12540 effect(KILL cr);
12541
12542 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12543 ins_encode %{
12544 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12545 %}
12546 ins_pipe(ialu_reg);
12547 %}
12548
12549 // Logical Shift Right by 8-bit immediate
12550 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12551 %{
12552 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12553 effect(KILL cr);
12554
12555 format %{ "shrq $dst, $shift" %}
12556 ins_encode %{
12557 __ shrq($dst$$Address, $shift$$constant);
12558 %}
12559 ins_pipe(ialu_mem_imm);
12560 %}
12561
12562 // Logical Shift Right by variable
12563 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12564 %{
12565 predicate(!VM_Version::supports_bmi2());
12566 match(Set dst (URShiftL dst shift));
12567 effect(KILL cr);
12568
12569 format %{ "shrq $dst, $shift" %}
12570 ins_encode %{
12571 __ shrq($dst$$Register);
12572 %}
12573 ins_pipe(ialu_reg_reg);
12574 %}
12575
12576 // Logical Shift Right by variable
12577 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12578 %{
12579 predicate(!VM_Version::supports_bmi2());
12580 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12581 effect(KILL cr);
12582
12583 format %{ "shrq $dst, $shift" %}
12584 ins_encode %{
12585 __ shrq($dst$$Address);
12586 %}
12587 ins_pipe(ialu_mem_reg);
12588 %}
12589
12590 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12591 %{
12592 predicate(VM_Version::supports_bmi2());
12593 match(Set dst (URShiftL src shift));
12594
12595 format %{ "shrxq $dst, $src, $shift" %}
12596 ins_encode %{
12597 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12598 %}
12599 ins_pipe(ialu_reg_reg);
12600 %}
12601
12602 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12603 %{
12604 predicate(VM_Version::supports_bmi2());
12605 match(Set dst (URShiftL (LoadL src) shift));
12606 ins_cost(175);
12607 format %{ "shrxq $dst, $src, $shift" %}
12608 ins_encode %{
12609 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12610 %}
12611 ins_pipe(ialu_reg_mem);
12612 %}
12613
12614 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
12615 // This idiom is used by the compiler for the i2b bytecode.
12616 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12617 %{
12618 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12619
12620 format %{ "movsbl $dst, $src\t# i2b" %}
12621 ins_encode %{
12622 __ movsbl($dst$$Register, $src$$Register);
12623 %}
12624 ins_pipe(ialu_reg_reg);
12625 %}
12626
12627 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
12628 // This idiom is used by the compiler for the i2s bytecode.
12629 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12630 %{
12631 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12632
12633 format %{ "movswl $dst, $src\t# i2s" %}
12634 ins_encode %{
12635 __ movswl($dst$$Register, $src$$Register);
12636 %}
12637 ins_pipe(ialu_reg_reg);
12638 %}
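
// For illustration, the Java-level casts these two idioms come from:
//   (byte)x   ->  (x << 24) >> 24  ->  i2b  ->  movsbl (sign-extend byte)
//   (short)x  ->  (x << 16) >> 16  ->  i2s  ->  movswl (sign-extend word)
// so the shift pair collapses into a single sign-extending move.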
12639
12640 // ROL/ROR instructions
12641
12642 // Rotate left by constant.
12643 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12644 %{
12645 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12646 match(Set dst (RotateLeft dst shift));
12647 effect(KILL cr);
12648 format %{ "roll $dst, $shift" %}
12649 ins_encode %{
12650 __ roll($dst$$Register, $shift$$constant);
12651 %}
12652 ins_pipe(ialu_reg);
12653 %}
12654
12655 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12656 %{
12657 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12658 match(Set dst (RotateLeft src shift));
12659 format %{ "rolxl $dst, $src, $shift" %}
12660 ins_encode %{
12661 int shift = 32 - ($shift$$constant & 31);
12662 __ rorxl($dst$$Register, $src$$Register, shift);
12663 %}
12664 ins_pipe(ialu_reg_reg);
12665 %}
12666
12667 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12668 %{
12669 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12670 match(Set dst (RotateLeft (LoadI src) shift));
12671 ins_cost(175);
12672 format %{ "rolxl $dst, $src, $shift" %}
12673 ins_encode %{
12674 int shift = 32 - ($shift$$constant & 31);
12675 __ rorxl($dst$$Register, $src$$Address, shift);
12676 %}
12677 ins_pipe(ialu_reg_mem);
12678 %}
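
// BMI2 provides rorx (rotate right, flags untouched) but no rotate-left
// counterpart, so a constant rotate-left is emitted as a rotate-right by the
// complementary count: rol(x, k) == ror(x, 32 - k) for 32-bit operands.
// That is why the encodings above compute 'shift = 32 - (count & 31)' and
// call rorxl even though the matched node is a RotateLeft.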
12679
12680 // Rotate Left by variable
12681 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12682 %{
12683 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12684 match(Set dst (RotateLeft dst shift));
12685 effect(KILL cr);
12686 format %{ "roll $dst, $shift" %}
12687 ins_encode %{
12688 __ roll($dst$$Register);
12689 %}
12690 ins_pipe(ialu_reg_reg);
12691 %}
12692
12693 // Rotate Left by variable
12694 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12695 %{
12696 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12697 match(Set dst (RotateLeft src shift));
12698 effect(KILL cr);
12699 flag(PD::Flag_ndd_demotable_opr1);
12700
12701 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12702 ins_encode %{
12703 __ eroll($dst$$Register, $src$$Register, false);
12704 %}
12705 ins_pipe(ialu_reg_reg);
12706 %}
12707
12708 // Rotate Right by constant.
12709 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12710 %{
12711 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12712 match(Set dst (RotateRight dst shift));
12713 effect(KILL cr);
12714 format %{ "rorl $dst, $shift" %}
12715 ins_encode %{
12716 __ rorl($dst$$Register, $shift$$constant);
12717 %}
12718 ins_pipe(ialu_reg);
12719 %}
12720
12721 // Rotate Right by constant.
12722 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12723 %{
12724 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12725 match(Set dst (RotateRight src shift));
12726 format %{ "rorxl $dst, $src, $shift" %}
12727 ins_encode %{
12728 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12729 %}
12730 ins_pipe(ialu_reg_reg);
12731 %}
12732
12733 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12734 %{
12735 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12736 match(Set dst (RotateRight (LoadI src) shift));
12737 ins_cost(175);
12738 format %{ "rorxl $dst, $src, $shift" %}
12739 ins_encode %{
12740 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12741 %}
12742 ins_pipe(ialu_reg_mem);
12743 %}
12744
12745 // Rotate Right by variable
12746 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12747 %{
12748 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12749 match(Set dst (RotateRight dst shift));
12750 effect(KILL cr);
12751 format %{ "rorl $dst, $shift" %}
12752 ins_encode %{
12753 __ rorl($dst$$Register);
12754 %}
12755 ins_pipe(ialu_reg_reg);
12756 %}
12757
12758 // Rotate Right by variable
12759 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12760 %{
12761 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12762 match(Set dst (RotateRight src shift));
12763 effect(KILL cr);
12764 flag(PD::Flag_ndd_demotable_opr1);
12765
12766   format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12767 ins_encode %{
12768 __ erorl($dst$$Register, $src$$Register, false);
12769 %}
12770 ins_pipe(ialu_reg_reg);
12771 %}
12772
12773 // Rotate Left by constant.
12774 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12775 %{
12776 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12777 match(Set dst (RotateLeft dst shift));
12778 effect(KILL cr);
12779 format %{ "rolq $dst, $shift" %}
12780 ins_encode %{
12781 __ rolq($dst$$Register, $shift$$constant);
12782 %}
12783 ins_pipe(ialu_reg);
12784 %}
12785
12786 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12787 %{
12788 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12789 match(Set dst (RotateLeft src shift));
12790 format %{ "rolxq $dst, $src, $shift" %}
12791 ins_encode %{
12792 int shift = 64 - ($shift$$constant & 63);
12793 __ rorxq($dst$$Register, $src$$Register, shift);
12794 %}
12795 ins_pipe(ialu_reg_reg);
12796 %}
12797
12798 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12799 %{
12800 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12801 match(Set dst (RotateLeft (LoadL src) shift));
12802 ins_cost(175);
12803 format %{ "rolxq $dst, $src, $shift" %}
12804 ins_encode %{
12805 int shift = 64 - ($shift$$constant & 63);
12806 __ rorxq($dst$$Register, $src$$Address, shift);
12807 %}
12808 ins_pipe(ialu_reg_mem);
12809 %}
12810
12811 // Rotate Left by variable
12812 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12813 %{
12814 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12815 match(Set dst (RotateLeft dst shift));
12816 effect(KILL cr);
12817
12818 format %{ "rolq $dst, $shift" %}
12819 ins_encode %{
12820 __ rolq($dst$$Register);
12821 %}
12822 ins_pipe(ialu_reg_reg);
12823 %}
12824
12825 // Rotate Left by variable
12826 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12827 %{
12828 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12829 match(Set dst (RotateLeft src shift));
12830 effect(KILL cr);
12831 flag(PD::Flag_ndd_demotable_opr1);
12832
12833   format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12834 ins_encode %{
12835 __ erolq($dst$$Register, $src$$Register, false);
12836 %}
12837 ins_pipe(ialu_reg_reg);
12838 %}
12839
12840 // Rotate Right by constant.
12841 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12842 %{
12843 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12844 match(Set dst (RotateRight dst shift));
12845 effect(KILL cr);
12846 format %{ "rorq $dst, $shift" %}
12847 ins_encode %{
12848 __ rorq($dst$$Register, $shift$$constant);
12849 %}
12850 ins_pipe(ialu_reg);
12851 %}
12852
12853 // Rotate Right by constant
12854 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12855 %{
12856 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12857 match(Set dst (RotateRight src shift));
12858 format %{ "rorxq $dst, $src, $shift" %}
12859 ins_encode %{
12860 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12861 %}
12862 ins_pipe(ialu_reg_reg);
12863 %}
12864
12865 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12866 %{
12867 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12868 match(Set dst (RotateRight (LoadL src) shift));
12869 ins_cost(175);
12870 format %{ "rorxq $dst, $src, $shift" %}
12871 ins_encode %{
12872 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12873 %}
12874 ins_pipe(ialu_reg_mem);
12875 %}
12876
12877 // Rotate Right by variable
12878 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12879 %{
12880 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12881 match(Set dst (RotateRight dst shift));
12882 effect(KILL cr);
12883 format %{ "rorq $dst, $shift" %}
12884 ins_encode %{
12885 __ rorq($dst$$Register);
12886 %}
12887 ins_pipe(ialu_reg_reg);
12888 %}
12889
12890 // Rotate Right by variable
12891 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12892 %{
12893 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12894 match(Set dst (RotateRight src shift));
12895 effect(KILL cr);
12896 flag(PD::Flag_ndd_demotable_opr1);
12897
12898   format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12899 ins_encode %{
12900 __ erorq($dst$$Register, $src$$Register, false);
12901 %}
12902 ins_pipe(ialu_reg_reg);
12903 %}
12904
12905 //----------------------------- CompressBits/ExpandBits ------------------------
12906
12907 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12908 predicate(n->bottom_type()->isa_long());
12909 match(Set dst (CompressBits src mask));
12910 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12911 ins_encode %{
12912 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12913 %}
12914 ins_pipe( pipe_slow );
12915 %}
12916
12917 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12918 predicate(n->bottom_type()->isa_long());
12919 match(Set dst (ExpandBits src mask));
12920 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12921 ins_encode %{
12922 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12923 %}
12924 ins_pipe( pipe_slow );
12925 %}
12926
12927 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12928 predicate(n->bottom_type()->isa_long());
12929 match(Set dst (CompressBits src (LoadL mask)));
12930 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12931 ins_encode %{
12932 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12933 %}
12934 ins_pipe( pipe_slow );
12935 %}
12936
12937 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12938 predicate(n->bottom_type()->isa_long());
12939 match(Set dst (ExpandBits src (LoadL mask)));
12940 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12941 ins_encode %{
12942 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12943 %}
12944 ins_pipe( pipe_slow );
12945 %}
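
// Worked example of the BMI2 semantics behind these rules (values in binary,
// illustrative only):
//   pext: src = 1101, mask = 1010  ->  dst = 10
//         (the src bits at the mask's set positions, bits 3 and 1, are packed
//          into the low end of dst)
//   pdep: src = 10,   mask = 1010  ->  dst = 1000
//         (the low bits of src are scattered to the mask's set positions)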
12946
12947
12948 // Logical Instructions
12949
12950 // Integer Logical Instructions
12951
12952 // And Instructions
12953 // And Register with Register
12954 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12955 %{
12956 predicate(!UseAPX);
12957 match(Set dst (AndI dst src));
12958 effect(KILL cr);
12959 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12960
12961 format %{ "andl $dst, $src\t# int" %}
12962 ins_encode %{
12963 __ andl($dst$$Register, $src$$Register);
12964 %}
12965 ins_pipe(ialu_reg_reg);
12966 %}
12967
12968 // And Register with Register using New Data Destination (NDD)
12969 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12970 %{
12971 predicate(UseAPX);
12972 match(Set dst (AndI src1 src2));
12973 effect(KILL cr);
12974 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12975
12976 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12977 ins_encode %{
12978     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12980   %}
12981 ins_pipe(ialu_reg_reg);
12982 %}
12983
12984 // And Register with Immediate 255
12985 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12986 %{
12987 match(Set dst (AndI src mask));
12988
12989 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12990 ins_encode %{
12991 __ movzbl($dst$$Register, $src$$Register);
12992 %}
12993 ins_pipe(ialu_reg);
12994 %}
12995
12996 // And Register with Immediate 255 and promote to long
12997 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12998 %{
12999 match(Set dst (ConvI2L (AndI src mask)));
13000
13001 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
13002 ins_encode %{
13003 __ movzbl($dst$$Register, $src$$Register);
13004 %}
13005 ins_pipe(ialu_reg);
13006 %}
13007
13008 // And Register with Immediate 65535
13009 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13010 %{
13011 match(Set dst (AndI src mask));
13012
13013 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
13014 ins_encode %{
13015 __ movzwl($dst$$Register, $src$$Register);
13016 %}
13017 ins_pipe(ialu_reg);
13018 %}
13019
13020 // And Register with Immediate 65535 and promote to long
13021 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13022 %{
13023 match(Set dst (ConvI2L (AndI src mask)));
13024
13025 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
13026 ins_encode %{
13027 __ movzwl($dst$$Register, $src$$Register);
13028 %}
13029 ins_pipe(ialu_reg);
13030 %}
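
// The 0xFF and 0xFFFF masks are matched as zero-extending moves rather than
// an explicit AND: movzbl/movzwl leave the flags alone, and since a 32-bit
// move also clears the upper half of the 64-bit register, the same single
// instruction covers the ConvI2L forms above, e.g. (long)(x & 0xFF).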
13031
13032 // Can skip int2long conversions after AND with small bitmask
13033 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13034 %{
13035 predicate(VM_Version::supports_bmi2());
13036 ins_cost(125);
13037 effect(TEMP tmp, KILL cr);
13038 match(Set dst (ConvI2L (AndI src mask)));
13039 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13040 ins_encode %{
13041 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13042 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13043 %}
13044 ins_pipe(ialu_reg_reg);
13045 %}
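
// bzhi copies the source and zeroes every bit at or above the index held in
// the count register, so an AND with a mask of the form 2^n - 1 plus the
// int-to-long conversion collapse to (illustrative):
//   movl  tmp, n            // n = exact_log2(mask + 1)
//   bzhiq dst, src, tmp     // keep bits 0..n-1, clear the rest (and bits 32..63)
// e.g. (long)(x & 0x3FF) keeps bits 0..9 of x.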
13046
13047 // And Register with Immediate
13048 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13049 %{
13050 predicate(!UseAPX);
13051 match(Set dst (AndI dst src));
13052 effect(KILL cr);
13053 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13054
13055 format %{ "andl $dst, $src\t# int" %}
13056 ins_encode %{
13057 __ andl($dst$$Register, $src$$constant);
13058 %}
13059 ins_pipe(ialu_reg);
13060 %}
13061
13062 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13063 %{
13064 predicate(UseAPX);
13065 match(Set dst (AndI src1 src2));
13066 effect(KILL cr);
13067 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13068
13069 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13070 ins_encode %{
13071 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13072 %}
13073 ins_pipe(ialu_reg);
13074 %}
13075
13076 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13077 %{
13078 predicate(UseAPX);
13079 match(Set dst (AndI (LoadI src1) src2));
13080 effect(KILL cr);
13081 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13082
13083 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13084 ins_encode %{
13085 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13086 %}
13087 ins_pipe(ialu_reg);
13088 %}
13089
13090 // And Register with Memory
13091 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13092 %{
13093 predicate(!UseAPX);
13094 match(Set dst (AndI dst (LoadI src)));
13095 effect(KILL cr);
13096 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13097
13098 ins_cost(150);
13099 format %{ "andl $dst, $src\t# int" %}
13100 ins_encode %{
13101 __ andl($dst$$Register, $src$$Address);
13102 %}
13103 ins_pipe(ialu_reg_mem);
13104 %}
13105
13106 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13107 %{
13108 predicate(UseAPX);
13109 match(Set dst (AndI src1 (LoadI src2)));
13110 effect(KILL cr);
13111 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13112
13113 ins_cost(150);
13114 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13115 ins_encode %{
13116 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13117 %}
13118 ins_pipe(ialu_reg_mem);
13119 %}
13120
13121 // And Memory with Register
13122 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13123 %{
13124 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13125 effect(KILL cr);
13126 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13127
13128 ins_cost(150);
13129 format %{ "andb $dst, $src\t# byte" %}
13130 ins_encode %{
13131 __ andb($dst$$Address, $src$$Register);
13132 %}
13133 ins_pipe(ialu_mem_reg);
13134 %}
13135
13136 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13137 %{
13138 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13139 effect(KILL cr);
13140 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13141
13142 ins_cost(150);
13143 format %{ "andl $dst, $src\t# int" %}
13144 ins_encode %{
13145 __ andl($dst$$Address, $src$$Register);
13146 %}
13147 ins_pipe(ialu_mem_reg);
13148 %}
13149
13150 // And Memory with Immediate
13151 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13152 %{
13153 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13154 effect(KILL cr);
13155 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13156
13157 ins_cost(125);
13158 format %{ "andl $dst, $src\t# int" %}
13159 ins_encode %{
13160 __ andl($dst$$Address, $src$$constant);
13161 %}
13162 ins_pipe(ialu_mem_imm);
13163 %}
13164
13165 // BMI1 instructions
13166 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13167 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13168 predicate(UseBMI1Instructions);
13169 effect(KILL cr);
13170 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13171
13172 ins_cost(125);
13173 format %{ "andnl $dst, $src1, $src2" %}
13174
13175 ins_encode %{
13176 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13177 %}
13178 ins_pipe(ialu_reg_mem);
13179 %}
13180
13181 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13182 match(Set dst (AndI (XorI src1 minus_1) src2));
13183 predicate(UseBMI1Instructions);
13184 effect(KILL cr);
13185 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13186
13187 format %{ "andnl $dst, $src1, $src2" %}
13188
13189 ins_encode %{
13190 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13191 %}
13192 ins_pipe(ialu_reg);
13193 %}
13194
13195 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13196 match(Set dst (AndI (SubI imm_zero src) src));
13197 predicate(UseBMI1Instructions);
13198 effect(KILL cr);
13199 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13200
13201 format %{ "blsil $dst, $src" %}
13202
13203 ins_encode %{
13204 __ blsil($dst$$Register, $src$$Register);
13205 %}
13206 ins_pipe(ialu_reg);
13207 %}
13208
13209 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13210 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13211 predicate(UseBMI1Instructions);
13212 effect(KILL cr);
13213 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13214
13215 ins_cost(125);
13216 format %{ "blsil $dst, $src" %}
13217
13218 ins_encode %{
13219 __ blsil($dst$$Register, $src$$Address);
13220 %}
13221 ins_pipe(ialu_reg_mem);
13222 %}
13223
13224 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13225 %{
13226 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13227 predicate(UseBMI1Instructions);
13228 effect(KILL cr);
13229 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13230
13231 ins_cost(125);
13232 format %{ "blsmskl $dst, $src" %}
13233
13234 ins_encode %{
13235 __ blsmskl($dst$$Register, $src$$Address);
13236 %}
13237 ins_pipe(ialu_reg_mem);
13238 %}
13239
13240 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13241 %{
13242 match(Set dst (XorI (AddI src minus_1) src));
13243 predicate(UseBMI1Instructions);
13244 effect(KILL cr);
13245 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13246
13247 format %{ "blsmskl $dst, $src" %}
13248
13249 ins_encode %{
13250 __ blsmskl($dst$$Register, $src$$Register);
13251 %}
13252
13253 ins_pipe(ialu_reg);
13254 %}
13255
13256 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13257 %{
13258 match(Set dst (AndI (AddI src minus_1) src) );
13259 predicate(UseBMI1Instructions);
13260 effect(KILL cr);
13261 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13262
13263 format %{ "blsrl $dst, $src" %}
13264
13265 ins_encode %{
13266 __ blsrl($dst$$Register, $src$$Register);
13267 %}
13268
13269 ins_pipe(ialu_reg_mem);
13270 %}
13271
13272 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13273 %{
13274 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13275 predicate(UseBMI1Instructions);
13276 effect(KILL cr);
13277 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13278
13279 ins_cost(125);
13280 format %{ "blsrl $dst, $src" %}
13281
13282 ins_encode %{
13283 __ blsrl($dst$$Register, $src$$Address);
13284 %}
13285
13286 ins_pipe(ialu_reg);
13287 %}
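
// The BMI1 rules above implement the usual bit-manipulation identities, shown
// here as plain expressions on an int x (illustrative only):
//   andn:    ~a & b         // matched as (a ^ -1) & b
//   blsi:     x & -x        // isolate the lowest set bit (matched as (0 - x) & x)
//   blsmsk:   x ^ (x - 1)   // mask from bit 0 up to and including the lowest set bit
//   blsr:     x & (x - 1)   // clear the lowest set bit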
13288
13289 // Or Instructions
13290 // Or Register with Register
13291 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13292 %{
13293 predicate(!UseAPX);
13294 match(Set dst (OrI dst src));
13295 effect(KILL cr);
13296 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13297
13298 format %{ "orl $dst, $src\t# int" %}
13299 ins_encode %{
13300 __ orl($dst$$Register, $src$$Register);
13301 %}
13302 ins_pipe(ialu_reg_reg);
13303 %}
13304
13305 // Or Register with Register using New Data Destination (NDD)
13306 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13307 %{
13308 predicate(UseAPX);
13309 match(Set dst (OrI src1 src2));
13310 effect(KILL cr);
13311 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13312
13313 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13314 ins_encode %{
13315 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13316 %}
13317 ins_pipe(ialu_reg_reg);
13318 %}
13319
13320 // Or Register with Immediate
13321 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13322 %{
13323 predicate(!UseAPX);
13324 match(Set dst (OrI dst src));
13325 effect(KILL cr);
13326 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13327
13328 format %{ "orl $dst, $src\t# int" %}
13329 ins_encode %{
13330 __ orl($dst$$Register, $src$$constant);
13331 %}
13332 ins_pipe(ialu_reg);
13333 %}
13334
13335 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13336 %{
13337 predicate(UseAPX);
13338 match(Set dst (OrI src1 src2));
13339 effect(KILL cr);
13340 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13341
13342 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13343 ins_encode %{
13344 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13345 %}
13346 ins_pipe(ialu_reg);
13347 %}
13348
13349 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13350 %{
13351 predicate(UseAPX);
13352 match(Set dst (OrI src1 src2));
13353 effect(KILL cr);
13354 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13355
13356 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13357 ins_encode %{
13358 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13359 %}
13360 ins_pipe(ialu_reg);
13361 %}
13362
13363 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13364 %{
13365 predicate(UseAPX);
13366 match(Set dst (OrI (LoadI src1) src2));
13367 effect(KILL cr);
13368 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13369
13370 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13371 ins_encode %{
13372 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13373 %}
13374 ins_pipe(ialu_reg);
13375 %}
13376
13377 // Or Register with Memory
13378 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13379 %{
13380 predicate(!UseAPX);
13381 match(Set dst (OrI dst (LoadI src)));
13382 effect(KILL cr);
13383 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13384
13385 ins_cost(150);
13386 format %{ "orl $dst, $src\t# int" %}
13387 ins_encode %{
13388 __ orl($dst$$Register, $src$$Address);
13389 %}
13390 ins_pipe(ialu_reg_mem);
13391 %}
13392
13393 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13394 %{
13395 predicate(UseAPX);
13396 match(Set dst (OrI src1 (LoadI src2)));
13397 effect(KILL cr);
13398 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13399
13400 ins_cost(150);
13401 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13402 ins_encode %{
13403 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13404 %}
13405 ins_pipe(ialu_reg_mem);
13406 %}
13407
13408 // Or Memory with Register
13409 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13410 %{
13411 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13412 effect(KILL cr);
13413 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13414
13415 ins_cost(150);
13416 format %{ "orb $dst, $src\t# byte" %}
13417 ins_encode %{
13418 __ orb($dst$$Address, $src$$Register);
13419 %}
13420 ins_pipe(ialu_mem_reg);
13421 %}
13422
13423 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13424 %{
13425 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13426 effect(KILL cr);
13427 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13428
13429 ins_cost(150);
13430 format %{ "orl $dst, $src\t# int" %}
13431 ins_encode %{
13432 __ orl($dst$$Address, $src$$Register);
13433 %}
13434 ins_pipe(ialu_mem_reg);
13435 %}
13436
13437 // Or Memory with Immediate
13438 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13439 %{
13440 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13441 effect(KILL cr);
13442 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13443
13444 ins_cost(125);
13445 format %{ "orl $dst, $src\t# int" %}
13446 ins_encode %{
13447 __ orl($dst$$Address, $src$$constant);
13448 %}
13449 ins_pipe(ialu_mem_imm);
13450 %}
13451
13452 // Xor Instructions
13453 // Xor Register with Register
13454 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13455 %{
13456 predicate(!UseAPX);
13457 match(Set dst (XorI dst src));
13458 effect(KILL cr);
13459 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13460
13461 format %{ "xorl $dst, $src\t# int" %}
13462 ins_encode %{
13463 __ xorl($dst$$Register, $src$$Register);
13464 %}
13465 ins_pipe(ialu_reg_reg);
13466 %}
13467
13468 // Xor Register with Register using New Data Destination (NDD)
13469 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13470 %{
13471 predicate(UseAPX);
13472 match(Set dst (XorI src1 src2));
13473 effect(KILL cr);
13474 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13475
13476 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13477 ins_encode %{
13478 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13479 %}
13480 ins_pipe(ialu_reg_reg);
13481 %}
13482
13483 // Xor Register with Immediate -1
13484 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13485 %{
13486 predicate(!UseAPX);
13487 match(Set dst (XorI dst imm));
13488
13489 format %{ "notl $dst" %}
13490 ins_encode %{
13491 __ notl($dst$$Register);
13492 %}
13493 ins_pipe(ialu_reg);
13494 %}
13495
13496 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13497 %{
13498 match(Set dst (XorI src imm));
13499 predicate(UseAPX);
13500 flag(PD::Flag_ndd_demotable_opr1);
13501
13502 format %{ "enotl $dst, $src" %}
13503 ins_encode %{
13504 __ enotl($dst$$Register, $src$$Register);
13505 %}
13506 ins_pipe(ialu_reg);
13507 %}
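
// XOR with the constant -1 is a bitwise NOT, so it gets its own rules and is
// emitted as notl/enotl, which needs no immediate and, unlike xorl, does not
// write the flags (hence no KILL cr here).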
13508
13509 // Xor Register with Immediate
13510 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13511 %{
13512 // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
13513 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13514 match(Set dst (XorI dst src));
13515 effect(KILL cr);
13516 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13517
13518 format %{ "xorl $dst, $src\t# int" %}
13519 ins_encode %{
13520 __ xorl($dst$$Register, $src$$constant);
13521 %}
13522 ins_pipe(ialu_reg);
13523 %}
13524
13525 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13526 %{
13527 // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
13528 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13529 match(Set dst (XorI src1 src2));
13530 effect(KILL cr);
13531 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13532
13533 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13534 ins_encode %{
13535 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13536 %}
13537 ins_pipe(ialu_reg);
13538 %}
13539
13540 // Xor Register with Memory and Immediate
13541 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13542 %{
13543 predicate(UseAPX);
13544 match(Set dst (XorI (LoadI src1) src2));
13545 effect(KILL cr);
13546 ins_cost(150);
13547 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13548
13549 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13550 ins_encode %{
13551 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13552 %}
13553 ins_pipe(ialu_reg);
13554 %}
13555
13556 // Xor Register with Memory
13557 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13558 %{
13559 predicate(!UseAPX);
13560 match(Set dst (XorI dst (LoadI src)));
13561 effect(KILL cr);
13562 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13563
13564 ins_cost(150);
13565 format %{ "xorl $dst, $src\t# int" %}
13566 ins_encode %{
13567 __ xorl($dst$$Register, $src$$Address);
13568 %}
13569 ins_pipe(ialu_reg_mem);
13570 %}
13571
13572 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13573 %{
13574 predicate(UseAPX);
13575 match(Set dst (XorI src1 (LoadI src2)));
13576 effect(KILL cr);
13577 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13578
13579 ins_cost(150);
13580 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13581 ins_encode %{
13582 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13583 %}
13584 ins_pipe(ialu_reg_mem);
13585 %}
13586
13587 // Xor Memory with Register
13588 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13589 %{
13590 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13591 effect(KILL cr);
13592 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13593
13594 ins_cost(150);
13595 format %{ "xorb $dst, $src\t# byte" %}
13596 ins_encode %{
13597 __ xorb($dst$$Address, $src$$Register);
13598 %}
13599 ins_pipe(ialu_mem_reg);
13600 %}
13601
13602 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13603 %{
13604 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13605 effect(KILL cr);
13606 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13607
13608 ins_cost(150);
13609 format %{ "xorl $dst, $src\t# int" %}
13610 ins_encode %{
13611 __ xorl($dst$$Address, $src$$Register);
13612 %}
13613 ins_pipe(ialu_mem_reg);
13614 %}
13615
13616 // Xor Memory with Immediate
13617 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13618 %{
13619 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13620 effect(KILL cr);
13621 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13622
13623 ins_cost(125);
13624 format %{ "xorl $dst, $src\t# int" %}
13625 ins_encode %{
13626 __ xorl($dst$$Address, $src$$constant);
13627 %}
13628 ins_pipe(ialu_mem_imm);
13629 %}
13630
13631
13632 // Long Logical Instructions
13633
13634 // And Instructions
13635 // And Register with Register
13636 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13637 %{
13638 predicate(!UseAPX);
13639 match(Set dst (AndL dst src));
13640 effect(KILL cr);
13641 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13642
13643 format %{ "andq $dst, $src\t# long" %}
13644 ins_encode %{
13645 __ andq($dst$$Register, $src$$Register);
13646 %}
13647 ins_pipe(ialu_reg_reg);
13648 %}
13649
13650 // And Register with Register using New Data Destination (NDD)
13651 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13652 %{
13653 predicate(UseAPX);
13654 match(Set dst (AndL src1 src2));
13655 effect(KILL cr);
13656 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13657
13658 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13659 ins_encode %{
13660     __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13662   %}
13663 ins_pipe(ialu_reg_reg);
13664 %}
13665
13666 // And Register with Immediate 255
13667 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13668 %{
13669 match(Set dst (AndL src mask));
13670
13671 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13672 ins_encode %{
13673     // movzbl zeroes out the upper 32 bits and does not need REX.W
13674 __ movzbl($dst$$Register, $src$$Register);
13675 %}
13676 ins_pipe(ialu_reg);
13677 %}
13678
13679 // And Register with Immediate 65535
13680 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13681 %{
13682 match(Set dst (AndL src mask));
13683
13684 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13685 ins_encode %{
13686     // movzwl zeroes out the upper 32 bits and does not need REX.W
13687 __ movzwl($dst$$Register, $src$$Register);
13688 %}
13689 ins_pipe(ialu_reg);
13690 %}
13691
13692 // And Register with Immediate
13693 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13694 %{
13695 predicate(!UseAPX);
13696 match(Set dst (AndL dst src));
13697 effect(KILL cr);
13698 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13699
13700 format %{ "andq $dst, $src\t# long" %}
13701 ins_encode %{
13702 __ andq($dst$$Register, $src$$constant);
13703 %}
13704 ins_pipe(ialu_reg);
13705 %}
13706
13707 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13708 %{
13709 predicate(UseAPX);
13710 match(Set dst (AndL src1 src2));
13711 effect(KILL cr);
13712 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13713
13714 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13715 ins_encode %{
13716 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13717 %}
13718 ins_pipe(ialu_reg);
13719 %}
13720
13721 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13722 %{
13723 predicate(UseAPX);
13724 match(Set dst (AndL (LoadL src1) src2));
13725 effect(KILL cr);
13726 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13727
13728 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13729 ins_encode %{
13730 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13731 %}
13732 ins_pipe(ialu_reg);
13733 %}
13734
13735 // And Register with Memory
13736 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13737 %{
13738 predicate(!UseAPX);
13739 match(Set dst (AndL dst (LoadL src)));
13740 effect(KILL cr);
13741 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13742
13743 ins_cost(150);
13744 format %{ "andq $dst, $src\t# long" %}
13745 ins_encode %{
13746 __ andq($dst$$Register, $src$$Address);
13747 %}
13748 ins_pipe(ialu_reg_mem);
13749 %}
13750
13751 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13752 %{
13753 predicate(UseAPX);
13754 match(Set dst (AndL src1 (LoadL src2)));
13755 effect(KILL cr);
13756 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13757
13758 ins_cost(150);
13759 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13760 ins_encode %{
13761 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13762 %}
13763 ins_pipe(ialu_reg_mem);
13764 %}
13765
13766 // And Memory with Register
13767 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13768 %{
13769 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13770 effect(KILL cr);
13771 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13772
13773 ins_cost(150);
13774 format %{ "andq $dst, $src\t# long" %}
13775 ins_encode %{
13776 __ andq($dst$$Address, $src$$Register);
13777 %}
13778 ins_pipe(ialu_mem_reg);
13779 %}
13780
13781 // And Memory with Immediate
13782 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13783 %{
13784 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13785 effect(KILL cr);
13786 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13787
13788 ins_cost(125);
13789 format %{ "andq $dst, $src\t# long" %}
13790 ins_encode %{
13791 __ andq($dst$$Address, $src$$constant);
13792 %}
13793 ins_pipe(ialu_mem_imm);
13794 %}
13795
13796 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13797 %{
13798 // con should be a pure 64-bit immediate given that not(con) is a power of 2
13799 // because AND/OR works well enough for 8/32-bit values.
13800 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13801
13802 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13803 effect(KILL cr);
13804
13805 ins_cost(125);
13806 format %{ "btrq $dst, log2(not($con))\t# long" %}
13807 ins_encode %{
13808 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13809 %}
13810 ins_pipe(ialu_mem_imm);
13811 %}
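
// When the AND constant is the complement of a single bit that an 8/32-bit
// immediate cannot express, the in-memory AND is strength-reduced to a
// bit-test-and-reset, e.g. (illustrative): x &= ~(1L << 40)  ->  btrq [x], 40.
// The predicate requires log2 of the complemented constant to be above 30 so
// that plain and-immediate forms keep handling the small cases.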
13812
13813 // BMI1 instructions
13814 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13815 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13816 predicate(UseBMI1Instructions);
13817 effect(KILL cr);
13818 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13819
13820 ins_cost(125);
13821 format %{ "andnq $dst, $src1, $src2" %}
13822
13823 ins_encode %{
13824 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13825 %}
13826 ins_pipe(ialu_reg_mem);
13827 %}
13828
13829 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13830 match(Set dst (AndL (XorL src1 minus_1) src2));
13831 predicate(UseBMI1Instructions);
13832 effect(KILL cr);
13833 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13834
13835 format %{ "andnq $dst, $src1, $src2" %}
13836
13837 ins_encode %{
13838 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13839 %}
13840 ins_pipe(ialu_reg_mem);
13841 %}
13842
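// blsiq isolates the lowest set bit: dst = src & -src.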
13843 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13844 match(Set dst (AndL (SubL imm_zero src) src));
13845 predicate(UseBMI1Instructions);
13846 effect(KILL cr);
13847 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13848
13849 format %{ "blsiq $dst, $src" %}
13850
13851 ins_encode %{
13852 __ blsiq($dst$$Register, $src$$Register);
13853 %}
13854 ins_pipe(ialu_reg);
13855 %}
13856
13857 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13858 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13859 predicate(UseBMI1Instructions);
13860 effect(KILL cr);
13861 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13862
13863 ins_cost(125);
13864 format %{ "blsiq $dst, $src" %}
13865
13866 ins_encode %{
13867 __ blsiq($dst$$Register, $src$$Address);
13868 %}
13869 ins_pipe(ialu_reg_mem);
13870 %}
13871
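// blsmskq builds a mask up to and including the lowest set bit: dst = src ^ (src - 1).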
13872 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13873 %{
13874 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13875 predicate(UseBMI1Instructions);
13876 effect(KILL cr);
13877 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13878
13879 ins_cost(125);
13880 format %{ "blsmskq $dst, $src" %}
13881
13882 ins_encode %{
13883 __ blsmskq($dst$$Register, $src$$Address);
13884 %}
13885 ins_pipe(ialu_reg_mem);
13886 %}
13887
13888 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13889 %{
13890 match(Set dst (XorL (AddL src minus_1) src));
13891 predicate(UseBMI1Instructions);
13892 effect(KILL cr);
13893 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13894
13895 format %{ "blsmskq $dst, $src" %}
13896
13897 ins_encode %{
13898 __ blsmskq($dst$$Register, $src$$Register);
13899 %}
13900
13901 ins_pipe(ialu_reg);
13902 %}
13903
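// blsrq clears the lowest set bit: dst = src & (src - 1).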
13904 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13905 %{
13906 match(Set dst (AndL (AddL src minus_1) src) );
13907 predicate(UseBMI1Instructions);
13908 effect(KILL cr);
13909 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13910
13911 format %{ "blsrq $dst, $src" %}
13912
13913 ins_encode %{
13914 __ blsrq($dst$$Register, $src$$Register);
13915 %}
13916
13917 ins_pipe(ialu_reg);
13918 %}
13919
13920 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13921 %{
13922 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13923 predicate(UseBMI1Instructions);
13924 effect(KILL cr);
13925 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13926
13927 ins_cost(125);
13928 format %{ "blsrq $dst, $src" %}
13929
13930 ins_encode %{
13931 __ blsrq($dst$$Register, $src$$Address);
13932 %}
13933
13934 ins_pipe(ialu_reg);
13935 %}
13936
13937 // Or Instructions
13938 // Or Register with Register
13939 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13940 %{
13941 predicate(!UseAPX);
13942 match(Set dst (OrL dst src));
13943 effect(KILL cr);
13944 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13945
13946 format %{ "orq $dst, $src\t# long" %}
13947 ins_encode %{
13948 __ orq($dst$$Register, $src$$Register);
13949 %}
13950 ins_pipe(ialu_reg_reg);
13951 %}
13952
13953 // Or Register with Register using New Data Destination (NDD)
13954 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13955 %{
13956 predicate(UseAPX);
13957 match(Set dst (OrL src1 src2));
13958 effect(KILL cr);
13959 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13960
13961 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13962 ins_encode %{
13963 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13965 %}
13966 ins_pipe(ialu_reg_reg);
13967 %}
13968
13969 // Use any_RegP to match R15 (TLS register) without spilling.
13970 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13971 match(Set dst (OrL dst (CastP2X src)));
13972 effect(KILL cr);
13973 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13974
13975 format %{ "orq $dst, $src\t# long" %}
13976 ins_encode %{
13977 __ orq($dst$$Register, $src$$Register);
13978 %}
13979 ins_pipe(ialu_reg_reg);
13980 %}
13981
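// Or Register with CastP2X using New Data Destination (NDD)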
13982 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13983 match(Set dst (OrL src1 (CastP2X src2)));
13984 effect(KILL cr);
13985 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13986
13987 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13988 ins_encode %{
13989 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13990 %}
13991 ins_pipe(ialu_reg_reg);
13992 %}
13993
13994 // Or Register with Immediate
13995 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13996 %{
13997 predicate(!UseAPX);
13998 match(Set dst (OrL dst src));
13999 effect(KILL cr);
14000 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14001
14002 format %{ "orq $dst, $src\t# long" %}
14003 ins_encode %{
14004 __ orq($dst$$Register, $src$$constant);
14005 %}
14006 ins_pipe(ialu_reg);
14007 %}
14008
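// Or Register with Immediate using New Data Destination (NDD)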
14009 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14010 %{
14011 predicate(UseAPX);
14012 match(Set dst (OrL src1 src2));
14013 effect(KILL cr);
14014 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14015
14016 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14017 ins_encode %{
14018 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14019 %}
14020 ins_pipe(ialu_reg);
14021 %}
14022
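// Or Immediate with Register using New Data Destination (NDD)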
14023 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14024 %{
14025 predicate(UseAPX);
14026 match(Set dst (OrL src1 src2));
14027 effect(KILL cr);
14028 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14029
14030 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14031 ins_encode %{
14032 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14033 %}
14034 ins_pipe(ialu_reg);
14035 %}
14036
14037 // Or Memory with Immediate using New Data Destination (NDD)
14038 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14039 %{
14040 predicate(UseAPX);
14041 match(Set dst (OrL (LoadL src1) src2));
14042 effect(KILL cr);
14043 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14044
14045 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14046 ins_encode %{
14047 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14048 %}
14049 ins_pipe(ialu_reg);
14050 %}
14051
14052 // Or Register with Memory
14053 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14054 %{
14055 predicate(!UseAPX);
14056 match(Set dst (OrL dst (LoadL src)));
14057 effect(KILL cr);
14058 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14059
14060 ins_cost(150);
14061 format %{ "orq $dst, $src\t# long" %}
14062 ins_encode %{
14063 __ orq($dst$$Register, $src$$Address);
14064 %}
14065 ins_pipe(ialu_reg_mem);
14066 %}
14067
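// Or Register with Memory using New Data Destination (NDD)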
14068 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14069 %{
14070 predicate(UseAPX);
14071 match(Set dst (OrL src1 (LoadL src2)));
14072 effect(KILL cr);
14073 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14074
14075 ins_cost(150);
14076 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14077 ins_encode %{
14078 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14079 %}
14080 ins_pipe(ialu_reg_mem);
14081 %}
14082
14083 // Or Memory with Register
14084 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14085 %{
14086 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14087 effect(KILL cr);
14088 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14089
14090 ins_cost(150);
14091 format %{ "orq $dst, $src\t# long" %}
14092 ins_encode %{
14093 __ orq($dst$$Address, $src$$Register);
14094 %}
14095 ins_pipe(ialu_mem_reg);
14096 %}
14097
14098 // Or Memory with Immediate
14099 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14100 %{
14101 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14102 effect(KILL cr);
14103 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14104
14105 ins_cost(125);
14106 format %{ "orq $dst, $src\t# long" %}
14107 ins_encode %{
14108 __ orq($dst$$Address, $src$$constant);
14109 %}
14110 ins_pipe(ialu_mem_imm);
14111 %}
14112
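// Set one bit in memory: an OR with a power-of-2 constant maps to btsq.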
14113 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14114 %{
14115 // con should be a pure 64-bit power of 2 immediate
14116 // because AND/OR works well enough for 8/32-bit values.
14117 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14118
14119 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14120 effect(KILL cr);
14121
14122 ins_cost(125);
14123 format %{ "btsq $dst, log2($con)\t# long" %}
14124 ins_encode %{
14125 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14126 %}
14127 ins_pipe(ialu_mem_imm);
14128 %}
14129
14130 // Xor Instructions
14131 // Xor Register with Register
14132 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14133 %{
14134 predicate(!UseAPX);
14135 match(Set dst (XorL dst src));
14136 effect(KILL cr);
14137 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14138
14139 format %{ "xorq $dst, $src\t# long" %}
14140 ins_encode %{
14141 __ xorq($dst$$Register, $src$$Register);
14142 %}
14143 ins_pipe(ialu_reg_reg);
14144 %}
14145
14146 // Xor Register with Register using New Data Destination (NDD)
14147 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14148 %{
14149 predicate(UseAPX);
14150 match(Set dst (XorL src1 src2));
14151 effect(KILL cr);
14152 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14153
14154 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14155 ins_encode %{
14156 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14157 %}
14158 ins_pipe(ialu_reg_reg);
14159 %}
14160
14161 // Xor Register with Immediate -1
14162 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14163 %{
14164 predicate(!UseAPX);
14165 match(Set dst (XorL dst imm));
14166
14167 format %{ "notq $dst" %}
14168 ins_encode %{
14169 __ notq($dst$$Register);
14170 %}
14171 ins_pipe(ialu_reg);
14172 %}
14173
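// Xor Register with Immediate -1 using New Data Destination (NDD)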
14174 instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
14175 %{
14176 predicate(UseAPX);
14177 match(Set dst (XorL src imm));
14178 flag(PD::Flag_ndd_demotable_opr1);
14179
14180 format %{ "enotq $dst, $src" %}
14181 ins_encode %{
14182 __ enotq($dst$$Register, $src$$Register);
14183 %}
14184 ins_pipe(ialu_reg);
14185 %}
14186
14187 // Xor Register with Immediate
14188 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14189 %{
14190 // Strict predicate check so that xorL_rReg_im1 is selected whenever immL32 src is -1, regardless of cost.
14191 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14192 match(Set dst (XorL dst src));
14193 effect(KILL cr);
14194 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14195
14196 format %{ "xorq $dst, $src\t# long" %}
14197 ins_encode %{
14198 __ xorq($dst$$Register, $src$$constant);
14199 %}
14200 ins_pipe(ialu_reg);
14201 %}
14202
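// Xor Register with Immediate using New Data Destination (NDD)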
14203 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14204 %{
14205 // Strict predicate check so that xorL_rReg_im1_ndd is selected whenever immL32 src2 is -1, regardless of cost.
14206 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14207 match(Set dst (XorL src1 src2));
14208 effect(KILL cr);
14209 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14210
14211 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14212 ins_encode %{
14213 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14214 %}
14215 ins_pipe(ialu_reg);
14216 %}
14217
14218 // Xor Memory with Immediate using New Data Destination (NDD)
14219 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14220 %{
14221 predicate(UseAPX);
14222 match(Set dst (XorL (LoadL src1) src2));
14223 effect(KILL cr);
14224 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14225 ins_cost(150);
14226
14227 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14228 ins_encode %{
14229 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14230 %}
14231 ins_pipe(ialu_reg);
14232 %}
14233
14234 // Xor Register with Memory
14235 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14236 %{
14237 predicate(!UseAPX);
14238 match(Set dst (XorL dst (LoadL src)));
14239 effect(KILL cr);
14240 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14241
14242 ins_cost(150);
14243 format %{ "xorq $dst, $src\t# long" %}
14244 ins_encode %{
14245 __ xorq($dst$$Register, $src$$Address);
14246 %}
14247 ins_pipe(ialu_reg_mem);
14248 %}
14249
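// Xor Register with Memory using New Data Destination (NDD)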
14250 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14251 %{
14252 predicate(UseAPX);
14253 match(Set dst (XorL src1 (LoadL src2)));
14254 effect(KILL cr);
14255 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14256
14257 ins_cost(150);
14258 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14259 ins_encode %{
14260 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14261 %}
14262 ins_pipe(ialu_reg_mem);
14263 %}
14264
14265 // Xor Memory with Register
14266 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14267 %{
14268 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14269 effect(KILL cr);
14270 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14271
14272 ins_cost(150);
14273 format %{ "xorq $dst, $src\t# long" %}
14274 ins_encode %{
14275 __ xorq($dst$$Address, $src$$Register);
14276 %}
14277 ins_pipe(ialu_mem_reg);
14278 %}
14279
14280 // Xor Memory with Immediate
14281 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14282 %{
14283 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14284 effect(KILL cr);
14285 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14286
14287 ins_cost(125);
14288 format %{ "xorq $dst, $src\t# long" %}
14289 ins_encode %{
14290 __ xorq($dst$$Address, $src$$constant);
14291 %}
14292 ins_pipe(ialu_mem_imm);
14293 %}
14294
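// Materialize a signed less-than compare as a mask: dst = (p < q) ? -1 : 0.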
14295 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14296 %{
14297 match(Set dst (CmpLTMask p q));
14298 effect(KILL cr);
14299
14300 ins_cost(400);
14301 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14302 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14303 "negl $dst" %}
14304 ins_encode %{
14305 __ cmpl($p$$Register, $q$$Register);
14306 __ setcc(Assembler::less, $dst$$Register);
14307 __ negl($dst$$Register);
14308 %}
14309 ins_pipe(pipe_slow);
14310 %}
14311
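// CmpLTMask against zero: an arithmetic right shift by 31 turns the sign bit into an all-ones or all-zeros mask.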
14312 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14313 %{
14314 match(Set dst (CmpLTMask dst zero));
14315 effect(KILL cr);
14316
14317 ins_cost(100);
14318 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14319 ins_encode %{
14320 __ sarl($dst$$Register, 31);
14321 %}
14322 ins_pipe(ialu_reg);
14323 %}
14324
14325 /* Better to save a register than avoid a branch */
14326 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14327 %{
14328 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14329 effect(KILL cr);
14330 ins_cost(300);
14331 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14332 "jge done\n\t"
14333 "addl $p,$y\n"
14334 "done: " %}
14335 ins_encode %{
14336 Register Rp = $p$$Register;
14337 Register Rq = $q$$Register;
14338 Register Ry = $y$$Register;
14339 Label done;
14340 __ subl(Rp, Rq);
14341 __ jccb(Assembler::greaterEqual, done);
14342 __ addl(Rp, Ry);
14343 __ bind(done);
14344 %}
14345 ins_pipe(pipe_cmplt);
14346 %}
14347
14348 /* Better to save a register than avoid a branch */
14349 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14350 %{
14351 match(Set y (AndI (CmpLTMask p q) y));
14352 effect(KILL cr);
14353
14354 ins_cost(300);
14355
14356 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14357 "jlt done\n\t"
14358 "xorl $y, $y\n"
14359 "done: " %}
14360 ins_encode %{
14361 Register Rp = $p$$Register;
14362 Register Rq = $q$$Register;
14363 Register Ry = $y$$Register;
14364 Label done;
14365 __ cmpl(Rp, Rq);
14366 __ jccb(Assembler::less, done);
14367 __ xorl(Ry, Ry);
14368 __ bind(done);
14369 %}
14370 ins_pipe(pipe_cmplt);
14371 %}
14372
14373
14374 //---------- FP Instructions------------------------------------------------
14375
14376 // Really expensive, avoid
14377 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14378 %{
14379 match(Set cr (CmpF src1 src2));
14380
14381 ins_cost(500);
14382 format %{ "ucomiss $src1, $src2\n\t"
14383 "jnp,s exit\n\t"
14384 "pushfq\t# saw NaN, set CF\n\t"
14385 "andq [rsp], #0xffffff2b\n\t"
14386 "popfq\n"
14387 "exit:" %}
14388 ins_encode %{
14389 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14390 emit_cmpfp_fixup(masm);
14391 %}
14392 ins_pipe(pipe_slow);
14393 %}
14394
14395 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14396 match(Set cr (CmpF src1 src2));
14397
14398 ins_cost(100);
14399 format %{ "ucomiss $src1, $src2" %}
14400 ins_encode %{
14401 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14402 %}
14403 ins_pipe(pipe_slow);
14404 %}
14405
14406 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14407 match(Set cr (CmpF src1 src2));
14408
14409 ins_cost(100);
14410 format %{ "evucomxss $src1, $src2" %}
14411 ins_encode %{
14412 __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14413 %}
14414 ins_pipe(pipe_slow);
14415 %}
14416
14417 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14418 match(Set cr (CmpF src1 (LoadF src2)));
14419
14420 ins_cost(100);
14421 format %{ "ucomiss $src1, $src2" %}
14422 ins_encode %{
14423 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14424 %}
14425 ins_pipe(pipe_slow);
14426 %}
14427
14428 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14429 match(Set cr (CmpF src1 (LoadF src2)));
14430
14431 ins_cost(100);
14432 format %{ "evucomxss $src1, $src2" %}
14433 ins_encode %{
14434 __ evucomxss($src1$$XMMRegister, $src2$$Address);
14435 %}
14436 ins_pipe(pipe_slow);
14437 %}
14438
14439 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14440 match(Set cr (CmpF src con));
14441
14442 ins_cost(100);
14443 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14444 ins_encode %{
14445 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14446 %}
14447 ins_pipe(pipe_slow);
14448 %}
14449
14450 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14451 match(Set cr (CmpF src con));
14452
14453 ins_cost(100);
14454 format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14455 ins_encode %{
14456 __ evucomxss($src$$XMMRegister, $constantaddress($con));
14457 %}
14458 ins_pipe(pipe_slow);
14459 %}
14460
14461 // Really expensive, avoid
14462 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14463 %{
14464 match(Set cr (CmpD src1 src2));
14465
14466 ins_cost(500);
14467 format %{ "ucomisd $src1, $src2\n\t"
14468 "jnp,s exit\n\t"
14469 "pushfq\t# saw NaN, set CF\n\t"
14470 "andq [rsp], #0xffffff2b\n\t"
14471 "popfq\n"
14472 "exit:" %}
14473 ins_encode %{
14474 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14475 emit_cmpfp_fixup(masm);
14476 %}
14477 ins_pipe(pipe_slow);
14478 %}
14479
14480 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14481 match(Set cr (CmpD src1 src2));
14482
14483 ins_cost(100);
14484 format %{ "ucomisd $src1, $src2" %}
14485 ins_encode %{
14486 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14487 %}
14488 ins_pipe(pipe_slow);
14489 %}
14490
14491 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14492 match(Set cr (CmpD src1 src2));
14493
14494 ins_cost(100);
14495 format %{ "evucomxsd $src1, $src2" %}
14496 ins_encode %{
14497 __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14498 %}
14499 ins_pipe(pipe_slow);
14500 %}
14501
14502 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14503 match(Set cr (CmpD src1 (LoadD src2)));
14504
14505 ins_cost(100);
14506 format %{ "ucomisd $src1, $src2" %}
14507 ins_encode %{
14508 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14509 %}
14510 ins_pipe(pipe_slow);
14511 %}
14512
14513 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14514 match(Set cr (CmpD src1 (LoadD src2)));
14515
14516 ins_cost(100);
14517 format %{ "evucomxsd $src1, $src2" %}
14518 ins_encode %{
14519 __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14520 %}
14521 ins_pipe(pipe_slow);
14522 %}
14523
14524 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14525 match(Set cr (CmpD src con));
14526 ins_cost(100);
14527 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14528 ins_encode %{
14529 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14530 %}
14531 ins_pipe(pipe_slow);
14532 %}
14533
14534 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14535 match(Set cr (CmpD src con));
14536
14537 ins_cost(100);
14538 format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14539 ins_encode %{
14540 __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14541 %}
14542 ins_pipe(pipe_slow);
14543 %}
14544
14545 // Compare into -1,0,1
14546 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14547 %{
14548 match(Set dst (CmpF3 src1 src2));
14549 effect(KILL cr);
14550
14551 ins_cost(275);
14552 format %{ "ucomiss $src1, $src2\n\t"
14553 "movl $dst, #-1\n\t"
14554 "jp,s done\n\t"
14555 "jb,s done\n\t"
14556 "setne $dst\n\t"
14557 "movzbl $dst, $dst\n"
14558 "done:" %}
14559 ins_encode %{
14560 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14561 emit_cmpfp3(masm, $dst$$Register);
14562 %}
14563 ins_pipe(pipe_slow);
14564 %}
14565
14566 // Compare into -1,0,1
14567 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14568 %{
14569 match(Set dst (CmpF3 src1 (LoadF src2)));
14570 effect(KILL cr);
14571
14572 ins_cost(275);
14573 format %{ "ucomiss $src1, $src2\n\t"
14574 "movl $dst, #-1\n\t"
14575 "jp,s done\n\t"
14576 "jb,s done\n\t"
14577 "setne $dst\n\t"
14578 "movzbl $dst, $dst\n"
14579 "done:" %}
14580 ins_encode %{
14581 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14582 emit_cmpfp3(masm, $dst$$Register);
14583 %}
14584 ins_pipe(pipe_slow);
14585 %}
14586
14587 // Compare into -1,0,1
14588 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14589 match(Set dst (CmpF3 src con));
14590 effect(KILL cr);
14591
14592 ins_cost(275);
14593 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14594 "movl $dst, #-1\n\t"
14595 "jp,s done\n\t"
14596 "jb,s done\n\t"
14597 "setne $dst\n\t"
14598 "movzbl $dst, $dst\n"
14599 "done:" %}
14600 ins_encode %{
14601 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14602 emit_cmpfp3(masm, $dst$$Register);
14603 %}
14604 ins_pipe(pipe_slow);
14605 %}
14606
14607 // Compare into -1,0,1
14608 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14609 %{
14610 match(Set dst (CmpD3 src1 src2));
14611 effect(KILL cr);
14612
14613 ins_cost(275);
14614 format %{ "ucomisd $src1, $src2\n\t"
14615 "movl $dst, #-1\n\t"
14616 "jp,s done\n\t"
14617 "jb,s done\n\t"
14618 "setne $dst\n\t"
14619 "movzbl $dst, $dst\n"
14620 "done:" %}
14621 ins_encode %{
14622 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14623 emit_cmpfp3(masm, $dst$$Register);
14624 %}
14625 ins_pipe(pipe_slow);
14626 %}
14627
14628 // Compare into -1,0,1
14629 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14630 %{
14631 match(Set dst (CmpD3 src1 (LoadD src2)));
14632 effect(KILL cr);
14633
14634 ins_cost(275);
14635 format %{ "ucomisd $src1, $src2\n\t"
14636 "movl $dst, #-1\n\t"
14637 "jp,s done\n\t"
14638 "jb,s done\n\t"
14639 "setne $dst\n\t"
14640 "movzbl $dst, $dst\n"
14641 "done:" %}
14642 ins_encode %{
14643 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14644 emit_cmpfp3(masm, $dst$$Register);
14645 %}
14646 ins_pipe(pipe_slow);
14647 %}
14648
14649 // Compare into -1,0,1
14650 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14651 match(Set dst (CmpD3 src con));
14652 effect(KILL cr);
14653
14654 ins_cost(275);
14655 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14656 "movl $dst, #-1\n\t"
14657 "jp,s done\n\t"
14658 "jb,s done\n\t"
14659 "setne $dst\n\t"
14660 "movzbl $dst, $dst\n"
14661 "done:" %}
14662 ins_encode %{
14663 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14664 emit_cmpfp3(masm, $dst$$Register);
14665 %}
14666 ins_pipe(pipe_slow);
14667 %}
14668
14669 //----------Arithmetic Conversion Instructions---------------------------------
14670
14671 instruct convF2D_reg_reg(regD dst, regF src)
14672 %{
14673 match(Set dst (ConvF2D src));
14674
14675 format %{ "cvtss2sd $dst, $src" %}
14676 ins_encode %{
14677 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14678 %}
14679 ins_pipe(pipe_slow); // XXX
14680 %}
14681
14682 instruct convF2D_reg_mem(regD dst, memory src)
14683 %{
14684 predicate(UseAVX == 0);
14685 match(Set dst (ConvF2D (LoadF src)));
14686
14687 format %{ "cvtss2sd $dst, $src" %}
14688 ins_encode %{
14689 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14690 %}
14691 ins_pipe(pipe_slow); // XXX
14692 %}
14693
14694 instruct convD2F_reg_reg(regF dst, regD src)
14695 %{
14696 match(Set dst (ConvD2F src));
14697
14698 format %{ "cvtsd2ss $dst, $src" %}
14699 ins_encode %{
14700 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14701 %}
14702 ins_pipe(pipe_slow); // XXX
14703 %}
14704
14705 instruct convD2F_reg_mem(regF dst, memory src)
14706 %{
14707 predicate(UseAVX == 0);
14708 match(Set dst (ConvD2F (LoadD src)));
14709
14710 format %{ "cvtsd2ss $dst, $src" %}
14711 ins_encode %{
14712 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14713 %}
14714 ins_pipe(pipe_slow); // XXX
14715 %}
14716
14717 // XXX do mem variants
14718 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14719 %{
14720 predicate(!VM_Version::supports_avx10_2());
14721 match(Set dst (ConvF2I src));
14722 effect(KILL cr);
14723 format %{ "convert_f2i $dst, $src" %}
14724 ins_encode %{
14725 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14726 %}
14727 ins_pipe(pipe_slow);
14728 %}
14729
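// AVX10.2 variants below emit the evcvtt*2sis* conversions directly; unlike the generic paths above they need no flag-killing fixup sequence.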
14730 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14731 %{
14732 predicate(VM_Version::supports_avx10_2());
14733 match(Set dst (ConvF2I src));
14734 format %{ "evcvttss2sisl $dst, $src" %}
14735 ins_encode %{
14736 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14737 %}
14738 ins_pipe(pipe_slow);
14739 %}
14740
14741 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14742 %{
14743 predicate(VM_Version::supports_avx10_2());
14744 match(Set dst (ConvF2I (LoadF src)));
14745 format %{ "evcvttss2sisl $dst, $src" %}
14746 ins_encode %{
14747 __ evcvttss2sisl($dst$$Register, $src$$Address);
14748 %}
14749 ins_pipe(pipe_slow);
14750 %}
14751
14752 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14753 %{
14754 predicate(!VM_Version::supports_avx10_2());
14755 match(Set dst (ConvF2L src));
14756 effect(KILL cr);
14757 format %{ "convert_f2l $dst, $src"%}
14758 ins_encode %{
14759 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14760 %}
14761 ins_pipe(pipe_slow);
14762 %}
14763
14764 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14765 %{
14766 predicate(VM_Version::supports_avx10_2());
14767 match(Set dst (ConvF2L src));
14768 format %{ "evcvttss2sisq $dst, $src" %}
14769 ins_encode %{
14770 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14771 %}
14772 ins_pipe(pipe_slow);
14773 %}
14774
14775 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14776 %{
14777 predicate(VM_Version::supports_avx10_2());
14778 match(Set dst (ConvF2L (LoadF src)));
14779 format %{ "evcvttss2sisq $dst, $src" %}
14780 ins_encode %{
14781 __ evcvttss2sisq($dst$$Register, $src$$Address);
14782 %}
14783 ins_pipe(pipe_slow);
14784 %}
14785
14786 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14787 %{
14788 predicate(!VM_Version::supports_avx10_2());
14789 match(Set dst (ConvD2I src));
14790 effect(KILL cr);
14791 format %{ "convert_d2i $dst, $src"%}
14792 ins_encode %{
14793 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14794 %}
14795 ins_pipe(pipe_slow);
14796 %}
14797
14798 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14799 %{
14800 predicate(VM_Version::supports_avx10_2());
14801 match(Set dst (ConvD2I src));
14802 format %{ "evcvttsd2sisl $dst, $src" %}
14803 ins_encode %{
14804 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14805 %}
14806 ins_pipe(pipe_slow);
14807 %}
14808
14809 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14810 %{
14811 predicate(VM_Version::supports_avx10_2());
14812 match(Set dst (ConvD2I (LoadD src)));
14813 format %{ "evcvttsd2sisl $dst, $src" %}
14814 ins_encode %{
14815 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14816 %}
14817 ins_pipe(pipe_slow);
14818 %}
14819
14820 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14821 %{
14822 predicate(!VM_Version::supports_avx10_2());
14823 match(Set dst (ConvD2L src));
14824 effect(KILL cr);
14825 format %{ "convert_d2l $dst, $src"%}
14826 ins_encode %{
14827 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14828 %}
14829 ins_pipe(pipe_slow);
14830 %}
14831
14832 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14833 %{
14834 predicate(VM_Version::supports_avx10_2());
14835 match(Set dst (ConvD2L src));
14836 format %{ "evcvttsd2sisq $dst, $src" %}
14837 ins_encode %{
14838 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14839 %}
14840 ins_pipe(pipe_slow);
14841 %}
14842
14843 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14844 %{
14845 predicate(VM_Version::supports_avx10_2());
14846 match(Set dst (ConvD2L (LoadD src)));
14847 format %{ "evcvttsd2sisq $dst, $src" %}
14848 ins_encode %{
14849 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14850 %}
14851 ins_pipe(pipe_slow);
14852 %}
14853
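// RoundD / RoundF implement java.lang.Math.round for double -> long and float -> int.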
14854 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14855 %{
14856 match(Set dst (RoundD src));
14857 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14858 format %{ "round_double $dst,$src\t! using $rtmp and $rcx as TEMP" %}
14859 ins_encode %{
14860 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14861 %}
14862 ins_pipe(pipe_slow);
14863 %}
14864
14865 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14866 %{
14867 match(Set dst (RoundF src));
14868 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14869 format %{ "round_float $dst,$src" %}
14870 ins_encode %{
14871 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14872 %}
14873 ins_pipe(pipe_slow);
14874 %}
14875
14876 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14877 %{
14878 predicate(!UseXmmI2F);
14879 match(Set dst (ConvI2F src));
14880
14881 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14882 ins_encode %{
14883 if (UseAVX > 0) {
14884 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14885 }
14886 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14887 %}
14888 ins_pipe(pipe_slow); // XXX
14889 %}
14890
14891 instruct convI2F_reg_mem(regF dst, memory src)
14892 %{
14893 predicate(UseAVX == 0);
14894 match(Set dst (ConvI2F (LoadI src)));
14895
14896 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14897 ins_encode %{
14898 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14899 %}
14900 ins_pipe(pipe_slow); // XXX
14901 %}
14902
14903 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14904 %{
14905 predicate(!UseXmmI2D);
14906 match(Set dst (ConvI2D src));
14907
14908 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14909 ins_encode %{
14910 if (UseAVX > 0) {
14911 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14912 }
14913 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14914 %}
14915 ins_pipe(pipe_slow); // XXX
14916 %}
14917
14918 instruct convI2D_reg_mem(regD dst, memory src)
14919 %{
14920 predicate(UseAVX == 0);
14921 match(Set dst (ConvI2D (LoadI src)));
14922
14923 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14924 ins_encode %{
14925 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14926 %}
14927 ins_pipe(pipe_slow); // XXX
14928 %}
14929
14930 instruct convXI2F_reg(regF dst, rRegI src)
14931 %{
14932 predicate(UseXmmI2F);
14933 match(Set dst (ConvI2F src));
14934
14935 format %{ "movdl $dst, $src\n\t"
14936 "cvtdq2psl $dst, $dst\t# i2f" %}
14937 ins_encode %{
14938 __ movdl($dst$$XMMRegister, $src$$Register);
14939 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14940 %}
14941 ins_pipe(pipe_slow); // XXX
14942 %}
14943
14944 instruct convXI2D_reg(regD dst, rRegI src)
14945 %{
14946 predicate(UseXmmI2D);
14947 match(Set dst (ConvI2D src));
14948
14949 format %{ "movdl $dst, $src\n\t"
14950 "cvtdq2pdl $dst, $dst\t# i2d" %}
14951 ins_encode %{
14952 __ movdl($dst$$XMMRegister, $src$$Register);
14953 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14954 %}
14955 ins_pipe(pipe_slow); // XXX
14956 %}
14957
14958 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14959 %{
14960 match(Set dst (ConvL2F src));
14961
14962 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14963 ins_encode %{
14964 if (UseAVX > 0) {
14965 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14966 }
14967 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14968 %}
14969 ins_pipe(pipe_slow); // XXX
14970 %}
14971
14972 instruct convL2F_reg_mem(regF dst, memory src)
14973 %{
14974 predicate(UseAVX == 0);
14975 match(Set dst (ConvL2F (LoadL src)));
14976
14977 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14978 ins_encode %{
14979 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14980 %}
14981 ins_pipe(pipe_slow); // XXX
14982 %}
14983
14984 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14985 %{
14986 match(Set dst (ConvL2D src));
14987
14988 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14989 ins_encode %{
14990 if (UseAVX > 0) {
14991 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14992 }
14993 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14994 %}
14995 ins_pipe(pipe_slow); // XXX
14996 %}
14997
14998 instruct convL2D_reg_mem(regD dst, memory src)
14999 %{
15000 predicate(UseAVX == 0);
15001 match(Set dst (ConvL2D (LoadL src)));
15002
15003 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15004 ins_encode %{
15005 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15006 %}
15007 ins_pipe(pipe_slow); // XXX
15008 %}
15009
15010 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15011 %{
15012 match(Set dst (ConvI2L src));
15013
15014 ins_cost(125);
15015 format %{ "movslq $dst, $src\t# i2l" %}
15016 ins_encode %{
15017 __ movslq($dst$$Register, $src$$Register);
15018 %}
15019 ins_pipe(ialu_reg_reg);
15020 %}
15021
15022 // Zero-extend convert int to long
15023 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15024 %{
15025 match(Set dst (AndL (ConvI2L src) mask));
15026
15027 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15028 ins_encode %{
15029 if ($dst$$reg != $src$$reg) {
15030 __ movl($dst$$Register, $src$$Register);
15031 }
15032 %}
15033 ins_pipe(ialu_reg_reg);
15034 %}
15035
15036 // Zero-extend convert int to long
15037 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15038 %{
15039 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15040
15041 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15042 ins_encode %{
15043 __ movl($dst$$Register, $src$$Address);
15044 %}
15045 ins_pipe(ialu_reg_mem);
15046 %}
15047
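// Zero-extend a long in place: a 32-bit movl implicitly clears the upper 32 bits.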
15048 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15049 %{
15050 match(Set dst (AndL src mask));
15051
15052 format %{ "movl $dst, $src\t# zero-extend long" %}
15053 ins_encode %{
15054 __ movl($dst$$Register, $src$$Register);
15055 %}
15056 ins_pipe(ialu_reg_reg);
15057 %}
15058
15059 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15060 %{
15061 match(Set dst (ConvL2I src));
15062
15063 format %{ "movl $dst, $src\t# l2i" %}
15064 ins_encode %{
15065 __ movl($dst$$Register, $src$$Register);
15066 %}
15067 ins_pipe(ialu_reg_reg);
15068 %}
15069
15070
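// Raw bit moves between integer and floating-point values (reinterpret the bits, no numeric conversion).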
15071 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15072 match(Set dst (MoveF2I src));
15073 effect(DEF dst, USE src);
15074
15075 ins_cost(125);
15076 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15077 ins_encode %{
15078 __ movl($dst$$Register, Address(rsp, $src$$disp));
15079 %}
15080 ins_pipe(ialu_reg_mem);
15081 %}
15082
15083 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15084 match(Set dst (MoveI2F src));
15085 effect(DEF dst, USE src);
15086
15087 ins_cost(125);
15088 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15089 ins_encode %{
15090 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15091 %}
15092 ins_pipe(pipe_slow);
15093 %}
15094
15095 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15096 match(Set dst (MoveD2L src));
15097 effect(DEF dst, USE src);
15098
15099 ins_cost(125);
15100 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15101 ins_encode %{
15102 __ movq($dst$$Register, Address(rsp, $src$$disp));
15103 %}
15104 ins_pipe(ialu_reg_mem);
15105 %}
15106
15107 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15108 predicate(!UseXmmLoadAndClearUpper);
15109 match(Set dst (MoveL2D src));
15110 effect(DEF dst, USE src);
15111
15112 ins_cost(125);
15113 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15114 ins_encode %{
15115 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15116 %}
15117 ins_pipe(pipe_slow);
15118 %}
15119
15120 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15121 predicate(UseXmmLoadAndClearUpper);
15122 match(Set dst (MoveL2D src));
15123 effect(DEF dst, USE src);
15124
15125 ins_cost(125);
15126 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15127 ins_encode %{
15128 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15129 %}
15130 ins_pipe(pipe_slow);
15131 %}
15132
15133
15134 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15135 match(Set dst (MoveF2I src));
15136 effect(DEF dst, USE src);
15137
15138 ins_cost(95); // XXX
15139 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15140 ins_encode %{
15141 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15142 %}
15143 ins_pipe(pipe_slow);
15144 %}
15145
15146 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15147 match(Set dst (MoveI2F src));
15148 effect(DEF dst, USE src);
15149
15150 ins_cost(100);
15151 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15152 ins_encode %{
15153 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15154 %}
15155 ins_pipe( ialu_mem_reg );
15156 %}
15157
15158 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15159 match(Set dst (MoveD2L src));
15160 effect(DEF dst, USE src);
15161
15162 ins_cost(95); // XXX
15163 format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
15164 ins_encode %{
15165 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15166 %}
15167 ins_pipe(pipe_slow);
15168 %}
15169
15170 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15171 match(Set dst (MoveL2D src));
15172 effect(DEF dst, USE src);
15173
15174 ins_cost(100);
15175 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15176 ins_encode %{
15177 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15178 %}
15179 ins_pipe(ialu_mem_reg);
15180 %}
15181
15182 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15183 match(Set dst (MoveF2I src));
15184 effect(DEF dst, USE src);
15185 ins_cost(85);
15186 format %{ "movd $dst,$src\t# MoveF2I" %}
15187 ins_encode %{
15188 __ movdl($dst$$Register, $src$$XMMRegister);
15189 %}
15190 ins_pipe( pipe_slow );
15191 %}
15192
15193 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15194 match(Set dst (MoveD2L src));
15195 effect(DEF dst, USE src);
15196 ins_cost(85);
15197 format %{ "movd $dst,$src\t# MoveD2L" %}
15198 ins_encode %{
15199 __ movdq($dst$$Register, $src$$XMMRegister);
15200 %}
15201 ins_pipe( pipe_slow );
15202 %}
15203
15204 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15205 match(Set dst (MoveI2F src));
15206 effect(DEF dst, USE src);
15207 ins_cost(100);
15208 format %{ "movd $dst,$src\t# MoveI2F" %}
15209 ins_encode %{
15210 __ movdl($dst$$XMMRegister, $src$$Register);
15211 %}
15212 ins_pipe( pipe_slow );
15213 %}
15214
15215 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15216 match(Set dst (MoveL2D src));
15217 effect(DEF dst, USE src);
15218 ins_cost(100);
15219 format %{ "movd $dst,$src\t# MoveL2D" %}
15220 ins_encode %{
15221 __ movdq($dst$$XMMRegister, $src$$Register);
15222 %}
15223 ins_pipe( pipe_slow );
15224 %}
15225
15226 // Fast clearing of an array
15227 // Small non-constant length ClearArray for non-AVX512 targets.
15228 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15229 Universe dummy, rFlagsReg cr)
15230 %{
15231 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15232 match(Set dummy (ClearArray cnt base));
15233 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15234
15235 format %{ $$template
15236 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15237 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15238 $$emit$$"jg LARGE\n\t"
15239 $$emit$$"dec rcx\n\t"
15240 $$emit$$"js DONE\t# Zero length\n\t"
15241 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15242 $$emit$$"dec rcx\n\t"
15243 $$emit$$"jge LOOP\n\t"
15244 $$emit$$"jmp DONE\n\t"
15245 $$emit$$"# LARGE:\n\t"
15246 if (UseFastStosb) {
15247 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15248 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15249 } else if (UseXMMForObjInit) {
15250 $$emit$$"mov rdi,rax\n\t"
15251 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15252 $$emit$$"jmpq L_zero_64_bytes\n\t"
15253 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15254 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15255 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15256 $$emit$$"add 0x40,rax\n\t"
15257 $$emit$$"# L_zero_64_bytes:\n\t"
15258 $$emit$$"sub 0x8,rcx\n\t"
15259 $$emit$$"jge L_loop\n\t"
15260 $$emit$$"add 0x4,rcx\n\t"
15261 $$emit$$"jl L_tail\n\t"
15262 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15263 $$emit$$"add 0x20,rax\n\t"
15264 $$emit$$"sub 0x4,rcx\n\t"
15265 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15266 $$emit$$"add 0x4,rcx\n\t"
15267 $$emit$$"jle L_end\n\t"
15268 $$emit$$"dec rcx\n\t"
15269 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15270 $$emit$$"vmovq xmm0,(rax)\n\t"
15271 $$emit$$"add 0x8,rax\n\t"
15272 $$emit$$"dec rcx\n\t"
15273 $$emit$$"jge L_sloop\n\t"
15274 $$emit$$"# L_end:\n\t"
15275 } else {
15276 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15277 }
15278 $$emit$$"# DONE"
15279 %}
15280 ins_encode %{
15281 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15282 $tmp$$XMMRegister, false, knoreg);
15283 %}
15284 ins_pipe(pipe_slow);
15285 %}
15286
15287 // Small non-constant length ClearArray for AVX512 targets.
15288 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15289 Universe dummy, rFlagsReg cr)
15290 %{
15291 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15292 match(Set dummy (ClearArray cnt base));
15293 ins_cost(125);
15294 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15295
15296 format %{ $$template
15297 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15298 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15299 $$emit$$"jg LARGE\n\t"
15300 $$emit$$"dec rcx\n\t"
15301 $$emit$$"js DONE\t# Zero length\n\t"
15302 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15303 $$emit$$"dec rcx\n\t"
15304 $$emit$$"jge LOOP\n\t"
15305 $$emit$$"jmp DONE\n\t"
15306 $$emit$$"# LARGE:\n\t"
15307 if (UseFastStosb) {
15308 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15309 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15310 } else if (UseXMMForObjInit) {
15311 $$emit$$"mov rdi,rax\n\t"
15312 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15313 $$emit$$"jmpq L_zero_64_bytes\n\t"
15314 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15315 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15316 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15317 $$emit$$"add 0x40,rax\n\t"
15318 $$emit$$"# L_zero_64_bytes:\n\t"
15319 $$emit$$"sub 0x8,rcx\n\t"
15320 $$emit$$"jge L_loop\n\t"
15321 $$emit$$"add 0x4,rcx\n\t"
15322 $$emit$$"jl L_tail\n\t"
15323 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15324 $$emit$$"add 0x20,rax\n\t"
15325 $$emit$$"sub 0x4,rcx\n\t"
15326 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15327 $$emit$$"add 0x4,rcx\n\t"
15328 $$emit$$"jle L_end\n\t"
15329 $$emit$$"dec rcx\n\t"
15330 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15331 $$emit$$"vmovq xmm0,(rax)\n\t"
15332 $$emit$$"add 0x8,rax\n\t"
15333 $$emit$$"dec rcx\n\t"
15334 $$emit$$"jge L_sloop\n\t"
15335 $$emit$$"# L_end:\n\t"
15336 } else {
15337 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15338 }
15339 $$emit$$"# DONE"
15340 %}
15341 ins_encode %{
15342 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15343 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15344 %}
15345 ins_pipe(pipe_slow);
15346 %}
15347
15348 // Large non-constant length ClearArray for non-AVX512 targets.
15349 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15350 Universe dummy, rFlagsReg cr)
15351 %{
15352 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15353 match(Set dummy (ClearArray cnt base));
15354 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15355
15356 format %{ $$template
15357 if (UseFastStosb) {
15358 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15359 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15360 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15361 } else if (UseXMMForObjInit) {
15362 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15363 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15364 $$emit$$"jmpq L_zero_64_bytes\n\t"
15365 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15366 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15367 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15368 $$emit$$"add 0x40,rax\n\t"
15369 $$emit$$"# L_zero_64_bytes:\n\t"
15370 $$emit$$"sub 0x8,rcx\n\t"
15371 $$emit$$"jge L_loop\n\t"
15372 $$emit$$"add 0x4,rcx\n\t"
15373 $$emit$$"jl L_tail\n\t"
15374 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15375 $$emit$$"add 0x20,rax\n\t"
15376 $$emit$$"sub 0x4,rcx\n\t"
15377 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15378 $$emit$$"add 0x4,rcx\n\t"
15379 $$emit$$"jle L_end\n\t"
15380 $$emit$$"dec rcx\n\t"
15381 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15382 $$emit$$"vmovq xmm0,(rax)\n\t"
15383 $$emit$$"add 0x8,rax\n\t"
15384 $$emit$$"dec rcx\n\t"
15385 $$emit$$"jge L_sloop\n\t"
15386 $$emit$$"# L_end:\n\t"
15387 } else {
15388 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15389 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15390 }
15391 %}
15392 ins_encode %{
15393 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15394 $tmp$$XMMRegister, true, knoreg);
15395 %}
15396 ins_pipe(pipe_slow);
15397 %}
15398
15399 // Large non-constant length ClearArray for AVX512 targets.
15400 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15401 Universe dummy, rFlagsReg cr)
15402 %{
15403 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15404 match(Set dummy (ClearArray cnt base));
15405 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15406
15407 format %{ $$template
15408 if (UseFastStosb) {
15409 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15410 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15411 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15412 } else if (UseXMMForObjInit) {
15413 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15414 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15415 $$emit$$"jmpq L_zero_64_bytes\n\t"
15416 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15417 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15418 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15419 $$emit$$"add 0x40,rax\n\t"
15420 $$emit$$"# L_zero_64_bytes:\n\t"
15421 $$emit$$"sub 0x8,rcx\n\t"
15422 $$emit$$"jge L_loop\n\t"
15423 $$emit$$"add 0x4,rcx\n\t"
15424 $$emit$$"jl L_tail\n\t"
15425 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15426 $$emit$$"add 0x20,rax\n\t"
15427 $$emit$$"sub 0x4,rcx\n\t"
15428 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15429 $$emit$$"add 0x4,rcx\n\t"
15430 $$emit$$"jle L_end\n\t"
15431 $$emit$$"dec rcx\n\t"
15432 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15433 $$emit$$"vmovq xmm0,(rax)\n\t"
15434 $$emit$$"add 0x8,rax\n\t"
15435 $$emit$$"dec rcx\n\t"
15436 $$emit$$"jge L_sloop\n\t"
15437 $$emit$$"# L_end:\n\t"
15438 } else {
15439 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15440 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15441 }
15442 %}
15443 ins_encode %{
15444 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15445 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15446 %}
15447 ins_pipe(pipe_slow);
15448 %}
15449
15450 // Small constant length ClearArray for AVX512 targets.
15451 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15452 %{
15453 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15454 match(Set dummy (ClearArray cnt base));
15455 ins_cost(100);
15456 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15457 format %{ "clear_mem_imm $base, $cnt\n\t" %}
15458 ins_encode %{
15459 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15460 %}
15461 ins_pipe(pipe_slow);
15462 %}
15463
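// String compare intrinsics. The LL/UU/LU/UL suffixes give the encodings (Latin-1 or UTF-16) of the two operands.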
15464 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15465 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15466 %{
15467 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15468 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15469 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15470
15471 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15472 ins_encode %{
15473 __ string_compare($str1$$Register, $str2$$Register,
15474 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15475 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15476 %}
15477 ins_pipe( pipe_slow );
15478 %}
15479
15480 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15481 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15482 %{
15483 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15484 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15485 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15486
15487 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15488 ins_encode %{
15489 __ string_compare($str1$$Register, $str2$$Register,
15490 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15491 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15492 %}
15493 ins_pipe( pipe_slow );
15494 %}
15495
15496 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15497 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15498 %{
15499 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15500 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15501 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15502
15503 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15504 ins_encode %{
15505 __ string_compare($str1$$Register, $str2$$Register,
15506 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15507 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15508 %}
15509 ins_pipe( pipe_slow );
15510 %}
15511
15512 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15513 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15514 %{
15515 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15516 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15517 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15518
15519 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15520 ins_encode %{
15521 __ string_compare($str1$$Register, $str2$$Register,
15522 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15523 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15524 %}
15525 ins_pipe( pipe_slow );
15526 %}
15527
15528 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15529 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15530 %{
15531 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15532 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15533 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15534
15535 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15536 ins_encode %{
15537 __ string_compare($str1$$Register, $str2$$Register,
15538 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15539 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15540 %}
15541 ins_pipe( pipe_slow );
15542 %}
15543
15544 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15545 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15546 %{
15547 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15548 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15549 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15550
15551 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15552 ins_encode %{
15553 __ string_compare($str1$$Register, $str2$$Register,
15554 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15555 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15556 %}
15557 ins_pipe( pipe_slow );
15558 %}
15559
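// Note: the UL variants below bind str1/str2 to the opposite registers from the LU
// variants and call string_compare() with the operands swapped (str2/cnt2 first).
// Presumably this lets the shared MacroAssembler::string_compare() routine handle the
// mixed-encoding cases uniformly and fix up the sign of the result; see that routine
// for the exact contract.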
15560 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15561 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15562 %{
15563 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15564 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15565 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15566
15567 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15568 ins_encode %{
15569 __ string_compare($str2$$Register, $str1$$Register,
15570 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15571 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15572 %}
15573 ins_pipe( pipe_slow );
15574 %}
15575
15576 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15577 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15578 %{
15579 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15580 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15581 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15582
15583 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15584 ins_encode %{
15585 __ string_compare($str2$$Register, $str1$$Register,
15586 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15587 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15588 %}
15589 ins_pipe( pipe_slow );
15590 %}
15591
15592 // fast search of substring with known size.
15593 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15594 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15595 %{
15596 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15597 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15598 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15599
15600 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15601 ins_encode %{
15602 int icnt2 = (int)$int_cnt2$$constant;
15603 if (icnt2 >= 16) {
15604 // IndexOf for constant substrings with size >= 16 elements
15605 // which don't need to be loaded through the stack.
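// (Illustrative note: with the SSE4.2 pcmpestri-based code a 128-bit XMM register
// holds 16 Latin-1 bytes, which is presumably why a constant needle of at least 16
// elements never needs the page-boundary stack copy; the char-based variants below
// use >= 8 for the same reason, since 8 chars are the same 16 bytes.)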
15606 __ string_indexofC8($str1$$Register, $str2$$Register,
15607 $cnt1$$Register, $cnt2$$Register,
15608 icnt2, $result$$Register,
15609 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15610 } else {
15611 // Small strings are loaded through the stack if they cross a page boundary.
15612 __ string_indexof($str1$$Register, $str2$$Register,
15613 $cnt1$$Register, $cnt2$$Register,
15614 icnt2, $result$$Register,
15615 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15616 }
15617 %}
15618 ins_pipe( pipe_slow );
15619 %}
15620
15621 // fast search of substring with known size.
15622 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15623 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15624 %{
15625 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15626 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15627 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15628
15629 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15630 ins_encode %{
15631 int icnt2 = (int)$int_cnt2$$constant;
15632 if (icnt2 >= 8) {
15633 // IndexOf for constant substrings with size >= 8 elements
15634 // which don't need to be loaded through the stack.
15635 __ string_indexofC8($str1$$Register, $str2$$Register,
15636 $cnt1$$Register, $cnt2$$Register,
15637 icnt2, $result$$Register,
15638 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15639 } else {
15640 // Small strings are loaded through the stack if they cross a page boundary.
15641 __ string_indexof($str1$$Register, $str2$$Register,
15642 $cnt1$$Register, $cnt2$$Register,
15643 icnt2, $result$$Register,
15644 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15645 }
15646 %}
15647 ins_pipe( pipe_slow );
15648 %}
15649
15650 // fast search of substring with known size.
15651 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15652 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15653 %{
15654 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15655 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15656 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15657
15658 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15659 ins_encode %{
15660 int icnt2 = (int)$int_cnt2$$constant;
15661 if (icnt2 >= 8) {
15662 // IndexOf for constant substrings with size >= 8 elements
15663 // which don't need to be loaded through the stack.
15664 __ string_indexofC8($str1$$Register, $str2$$Register,
15665 $cnt1$$Register, $cnt2$$Register,
15666 icnt2, $result$$Register,
15667 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15668 } else {
15669 // Small strings are loaded through the stack if they cross a page boundary.
15670 __ string_indexof($str1$$Register, $str2$$Register,
15671 $cnt1$$Register, $cnt2$$Register,
15672 icnt2, $result$$Register,
15673 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15674 }
15675 %}
15676 ins_pipe( pipe_slow );
15677 %}
15678
15679 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15680 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15681 %{
15682 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15683 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15684 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15685
15686 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15687 ins_encode %{
15688 __ string_indexof($str1$$Register, $str2$$Register,
15689 $cnt1$$Register, $cnt2$$Register,
15690 (-1), $result$$Register,
15691 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15692 %}
15693 ins_pipe( pipe_slow );
15694 %}
15695
15696 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15697 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15698 %{
15699 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15700 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15701 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15702
15703 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15704 ins_encode %{
15705 __ string_indexof($str1$$Register, $str2$$Register,
15706 $cnt1$$Register, $cnt2$$Register,
15707 (-1), $result$$Register,
15708 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15709 %}
15710 ins_pipe( pipe_slow );
15711 %}
15712
15713 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15714 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15715 %{
15716 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15717 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15718 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15719
15720 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15721 ins_encode %{
15722 __ string_indexof($str1$$Register, $str2$$Register,
15723 $cnt1$$Register, $cnt2$$Register,
15724 (-1), $result$$Register,
15725 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15726 %}
15727 ins_pipe( pipe_slow );
15728 %}
15729
15730 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15731 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15732 %{
15733 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15734 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15735 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15736 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15737 ins_encode %{
15738 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15739 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15740 %}
15741 ins_pipe( pipe_slow );
15742 %}
15743
15744 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15745 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15746 %{
15747 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15748 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15749 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15750 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15751 ins_encode %{
15752 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15753 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15754 %}
15755 ins_pipe( pipe_slow );
15756 %}
15757
15758 // fast string equals
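// Both StrEquals rules (and the AryEq rules further down) share
// MacroAssembler::arrays_equals(): the leading boolean apparently selects whole-array
// mode (AryEq, lengths handled inside the routine) versus string mode (StrEquals, the
// common length arriving in $cnt), and the trailing boolean selects char versus byte
// elements, as the /* char */ comments indicate.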
15759 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15760 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15761 %{
15762 predicate(!VM_Version::supports_avx512vlbw());
15763 match(Set result (StrEquals (Binary str1 str2) cnt));
15764 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15765
15766 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15767 ins_encode %{
15768 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15769 $cnt$$Register, $result$$Register, $tmp3$$Register,
15770 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15771 %}
15772 ins_pipe( pipe_slow );
15773 %}
15774
15775 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15776 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15777 %{
15778 predicate(VM_Version::supports_avx512vlbw());
15779 match(Set result (StrEquals (Binary str1 str2) cnt));
15780 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15781
15782 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15783 ins_encode %{
15784 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15785 $cnt$$Register, $result$$Register, $tmp3$$Register,
15786 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15787 %}
15788 ins_pipe( pipe_slow );
15789 %}
15790
15791 // fast array equals
15792 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15793 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15794 %{
15795 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15796 match(Set result (AryEq ary1 ary2));
15797 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15798
15799 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15800 ins_encode %{
15801 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15802 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15803 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15804 %}
15805 ins_pipe( pipe_slow );
15806 %}
15807
15808 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15809 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15810 %{
15811 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15812 match(Set result (AryEq ary1 ary2));
15813 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15814
15815 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15816 ins_encode %{
15817 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15818 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15819 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15820 %}
15821 ins_pipe( pipe_slow );
15822 %}
15823
15824 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15825 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15826 %{
15827 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15828 match(Set result (AryEq ary1 ary2));
15829 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15830
15831 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15832 ins_encode %{
15833 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15834 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15835 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15836 %}
15837 ins_pipe( pipe_slow );
15838 %}
15839
15840 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15841 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15842 %{
15843 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15844 match(Set result (AryEq ary1 ary2));
15845 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15846
15847 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15848 ins_encode %{
15849 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15850 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15851 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15852 %}
15853 ins_pipe( pipe_slow );
15854 %}
15855
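// Vectorized hash code, backing the jdk.internal.util.ArraysSupport.vectorizedHashCode
// intrinsic (assuming that mapping is current); matched only with AVX2, per the
// predicate below, and the element type arrives as a constant BasicType.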
15856 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15857 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15858 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15859 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15860 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15861 %{
15862 predicate(UseAVX >= 2);
15863 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15864 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15865 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15866 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15867 USE basic_type, KILL cr);
15868
15869 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15870 ins_encode %{
15871 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15872 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15873 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15874 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15875 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15876 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15877 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15878 %}
15879 ins_pipe( pipe_slow );
15880 %}
15881
15882 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15883 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15884 %{
15885 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15886 match(Set result (CountPositives ary1 len));
15887 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15888
15889 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15890 ins_encode %{
15891 __ count_positives($ary1$$Register, $len$$Register,
15892 $result$$Register, $tmp3$$Register,
15893 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15894 %}
15895 ins_pipe( pipe_slow );
15896 %}
15897
15898 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15899 legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15900 %{
15901 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15902 match(Set result (CountPositives ary1 len));
15903 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15904
15905 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15906 ins_encode %{
15907 __ count_positives($ary1$$Register, $len$$Register,
15908 $result$$Register, $tmp3$$Register,
15909 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15910 %}
15911 ins_pipe( pipe_slow );
15912 %}
15913
15914 // fast char[] to byte[] compression
15915 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15916 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15917 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15918 match(Set result (StrCompressedCopy src (Binary dst len)));
15919 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15920 USE_KILL len, KILL tmp5, KILL cr);
15921
15922 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15923 ins_encode %{
15924 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15925 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15926 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15927 knoreg, knoreg);
15928 %}
15929 ins_pipe( pipe_slow );
15930 %}
15931
15932 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15933 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15934 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15935 match(Set result (StrCompressedCopy src (Binary dst len)));
15936 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15937 USE_KILL len, KILL tmp5, KILL cr);
15938
15939 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15940 ins_encode %{
15941 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15942 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15943 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15944 $ktmp1$$KRegister, $ktmp2$$KRegister);
15945 %}
15946 ins_pipe( pipe_slow );
15947 %}
15948 // fast byte[] to char[] inflation
15949 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15950 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15951 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15952 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15953 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15954
15955 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15956 ins_encode %{
15957 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15958 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15959 %}
15960 ins_pipe( pipe_slow );
15961 %}
15962
15963 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15964 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15965 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15966 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15967 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15968
15969 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15970 ins_encode %{
15971 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15972 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15973 %}
15974 ins_pipe( pipe_slow );
15975 %}
15976
15977 // encode char[] to byte[] in ISO_8859_1
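// Both encode rules below call MacroAssembler::encode_iso_array(); the trailing boolean
// selects ASCII mode (only values <= 0x7F are encodable) versus ISO-8859-1 mode
// (values <= 0xFF), matching the node's is_ascii() predicate.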
15978 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15979 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15980 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15981 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15982 match(Set result (EncodeISOArray src (Binary dst len)));
15983 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15984
15985 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15986 ins_encode %{
15987 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15988 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15989 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15990 %}
15991 ins_pipe( pipe_slow );
15992 %}
15993
15994 // encode char[] to byte[] in ASCII
15995 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15996 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15997 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15998 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15999 match(Set result (EncodeISOArray src (Binary dst len)));
16000 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16001
16002 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16003 ins_encode %{
16004 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16005 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16006 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16007 %}
16008 ins_pipe( pipe_slow );
16009 %}
16010
16011 //----------Overflow Math Instructions-----------------------------------------
16012
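// These rules only produce condition codes; the overflow flag is the interesting
// output, and the matched branch consumes it. As an illustrative sketch (not taken
// from this file), Math.addExact(x, y) typically lowers to something like:
//
//   addl  rax, rdx          // overflowAddI_rReg: performs the add and sets OF
//   jo    overflow_path     // deoptimize/throw ArithmeticException on overflow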
16013 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16014 %{
16015 match(Set cr (OverflowAddI op1 op2));
16016 effect(DEF cr, USE_KILL op1, USE op2);
16017
16018 format %{ "addl $op1, $op2\t# overflow check int" %}
16019
16020 ins_encode %{
16021 __ addl($op1$$Register, $op2$$Register);
16022 %}
16023 ins_pipe(ialu_reg_reg);
16024 %}
16025
16026 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16027 %{
16028 match(Set cr (OverflowAddI op1 op2));
16029 effect(DEF cr, USE_KILL op1, USE op2);
16030
16031 format %{ "addl $op1, $op2\t# overflow check int" %}
16032
16033 ins_encode %{
16034 __ addl($op1$$Register, $op2$$constant);
16035 %}
16036 ins_pipe(ialu_reg_reg);
16037 %}
16038
16039 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16040 %{
16041 match(Set cr (OverflowAddL op1 op2));
16042 effect(DEF cr, USE_KILL op1, USE op2);
16043
16044 format %{ "addq $op1, $op2\t# overflow check long" %}
16045 ins_encode %{
16046 __ addq($op1$$Register, $op2$$Register);
16047 %}
16048 ins_pipe(ialu_reg_reg);
16049 %}
16050
16051 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16052 %{
16053 match(Set cr (OverflowAddL op1 op2));
16054 effect(DEF cr, USE_KILL op1, USE op2);
16055
16056 format %{ "addq $op1, $op2\t# overflow check long" %}
16057 ins_encode %{
16058 __ addq($op1$$Register, $op2$$constant);
16059 %}
16060 ins_pipe(ialu_reg_reg);
16061 %}
16062
16063 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16064 %{
16065 match(Set cr (OverflowSubI op1 op2));
16066
16067 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16068 ins_encode %{
16069 __ cmpl($op1$$Register, $op2$$Register);
16070 %}
16071 ins_pipe(ialu_reg_reg);
16072 %}
16073
16074 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16075 %{
16076 match(Set cr (OverflowSubI op1 op2));
16077
16078 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16079 ins_encode %{
16080 __ cmpl($op1$$Register, $op2$$constant);
16081 %}
16082 ins_pipe(ialu_reg_reg);
16083 %}
16084
16085 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16086 %{
16087 match(Set cr (OverflowSubL op1 op2));
16088
16089 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16090 ins_encode %{
16091 __ cmpq($op1$$Register, $op2$$Register);
16092 %}
16093 ins_pipe(ialu_reg_reg);
16094 %}
16095
16096 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16097 %{
16098 match(Set cr (OverflowSubL op1 op2));
16099
16100 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16101 ins_encode %{
16102 __ cmpq($op1$$Register, $op2$$constant);
16103 %}
16104 ins_pipe(ialu_reg_reg);
16105 %}
16106
16107 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16108 %{
16109 match(Set cr (OverflowSubI zero op2));
16110 effect(DEF cr, USE_KILL op2);
16111
16112 format %{ "negl $op2\t# overflow check int" %}
16113 ins_encode %{
16114 __ negl($op2$$Register);
16115 %}
16116 ins_pipe(ialu_reg_reg);
16117 %}
16118
16119 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16120 %{
16121 match(Set cr (OverflowSubL zero op2));
16122 effect(DEF cr, USE_KILL op2);
16123
16124 format %{ "negq $op2\t# overflow check long" %}
16125 ins_encode %{
16126 __ negq($op2$$Register);
16127 %}
16128 ins_pipe(ialu_reg_reg);
16129 %}
16130
16131 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16132 %{
16133 match(Set cr (OverflowMulI op1 op2));
16134 effect(DEF cr, USE_KILL op1, USE op2);
16135
16136 format %{ "imull $op1, $op2\t# overflow check int" %}
16137 ins_encode %{
16138 __ imull($op1$$Register, $op2$$Register);
16139 %}
16140 ins_pipe(ialu_reg_reg_alu0);
16141 %}
16142
16143 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16144 %{
16145 match(Set cr (OverflowMulI op1 op2));
16146 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16147
16148 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16149 ins_encode %{
16150 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16151 %}
16152 ins_pipe(ialu_reg_reg_alu0);
16153 %}
16154
16155 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16156 %{
16157 match(Set cr (OverflowMulL op1 op2));
16158 effect(DEF cr, USE_KILL op1, USE op2);
16159
16160 format %{ "imulq $op1, $op2\t# overflow check long" %}
16161 ins_encode %{
16162 __ imulq($op1$$Register, $op2$$Register);
16163 %}
16164 ins_pipe(ialu_reg_reg_alu0);
16165 %}
16166
16167 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16168 %{
16169 match(Set cr (OverflowMulL op1 op2));
16170 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16171
16172 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16173 ins_encode %{
16174 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16175 %}
16176 ins_pipe(ialu_reg_reg_alu0);
16177 %}
16178
16179
16180 //----------Control Flow Instructions------------------------------------------
16181 // Signed compare Instructions
16182
16183 // XXX more variants!!
16184 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16185 %{
16186 match(Set cr (CmpI op1 op2));
16187 effect(DEF cr, USE op1, USE op2);
16188
16189 format %{ "cmpl $op1, $op2" %}
16190 ins_encode %{
16191 __ cmpl($op1$$Register, $op2$$Register);
16192 %}
16193 ins_pipe(ialu_cr_reg_reg);
16194 %}
16195
16196 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16197 %{
16198 match(Set cr (CmpI op1 op2));
16199
16200 format %{ "cmpl $op1, $op2" %}
16201 ins_encode %{
16202 __ cmpl($op1$$Register, $op2$$constant);
16203 %}
16204 ins_pipe(ialu_cr_reg_imm);
16205 %}
16206
16207 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16208 %{
16209 match(Set cr (CmpI op1 (LoadI op2)));
16210
16211 ins_cost(500); // XXX
16212 format %{ "cmpl $op1, $op2" %}
16213 ins_encode %{
16214 __ cmpl($op1$$Register, $op2$$Address);
16215 %}
16216 ins_pipe(ialu_cr_reg_mem);
16217 %}
16218
16219 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16220 %{
16221 match(Set cr (CmpI src zero));
16222
16223 format %{ "testl $src, $src" %}
16224 ins_encode %{
16225 __ testl($src$$Register, $src$$Register);
16226 %}
16227 ins_pipe(ialu_cr_reg_imm);
16228 %}
16229
16230 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16231 %{
16232 match(Set cr (CmpI (AndI src con) zero));
16233
16234 format %{ "testl $src, $con" %}
16235 ins_encode %{
16236 __ testl($src$$Register, $con$$constant);
16237 %}
16238 ins_pipe(ialu_cr_reg_imm);
16239 %}
16240
16241 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16242 %{
16243 match(Set cr (CmpI (AndI src1 src2) zero));
16244
16245 format %{ "testl $src1, $src2" %}
16246 ins_encode %{
16247 __ testl($src1$$Register, $src2$$Register);
16248 %}
16249 ins_pipe(ialu_cr_reg_imm);
16250 %}
16251
16252 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16253 %{
16254 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16255
16256 format %{ "testl $src, $mem" %}
16257 ins_encode %{
16258 __ testl($src$$Register, $mem$$Address);
16259 %}
16260 ins_pipe(ialu_cr_reg_mem);
16261 %}
16262
16263 // Unsigned compare Instructions; really, same as signed except they
16264 // produce an rFlagsRegU instead of rFlagsReg.
16265 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16266 %{
16267 match(Set cr (CmpU op1 op2));
16268
16269 format %{ "cmpl $op1, $op2\t# unsigned" %}
16270 ins_encode %{
16271 __ cmpl($op1$$Register, $op2$$Register);
16272 %}
16273 ins_pipe(ialu_cr_reg_reg);
16274 %}
16275
16276 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16277 %{
16278 match(Set cr (CmpU op1 op2));
16279
16280 format %{ "cmpl $op1, $op2\t# unsigned" %}
16281 ins_encode %{
16282 __ cmpl($op1$$Register, $op2$$constant);
16283 %}
16284 ins_pipe(ialu_cr_reg_imm);
16285 %}
16286
16287 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16288 %{
16289 match(Set cr (CmpU op1 (LoadI op2)));
16290
16291 ins_cost(500); // XXX
16292 format %{ "cmpl $op1, $op2\t# unsigned" %}
16293 ins_encode %{
16294 __ cmpl($op1$$Register, $op2$$Address);
16295 %}
16296 ins_pipe(ialu_cr_reg_mem);
16297 %}
16298
16299 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16300 %{
16301 match(Set cr (CmpU src zero));
16302
16303 format %{ "testl $src, $src\t# unsigned" %}
16304 ins_encode %{
16305 __ testl($src$$Register, $src$$Register);
16306 %}
16307 ins_pipe(ialu_cr_reg_imm);
16308 %}
16309
16310 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16311 %{
16312 match(Set cr (CmpP op1 op2));
16313
16314 format %{ "cmpq $op1, $op2\t# ptr" %}
16315 ins_encode %{
16316 __ cmpq($op1$$Register, $op2$$Register);
16317 %}
16318 ins_pipe(ialu_cr_reg_reg);
16319 %}
16320
16321 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16322 %{
16323 match(Set cr (CmpP op1 (LoadP op2)));
16324 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16325
16326 ins_cost(500); // XXX
16327 format %{ "cmpq $op1, $op2\t# ptr" %}
16328 ins_encode %{
16329 __ cmpq($op1$$Register, $op2$$Address);
16330 %}
16331 ins_pipe(ialu_cr_reg_mem);
16332 %}
16333
16334 // XXX this is generalized by compP_rReg_mem???
16335 // Compare raw pointer (used in out-of-heap check).
16336 // Only works because non-oop pointers must be raw pointers
16337 // and raw pointers have no anti-dependencies.
16338 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16339 %{
16340 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16341 n->in(2)->as_Load()->barrier_data() == 0);
16342 match(Set cr (CmpP op1 (LoadP op2)));
16343
16344 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16345 ins_encode %{
16346 __ cmpq($op1$$Register, $op2$$Address);
16347 %}
16348 ins_pipe(ialu_cr_reg_mem);
16349 %}
16350
16351 // This will generate a signed flags result. This should be OK since
16352 // any compare to a zero should be eq/neq.
16353 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16354 %{
16355 match(Set cr (CmpP src zero));
16356
16357 format %{ "testq $src, $src\t# ptr" %}
16358 ins_encode %{
16359 __ testq($src$$Register, $src$$Register);
16360 %}
16361 ins_pipe(ialu_cr_reg_imm);
16362 %}
16363
16364 // This will generate a signed flags result. This should be OK since
16365 // any compare to a zero should be eq/neq.
16366 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16367 %{
16368 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16369 n->in(1)->as_Load()->barrier_data() == 0);
16370 match(Set cr (CmpP (LoadP op) zero));
16371
16372 ins_cost(500); // XXX
16373 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16374 ins_encode %{
16375 __ testq($op$$Address, 0xFFFFFFFF);
16376 %}
16377 ins_pipe(ialu_cr_reg_imm);
16378 %}
16379
16380 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16381 %{
16382 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16383 n->in(1)->as_Load()->barrier_data() == 0);
16384 match(Set cr (CmpP (LoadP mem) zero));
16385
16386 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16387 ins_encode %{
16388 __ cmpq(r12, $mem$$Address);
16389 %}
16390 ins_pipe(ialu_cr_reg_mem);
16391 %}
16392
16393 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16394 %{
16395 match(Set cr (CmpN op1 op2));
16396
16397 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16398 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16399 ins_pipe(ialu_cr_reg_reg);
16400 %}
16401
16402 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16403 %{
16404 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16405 match(Set cr (CmpN src (LoadN mem)));
16406
16407 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16408 ins_encode %{
16409 __ cmpl($src$$Register, $mem$$Address);
16410 %}
16411 ins_pipe(ialu_cr_reg_mem);
16412 %}
16413
16414 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16415 match(Set cr (CmpN op1 op2));
16416
16417 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16418 ins_encode %{
16419 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16420 %}
16421 ins_pipe(ialu_cr_reg_imm);
16422 %}
16423
16424 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16425 %{
16426 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16427 match(Set cr (CmpN src (LoadN mem)));
16428
16429 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16430 ins_encode %{
16431 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16432 %}
16433 ins_pipe(ialu_cr_reg_mem);
16434 %}
16435
16436 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16437 match(Set cr (CmpN op1 op2));
16438
16439 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16440 ins_encode %{
16441 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16442 %}
16443 ins_pipe(ialu_cr_reg_imm);
16444 %}
16445
16446 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16447 %{
16448 predicate(!UseCompactObjectHeaders);
16449 match(Set cr (CmpN src (LoadNKlass mem)));
16450
16451 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16452 ins_encode %{
16453 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16454 %}
16455 ins_pipe(ialu_cr_reg_mem);
16456 %}
16457
16458 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16459 match(Set cr (CmpN src zero));
16460
16461 format %{ "testl $src, $src\t# compressed ptr" %}
16462 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16463 ins_pipe(ialu_cr_reg_imm);
16464 %}
16465
16466 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16467 %{
16468 predicate(CompressedOops::base() != nullptr &&
16469 n->in(1)->as_Load()->barrier_data() == 0);
16470 match(Set cr (CmpN (LoadN mem) zero));
16471
16472 ins_cost(500); // XXX
16473 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16474 ins_encode %{
16475 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16476 %}
16477 ins_pipe(ialu_cr_reg_mem);
16478 %}
16479
16480 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16481 %{
16482 predicate(CompressedOops::base() == nullptr &&
16483 n->in(1)->as_Load()->barrier_data() == 0);
16484 match(Set cr (CmpN (LoadN mem) zero));
16485
16486 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16487 ins_encode %{
16488 __ cmpl(r12, $mem$$Address);
16489 %}
16490 ins_pipe(ialu_cr_reg_mem);
16491 %}
16492
16493 // Yanked all unsigned pointer compare operations.
16494 // Pointer compares are done with CmpP which is already unsigned.
16495
16496 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16497 %{
16498 match(Set cr (CmpL op1 op2));
16499
16500 format %{ "cmpq $op1, $op2" %}
16501 ins_encode %{
16502 __ cmpq($op1$$Register, $op2$$Register);
16503 %}
16504 ins_pipe(ialu_cr_reg_reg);
16505 %}
16506
16507 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16508 %{
16509 match(Set cr (CmpL op1 op2));
16510
16511 format %{ "cmpq $op1, $op2" %}
16512 ins_encode %{
16513 __ cmpq($op1$$Register, $op2$$constant);
16514 %}
16515 ins_pipe(ialu_cr_reg_imm);
16516 %}
16517
16518 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16519 %{
16520 match(Set cr (CmpL op1 (LoadL op2)));
16521
16522 format %{ "cmpq $op1, $op2" %}
16523 ins_encode %{
16524 __ cmpq($op1$$Register, $op2$$Address);
16525 %}
16526 ins_pipe(ialu_cr_reg_mem);
16527 %}
16528
16529 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16530 %{
16531 match(Set cr (CmpL src zero));
16532
16533 format %{ "testq $src, $src" %}
16534 ins_encode %{
16535 __ testq($src$$Register, $src$$Register);
16536 %}
16537 ins_pipe(ialu_cr_reg_imm);
16538 %}
16539
16540 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16541 %{
16542 match(Set cr (CmpL (AndL src con) zero));
16543
16544 format %{ "testq $src, $con\t# long" %}
16545 ins_encode %{
16546 __ testq($src$$Register, $con$$constant);
16547 %}
16548 ins_pipe(ialu_cr_reg_imm);
16549 %}
16550
16551 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16552 %{
16553 match(Set cr (CmpL (AndL src1 src2) zero));
16554
16555 format %{ "testq $src1, $src2\t# long" %}
16556 ins_encode %{
16557 __ testq($src1$$Register, $src2$$Register);
16558 %}
16559 ins_pipe(ialu_cr_reg_imm);
16560 %}
16561
16562 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16563 %{
16564 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16565
16566 format %{ "testq $src, $mem" %}
16567 ins_encode %{
16568 __ testq($src$$Register, $mem$$Address);
16569 %}
16570 ins_pipe(ialu_cr_reg_mem);
16571 %}
16572
16573 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16574 %{
16575 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16576
16577 format %{ "testq $src, $mem" %}
16578 ins_encode %{
16579 __ testq($src$$Register, $mem$$Address);
16580 %}
16581 ins_pipe(ialu_cr_reg_mem);
16582 %}
16583
16584 // Manifest a CmpU result in an integer register. Very painful.
16585 // This is the test to avoid.
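// The sequence below presets $dst to -1, skips the setcc when the unsigned compare is
// below (so -1 survives as the "less" result), and otherwise uses setcc(notZero) to
// produce 0 for equal and 1 for greater. The CmpL3 and CmpUL3 rules below follow the
// same pattern with signed/unsigned long compares.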
16586 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16587 %{
16588 match(Set dst (CmpU3 src1 src2));
16589 effect(KILL flags);
16590
16591 ins_cost(275); // XXX
16592 format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
16593 "movl $dst, -1\n\t"
16594 "jb,u done\n\t"
16595 "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16596 "done:" %}
16597 ins_encode %{
16598 Label done;
16599 __ cmpl($src1$$Register, $src2$$Register);
16600 __ movl($dst$$Register, -1);
16601 __ jccb(Assembler::below, done);
16602 __ setcc(Assembler::notZero, $dst$$Register);
16603 __ bind(done);
16604 %}
16605 ins_pipe(pipe_slow);
16606 %}
16607
16608 // Manifest a CmpL result in an integer register. Very painful.
16609 // This is the test to avoid.
16610 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16611 %{
16612 match(Set dst (CmpL3 src1 src2));
16613 effect(KILL flags);
16614
16615 ins_cost(275); // XXX
16616 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16617 "movl $dst, -1\n\t"
16618 "jl,s done\n\t"
16619 "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16620 "done:" %}
16621 ins_encode %{
16622 Label done;
16623 __ cmpq($src1$$Register, $src2$$Register);
16624 __ movl($dst$$Register, -1);
16625 __ jccb(Assembler::less, done);
16626 __ setcc(Assembler::notZero, $dst$$Register);
16627 __ bind(done);
16628 %}
16629 ins_pipe(pipe_slow);
16630 %}
16631
16632 // Manifest a CmpUL result in an integer register. Very painful.
16633 // This is the test to avoid.
16634 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16635 %{
16636 match(Set dst (CmpUL3 src1 src2));
16637 effect(KILL flags);
16638
16639 ins_cost(275); // XXX
16640 format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
16641 "movl $dst, -1\n\t"
16642 "jb,u done\n\t"
16643 "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16644 "done:" %}
16645 ins_encode %{
16646 Label done;
16647 __ cmpq($src1$$Register, $src2$$Register);
16648 __ movl($dst$$Register, -1);
16649 __ jccb(Assembler::below, done);
16650 __ setcc(Assembler::notZero, $dst$$Register);
16651 __ bind(done);
16652 %}
16653 ins_pipe(pipe_slow);
16654 %}
16655
16656 // Unsigned long compare Instructions; really, same as signed long except they
16657 // produce an rFlagsRegU instead of rFlagsReg.
16658 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16659 %{
16660 match(Set cr (CmpUL op1 op2));
16661
16662 format %{ "cmpq $op1, $op2\t# unsigned" %}
16663 ins_encode %{
16664 __ cmpq($op1$$Register, $op2$$Register);
16665 %}
16666 ins_pipe(ialu_cr_reg_reg);
16667 %}
16668
16669 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16670 %{
16671 match(Set cr (CmpUL op1 op2));
16672
16673 format %{ "cmpq $op1, $op2\t# unsigned" %}
16674 ins_encode %{
16675 __ cmpq($op1$$Register, $op2$$constant);
16676 %}
16677 ins_pipe(ialu_cr_reg_imm);
16678 %}
16679
16680 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16681 %{
16682 match(Set cr (CmpUL op1 (LoadL op2)));
16683
16684 format %{ "cmpq $op1, $op2\t# unsigned" %}
16685 ins_encode %{
16686 __ cmpq($op1$$Register, $op2$$Address);
16687 %}
16688 ins_pipe(ialu_cr_reg_mem);
16689 %}
16690
16691 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16692 %{
16693 match(Set cr (CmpUL src zero));
16694
16695 format %{ "testq $src, $src\t# unsigned" %}
16696 ins_encode %{
16697 __ testq($src$$Register, $src$$Register);
16698 %}
16699 ins_pipe(ialu_cr_reg_imm);
16700 %}
16701
16702 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16703 %{
16704 match(Set cr (CmpI (LoadB mem) imm));
16705
16706 ins_cost(125);
16707 format %{ "cmpb $mem, $imm" %}
16708 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16709 ins_pipe(ialu_cr_reg_mem);
16710 %}
16711
16712 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16713 %{
16714 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16715
16716 ins_cost(125);
16717 format %{ "testb $mem, $imm\t# ubyte" %}
16718 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16719 ins_pipe(ialu_cr_reg_mem);
16720 %}
16721
16722 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16723 %{
16724 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16725
16726 ins_cost(125);
16727 format %{ "testb $mem, $imm\t# byte" %}
16728 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16729 ins_pipe(ialu_cr_reg_mem);
16730 %}
16731
16732 //----------Max and Min--------------------------------------------------------
16733 // Min Instructions
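// MinI/MaxI have no single matching instruction here; the rules below expand into a
// compare followed by a conditional move: for MinI, src replaces dst only when dst
// compares greater than src, and MaxI uses the mirror-image "less" condition.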
16734
16735 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16736 %{
16737 predicate(!UseAPX);
16738 effect(USE_DEF dst, USE src, USE cr);
16739
16740 format %{ "cmovlgt $dst, $src\t# min" %}
16741 ins_encode %{
16742 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16743 %}
16744 ins_pipe(pipe_cmov_reg);
16745 %}
16746
16747 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16748 %{
16749 predicate(UseAPX);
16750 effect(DEF dst, USE src1, USE src2, USE cr);
16751
16752 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16753 ins_encode %{
16754 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16755 %}
16756 ins_pipe(pipe_cmov_reg);
16757 %}
16758
16759 instruct minI_rReg(rRegI dst, rRegI src)
16760 %{
16761 predicate(!UseAPX);
16762 match(Set dst (MinI dst src));
16763
16764 ins_cost(200);
16765 expand %{
16766 rFlagsReg cr;
16767 compI_rReg(cr, dst, src);
16768 cmovI_reg_g(dst, src, cr);
16769 %}
16770 %}
16771
16772 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16773 %{
16774 predicate(UseAPX);
16775 match(Set dst (MinI src1 src2));
16776 effect(DEF dst, USE src1, USE src2);
16777 flag(PD::Flag_ndd_demotable_opr1);
16778
16779 ins_cost(200);
16780 expand %{
16781 rFlagsReg cr;
16782 compI_rReg(cr, src1, src2);
16783 cmovI_reg_g_ndd(dst, src1, src2, cr);
16784 %}
16785 %}
16786
16787 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16788 %{
16789 predicate(!UseAPX);
16790 effect(USE_DEF dst, USE src, USE cr);
16791
16792 format %{ "cmovllt $dst, $src\t# max" %}
16793 ins_encode %{
16794 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16795 %}
16796 ins_pipe(pipe_cmov_reg);
16797 %}
16798
16799 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16800 %{
16801 predicate(UseAPX);
16802 effect(DEF dst, USE src1, USE src2, USE cr);
16803
16804 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16805 ins_encode %{
16806 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16807 %}
16808 ins_pipe(pipe_cmov_reg);
16809 %}
16810
16811 instruct maxI_rReg(rRegI dst, rRegI src)
16812 %{
16813 predicate(!UseAPX);
16814 match(Set dst (MaxI dst src));
16815
16816 ins_cost(200);
16817 expand %{
16818 rFlagsReg cr;
16819 compI_rReg(cr, dst, src);
16820 cmovI_reg_l(dst, src, cr);
16821 %}
16822 %}
16823
16824 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16825 %{
16826 predicate(UseAPX);
16827 match(Set dst (MaxI src1 src2));
16828 effect(DEF dst, USE src1, USE src2);
16829 flag(PD::Flag_ndd_demotable_opr1);
16830
16831 ins_cost(200);
16832 expand %{
16833 rFlagsReg cr;
16834 compI_rReg(cr, src1, src2);
16835 cmovI_reg_l_ndd(dst, src1, src2, cr);
16836 %}
16837 %}
16838
16839 // ============================================================================
16840 // Branch Instructions
16841
16842 // Jump Direct - Label defines a relative address from JMP+1
16843 instruct jmpDir(label labl)
16844 %{
16845 match(Goto);
16846 effect(USE labl);
16847
16848 ins_cost(300);
16849 format %{ "jmp $labl" %}
16850 size(5);
16851 ins_encode %{
16852 Label* L = $labl$$label;
16853 __ jmp(*L, false); // Always long jump
16854 %}
16855 ins_pipe(pipe_jmp);
16856 %}
16857
16858 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16859 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16860 %{
16861 match(If cop cr);
16862 effect(USE labl);
16863
16864 ins_cost(300);
16865 format %{ "j$cop $labl" %}
16866 size(6);
16867 ins_encode %{
16868 Label* L = $labl$$label;
16869 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16870 %}
16871 ins_pipe(pipe_jcc);
16872 %}
16873
16874 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16875 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16876 %{
16877 match(CountedLoopEnd cop cr);
16878 effect(USE labl);
16879
16880 ins_cost(300);
16881 format %{ "j$cop $labl\t# loop end" %}
16882 size(6);
16883 ins_encode %{
16884 Label* L = $labl$$label;
16885 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16886 %}
16887 ins_pipe(pipe_jcc);
16888 %}
16889
16890 // Jump Direct Conditional - using unsigned comparison
16891 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16892 match(If cop cmp);
16893 effect(USE labl);
16894
16895 ins_cost(300);
16896 format %{ "j$cop,u $labl" %}
16897 size(6);
16898 ins_encode %{
16899 Label* L = $labl$$label;
16900 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16901 %}
16902 ins_pipe(pipe_jcc);
16903 %}
16904
16905 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16906 match(If cop cmp);
16907 effect(USE labl);
16908
16909 ins_cost(200);
16910 format %{ "j$cop,u $labl" %}
16911 size(6);
16912 ins_encode %{
16913 Label* L = $labl$$label;
16914 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16915 %}
16916 ins_pipe(pipe_jcc);
16917 %}
16918
16919 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16920 match(If cop cmp);
16921 effect(USE labl);
16922
16923 ins_cost(200);
16924 format %{ $$template
16925 if ($cop$$cmpcode == Assembler::notEqual) {
16926 $$emit$$"jp,u $labl\n\t"
16927 $$emit$$"j$cop,u $labl"
16928 } else {
16929 $$emit$$"jp,u done\n\t"
16930 $$emit$$"j$cop,u $labl\n\t"
16931 $$emit$$"done:"
16932 }
16933 %}
16934 ins_encode %{
16935 Label* l = $labl$$label;
16936 if ($cop$$cmpcode == Assembler::notEqual) {
16937 __ jcc(Assembler::parity, *l, false);
16938 __ jcc(Assembler::notEqual, *l, false);
16939 } else if ($cop$$cmpcode == Assembler::equal) {
16940 Label done;
16941 __ jccb(Assembler::parity, done);
16942 __ jcc(Assembler::equal, *l, false);
16943 __ bind(done);
16944 } else {
16945 ShouldNotReachHere();
16946 }
16947 %}
16948 ins_pipe(pipe_jcc);
16949 %}
16950
16951 // Jump Direct Conditional - using signed and unsigned comparison
16952 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16953 match(If cop cmp);
16954 effect(USE labl);
16955
16956 ins_cost(200);
16957 format %{ "j$cop,su $labl" %}
16958 size(6);
16959 ins_encode %{
16960 Label* L = $labl$$label;
16961 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16962 %}
16963 ins_pipe(pipe_jcc);
16964 %}
16965
16966 // ============================================================================
16967 // The second, slow half of a subtype check. Scan the subklass's secondary
16968 // superklass array for an instance of the superklass. Set a hidden
16969 // internal cache on a hit (cache is checked with exposed code in
16970 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16971 // encoding ALSO sets flags.
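// Roughly, as an illustrative sketch only (not the emitted code; the names follow the
// offsets referenced in the format string below):
//
//   Array<Klass*>* ss = sub->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super) {
//       sub->set_secondary_super_cache(super);  // hit: update the cache
//       return 0;                               // and report zero (flags: Z)
//     }
//   }
//   return 1;                                   // miss: nonzero (flags: NZ)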
16972
16973 instruct partialSubtypeCheck(rdi_RegP result,
16974 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16975 rFlagsReg cr)
16976 %{
16977 match(Set result (PartialSubtypeCheck sub super));
16978 predicate(!UseSecondarySupersTable);
16979 effect(KILL rcx, KILL cr);
16980
16981 ins_cost(1100); // slightly larger than the next version
16982 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16983 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16984 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16985 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16986 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16987 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16988 "xorq $result, $result\t\t# Hit: rdi zero\n\t"
16989 "miss:\t" %}
16990
16991 ins_encode %{
16992 Label miss;
16993 // NB: Callers may assume that, when $result is a valid register,
16994 // check_klass_subtype_slow_path_linear sets it to a nonzero
16995 // value.
16996 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16997 $rcx$$Register, $result$$Register,
16998 nullptr, &miss,
16999 /*set_cond_codes:*/ true);
17000 __ xorptr($result$$Register, $result$$Register);
17001 __ bind(miss);
17002 %}
17003
17004 ins_pipe(pipe_slow);
17005 %}
17006
17007 // ============================================================================
17008 // Two versions of hashtable-based partialSubtypeCheck, both used when
17009 // we need to search for a super class in the secondary supers array.
17010 // The first is used when we don't know _a priori_ the class being
17011 // searched for. The second, far more common, is used when we do know:
17012 // this is used for instanceof, checkcast, and any case where C2 can
17013 // determine it by constant propagation.
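// For example (illustrative): a check whose superclass is only known at runtime, such
// as Class.isInstance() on a non-constant Class, uses the variable form; `x instanceof
// String` gives C2 a constant super klass, so the constant form applies and the hash
// slot is computed at compile time.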
17014
17015 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17016 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17017 rFlagsReg cr)
17018 %{
17019 match(Set result (PartialSubtypeCheck sub super));
17020 predicate(UseSecondarySupersTable);
17021 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17022
17023 ins_cost(1000);
17024 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17025
17026 ins_encode %{
17027 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17028 $temp3$$Register, $temp4$$Register, $result$$Register);
17029 %}
17030
17031 ins_pipe(pipe_slow);
17032 %}
17033
17034 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17035 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17036 rFlagsReg cr)
17037 %{
17038 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17039 predicate(UseSecondarySupersTable);
17040 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17041
17042 ins_cost(700); // smaller than the next version
17043 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17044
17045 ins_encode %{
17046 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17047 if (InlineSecondarySupersTest) {
17048 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17049 $temp3$$Register, $temp4$$Register, $result$$Register,
17050 super_klass_slot);
17051 } else {
17052 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17053 }
17054 %}
17055
17056 ins_pipe(pipe_slow);
17057 %}
17058
17059 // ============================================================================
17060 // Branch Instructions -- short offset versions
17061 //
17062 // These instructions are used to replace jumps of a long offset (the default
17063 // match) with jumps of a shorter offset. These instructions are all tagged
17064 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17065 // match rules in general matching. Instead, the ADLC generates a conversion
17066 // method in the MachNode which can be used to do in-place replacement of the
17067 // long variant with the shorter variant. The compiler determines whether the
17068 // short form can be used via the is_short_branch_offset() predicate in the
17069 // machine-specific code section of the file.
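// For reference, the savings come from the x86 encodings: a long jmp (E9 + rel32) is
// 5 bytes and a long jcc (0F 8x + rel32) is 6 bytes, while the short forms (EB + rel8
// and 7x + rel8) are 2 bytes each, matching the size() attributes declared below.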
17070
17071 // Jump Direct - Label defines a relative address from JMP+1
17072 instruct jmpDir_short(label labl) %{
17073 match(Goto);
17074 effect(USE labl);
17075
17076 ins_cost(300);
17077 format %{ "jmp,s $labl" %}
17078 size(2);
17079 ins_encode %{
17080 Label* L = $labl$$label;
17081 __ jmpb(*L);
17082 %}
17083 ins_pipe(pipe_jmp);
17084 ins_short_branch(1);
17085 %}
17086
17087 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17088 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17089 match(If cop cr);
17090 effect(USE labl);
17091
17092 ins_cost(300);
17093 format %{ "j$cop,s $labl" %}
17094 size(2);
17095 ins_encode %{
17096 Label* L = $labl$$label;
17097 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17098 %}
17099 ins_pipe(pipe_jcc);
17100 ins_short_branch(1);
17101 %}
17102
17103 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17104 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17105 match(CountedLoopEnd cop cr);
17106 effect(USE labl);
17107
17108 ins_cost(300);
17109 format %{ "j$cop,s $labl\t# loop end" %}
17110 size(2);
17111 ins_encode %{
17112 Label* L = $labl$$label;
17113 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17114 %}
17115 ins_pipe(pipe_jcc);
17116 ins_short_branch(1);
17117 %}
17118
17119 // Jump Direct Conditional - using unsigned comparison
17120 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17121 match(If cop cmp);
17122 effect(USE labl);
17123
17124 ins_cost(300);
17125 format %{ "j$cop,us $labl" %}
17126 size(2);
17127 ins_encode %{
17128 Label* L = $labl$$label;
17129 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17130 %}
17131 ins_pipe(pipe_jcc);
17132 ins_short_branch(1);
17133 %}
17134
17135 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17136 match(If cop cmp);
17137 effect(USE labl);
17138
17139 ins_cost(300);
17140 format %{ "j$cop,us $labl" %}
17141 size(2);
17142 ins_encode %{
17143 Label* L = $labl$$label;
17144 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17145 %}
17146 ins_pipe(pipe_jcc);
17147 ins_short_branch(1);
17148 %}
17149
17150 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17151 match(If cop cmp);
17152 effect(USE labl);
17153
17154 ins_cost(300);
17155 format %{ $$template
17156 if ($cop$$cmpcode == Assembler::notEqual) {
17157 $$emit$$"jp,u,s $labl\n\t"
17158 $$emit$$"j$cop,u,s $labl"
17159 } else {
17160 $$emit$$"jp,u,s done\n\t"
17161 $$emit$$"j$cop,u,s $labl\n\t"
17162 $$emit$$"done:"
17163 }
17164 %}
17165 size(4);
17166 ins_encode %{
17167 Label* l = $labl$$label;
17168 if ($cop$$cmpcode == Assembler::notEqual) {
17169 __ jccb(Assembler::parity, *l);
17170 __ jccb(Assembler::notEqual, *l);
17171 } else if ($cop$$cmpcode == Assembler::equal) {
17172 Label done;
17173 __ jccb(Assembler::parity, done);
17174 __ jccb(Assembler::equal, *l);
17175 __ bind(done);
17176 } else {
17177 ShouldNotReachHere();
17178 }
17179 %}
17180 ins_pipe(pipe_jcc);
17181 ins_short_branch(1);
17182 %}
17183
17184 // Jump Direct Conditional - using signed and unsigned comparison
17185 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17186 match(If cop cmp);
17187 effect(USE labl);
17188
17189 ins_cost(300);
17190 format %{ "j$cop,sus $labl" %}
17191 size(2);
17192 ins_encode %{
17193 Label* L = $labl$$label;
17194 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17195 %}
17196 ins_pipe(pipe_jcc);
17197 ins_short_branch(1);
17198 %}
17199
17200 // ============================================================================
17201 // inlined locking and unlocking
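// Sketch of the contract assumed here: fast_lock()/fast_unlock() emit the
// inline fast path for monitor enter/exit and set the condition codes so that
// the branch C2 generates after this node falls through on success and calls
// into the runtime slow path otherwise. The actual locking scheme
// (lightweight vs. legacy stack locking) is selected inside the
// MacroAssembler routines, not in these rules.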
17202
17203 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17204 match(Set cr (FastLock object box));
17205 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17206 ins_cost(300);
17207 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17208 ins_encode %{
17209 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17210 %}
17211 ins_pipe(pipe_slow);
17212 %}
17213
17214 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17215 match(Set cr (FastUnlock object rax_reg));
17216 effect(TEMP tmp, USE_KILL rax_reg);
17217 ins_cost(300);
17218 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17219 ins_encode %{
17220 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17221 %}
17222 ins_pipe(pipe_slow);
17223 %}
17224
17225
17226 // ============================================================================
17227 // Safepoint Instructions
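// Note (sketch of the mechanism, not specific to this rule): $poll holds the
// thread-local polling page address. The load below is normally harmless; when
// a safepoint or handshake is requested the page is armed so the load faults,
// and the signal handler dispatches the thread to the safepoint code. This is
// why the poll must be emitted as the exact "test %eax, [reg]" pattern that
// the assert below checks for.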
17228 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17229 %{
17230 match(SafePoint poll);
17231 effect(KILL cr, USE poll);
17232
17233 format %{ "testl rax, [$poll]\t"
17234 "# Safepoint: poll for GC" %}
17235 ins_cost(125);
17236 ins_encode %{
17237 __ relocate(relocInfo::poll_type);
17238 address pre_pc = __ pc();
17239 __ testl(rax, Address($poll$$Register, 0));
17240 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17241 %}
17242 ins_pipe(ialu_reg_mem);
17243 %}
17244
17245 instruct mask_all_evexL(kReg dst, rRegL src) %{
17246 match(Set dst (MaskAll src));
17247 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17248 ins_encode %{
17249 int mask_len = Matcher::vector_length(this);
17250 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17251 %}
17252 ins_pipe( pipe_slow );
17253 %}
17254
17255 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17256 predicate(Matcher::vector_length(n) > 32);
17257 match(Set dst (MaskAll src));
17258 effect(TEMP tmp);
17259 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17260 ins_encode %{
17261 int mask_len = Matcher::vector_length(this);
17262 __ movslq($tmp$$Register, $src$$Register);
17263 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17264 %}
17265 ins_pipe( pipe_slow );
17266 %}
17267
17268 // ============================================================================
17269 // Procedure Call/Return Instructions
17270 // Call Java Static Instruction
17271 // Note: If this code changes, the corresponding ret_addr_offset() and
17272 // compute_padding() functions will have to be adjusted.
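// (Sketch of the rationale, not verified against the runtime code: the
// padding/alignment keeps the 4-byte call displacement patchable atomically
// when the call target changes, and ret_addr_offset() must match the emitted
// instruction sizes so the runtime can locate the return address of the call.)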
17273 instruct CallStaticJavaDirect(method meth) %{
17274 match(CallStaticJava);
17275 effect(USE meth);
17276
17277 ins_cost(300);
17278 format %{ "call,static " %}
17279 opcode(0xE8); /* E8 cd */
17280 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17281 ins_pipe(pipe_slow);
17282 ins_alignment(4);
17283 %}
17284
17285 // Call Java Dynamic Instruction
17286 // Note: If this code changes, the corresponding ret_addr_offset() and
17287 // compute_padding() functions will have to be adjusted.
17288 instruct CallDynamicJavaDirect(method meth)
17289 %{
17290 match(CallDynamicJava);
17291 effect(USE meth);
17292
17293 ins_cost(300);
17294 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17295 "call,dynamic " %}
17296 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17297 ins_pipe(pipe_slow);
17298 ins_alignment(4);
17299 %}
17300
17301 // Call Runtime Instruction
17302 instruct CallRuntimeDirect(method meth)
17303 %{
17304 match(CallRuntime);
17305 effect(USE meth);
17306
17307 ins_cost(300);
17308 format %{ "call,runtime " %}
17309 ins_encode(clear_avx, Java_To_Runtime(meth));
17310 ins_pipe(pipe_slow);
17311 %}
17312
17313 // Call runtime without safepoint
17314 instruct CallLeafDirect(method meth)
17315 %{
17316 match(CallLeaf);
17317 effect(USE meth);
17318
17319 ins_cost(300);
17320 format %{ "call_leaf,runtime " %}
17321 ins_encode(clear_avx, Java_To_Runtime(meth));
17322 ins_pipe(pipe_slow);
17323 %}
17324
17325 // Call runtime without safepoint and with vector arguments
17326 instruct CallLeafDirectVector(method meth)
17327 %{
17328 match(CallLeafVector);
17329 effect(USE meth);
17330
17331 ins_cost(300);
17332 format %{ "call_leaf,vector " %}
17333 ins_encode(Java_To_Runtime(meth));
17334 ins_pipe(pipe_slow);
17335 %}
17336
17337 // Call runtime without safepoint
17338 instruct CallLeafNoFPDirect(method meth)
17339 %{
17340 match(CallLeafNoFP);
17341 effect(USE meth);
17342
17343 ins_cost(300);
17344 format %{ "call_leaf_nofp,runtime " %}
17345 ins_encode(clear_avx, Java_To_Runtime(meth));
17346 ins_pipe(pipe_slow);
17347 %}
17348
17349 // Return Instruction
17350 // Remove the return address & jump to it.
17351 // Notice: We always emit a nop after a ret to make sure there is room
17352 // for safepoint patching
17353 instruct Ret()
17354 %{
17355 match(Return);
17356
17357 format %{ "ret" %}
17358 ins_encode %{
17359 __ ret(0);
17360 %}
17361 ins_pipe(pipe_jmp);
17362 %}
17363
17364 // Tail Call; Jump from runtime stub to Java code.
17365 // Also known as an 'interprocedural jump'.
17366 // Target of jump will eventually return to caller.
17367 // TailJump below removes the return address.
17368 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17369 // emitted just above the TailCall which has reset rbp to the caller state.
17370 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17371 %{
17372 match(TailCall jump_target method_ptr);
17373
17374 ins_cost(300);
17375 format %{ "jmp $jump_target\t# rbx holds method" %}
17376 ins_encode %{
17377 __ jmp($jump_target$$Register);
17378 %}
17379 ins_pipe(pipe_jmp);
17380 %}
17381
17382 // Tail Jump; remove the return address; jump to target.
17383 // TailCall above leaves the return address around.
17384 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17385 %{
17386 match(TailJump jump_target ex_oop);
17387
17388 ins_cost(300);
17389 format %{ "popq rdx\t# pop return address\n\t"
17390 "jmp $jump_target" %}
17391 ins_encode %{
17392 __ popq(as_Register(RDX_enc));
17393 __ jmp($jump_target$$Register);
17394 %}
17395 ins_pipe(pipe_jmp);
17396 %}
17397
17398 // Forward exception.
17399 instruct ForwardExceptionjmp()
17400 %{
17401 match(ForwardException);
17402
17403 format %{ "jmp forward_exception_stub" %}
17404 ins_encode %{
17405 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17406 %}
17407 ins_pipe(pipe_jmp);
17408 %}
17409
17410 // Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler and is set up
// just prior to jumping to this handler. No code emitted.
17413 instruct CreateException(rax_RegP ex_oop)
17414 %{
17415 match(Set ex_oop (CreateEx));
17416
17417 size(0);
17418 // use the following format syntax
17419 format %{ "# exception oop is in rax; no code emitted" %}
17420 ins_encode();
17421 ins_pipe(empty);
17422 %}
17423
17424 // Rethrow exception:
17425 // The exception oop will come in the first argument position.
17426 // Then JUMP (not call) to the rethrow stub code.
17427 instruct RethrowException()
17428 %{
17429 match(Rethrow);
17430
17431 // use the following format syntax
17432 format %{ "jmp rethrow_stub" %}
17433 ins_encode %{
17434 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17435 %}
17436 ins_pipe(pipe_jmp);
17437 %}
17438
17439 // ============================================================================
17440 // This name is KNOWN by the ADLC and cannot be changed.
17441 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17442 // for this guy.
17443 instruct tlsLoadP(r15_RegP dst) %{
17444 match(Set dst (ThreadLocal));
17445 effect(DEF dst);
17446
17447 size(0);
17448 format %{ "# TLS is in R15" %}
17449 ins_encode( /*empty encoding*/ );
17450 ins_pipe(ialu_reg_reg);
17451 %}
17452
17453 instruct addF_reg(regF dst, regF src) %{
17454 predicate(UseAVX == 0);
17455 match(Set dst (AddF dst src));
17456
17457 format %{ "addss $dst, $src" %}
17458 ins_cost(150);
17459 ins_encode %{
17460 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17461 %}
17462 ins_pipe(pipe_slow);
17463 %}
17464
17465 instruct addF_mem(regF dst, memory src) %{
17466 predicate(UseAVX == 0);
17467 match(Set dst (AddF dst (LoadF src)));
17468
17469 format %{ "addss $dst, $src" %}
17470 ins_cost(150);
17471 ins_encode %{
17472 __ addss($dst$$XMMRegister, $src$$Address);
17473 %}
17474 ins_pipe(pipe_slow);
17475 %}
17476
17477 instruct addF_imm(regF dst, immF con) %{
17478 predicate(UseAVX == 0);
17479 match(Set dst (AddF dst con));
17480 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17481 ins_cost(150);
17482 ins_encode %{
17483 __ addss($dst$$XMMRegister, $constantaddress($con));
17484 %}
17485 ins_pipe(pipe_slow);
17486 %}
17487
17488 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17489 predicate(UseAVX > 0);
17490 match(Set dst (AddF src1 src2));
17491
17492 format %{ "vaddss $dst, $src1, $src2" %}
17493 ins_cost(150);
17494 ins_encode %{
17495 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17496 %}
17497 ins_pipe(pipe_slow);
17498 %}
17499
17500 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17501 predicate(UseAVX > 0);
17502 match(Set dst (AddF src1 (LoadF src2)));
17503
17504 format %{ "vaddss $dst, $src1, $src2" %}
17505 ins_cost(150);
17506 ins_encode %{
17507 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17508 %}
17509 ins_pipe(pipe_slow);
17510 %}
17511
17512 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17513 predicate(UseAVX > 0);
17514 match(Set dst (AddF src con));
17515
17516 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17517 ins_cost(150);
17518 ins_encode %{
17519 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17520 %}
17521 ins_pipe(pipe_slow);
17522 %}
17523
17524 instruct addD_reg(regD dst, regD src) %{
17525 predicate(UseAVX == 0);
17526 match(Set dst (AddD dst src));
17527
17528 format %{ "addsd $dst, $src" %}
17529 ins_cost(150);
17530 ins_encode %{
17531 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17532 %}
17533 ins_pipe(pipe_slow);
17534 %}
17535
17536 instruct addD_mem(regD dst, memory src) %{
17537 predicate(UseAVX == 0);
17538 match(Set dst (AddD dst (LoadD src)));
17539
17540 format %{ "addsd $dst, $src" %}
17541 ins_cost(150);
17542 ins_encode %{
17543 __ addsd($dst$$XMMRegister, $src$$Address);
17544 %}
17545 ins_pipe(pipe_slow);
17546 %}
17547
17548 instruct addD_imm(regD dst, immD con) %{
17549 predicate(UseAVX == 0);
17550 match(Set dst (AddD dst con));
17551 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17552 ins_cost(150);
17553 ins_encode %{
17554 __ addsd($dst$$XMMRegister, $constantaddress($con));
17555 %}
17556 ins_pipe(pipe_slow);
17557 %}
17558
17559 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17560 predicate(UseAVX > 0);
17561 match(Set dst (AddD src1 src2));
17562
17563 format %{ "vaddsd $dst, $src1, $src2" %}
17564 ins_cost(150);
17565 ins_encode %{
17566 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17567 %}
17568 ins_pipe(pipe_slow);
17569 %}
17570
17571 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17572 predicate(UseAVX > 0);
17573 match(Set dst (AddD src1 (LoadD src2)));
17574
17575 format %{ "vaddsd $dst, $src1, $src2" %}
17576 ins_cost(150);
17577 ins_encode %{
17578 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17579 %}
17580 ins_pipe(pipe_slow);
17581 %}
17582
17583 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17584 predicate(UseAVX > 0);
17585 match(Set dst (AddD src con));
17586
17587 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17588 ins_cost(150);
17589 ins_encode %{
17590 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17591 %}
17592 ins_pipe(pipe_slow);
17593 %}
17594
17595 instruct subF_reg(regF dst, regF src) %{
17596 predicate(UseAVX == 0);
17597 match(Set dst (SubF dst src));
17598
17599 format %{ "subss $dst, $src" %}
17600 ins_cost(150);
17601 ins_encode %{
17602 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17603 %}
17604 ins_pipe(pipe_slow);
17605 %}
17606
17607 instruct subF_mem(regF dst, memory src) %{
17608 predicate(UseAVX == 0);
17609 match(Set dst (SubF dst (LoadF src)));
17610
17611 format %{ "subss $dst, $src" %}
17612 ins_cost(150);
17613 ins_encode %{
17614 __ subss($dst$$XMMRegister, $src$$Address);
17615 %}
17616 ins_pipe(pipe_slow);
17617 %}
17618
17619 instruct subF_imm(regF dst, immF con) %{
17620 predicate(UseAVX == 0);
17621 match(Set dst (SubF dst con));
17622 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17623 ins_cost(150);
17624 ins_encode %{
17625 __ subss($dst$$XMMRegister, $constantaddress($con));
17626 %}
17627 ins_pipe(pipe_slow);
17628 %}
17629
17630 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17631 predicate(UseAVX > 0);
17632 match(Set dst (SubF src1 src2));
17633
17634 format %{ "vsubss $dst, $src1, $src2" %}
17635 ins_cost(150);
17636 ins_encode %{
17637 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17638 %}
17639 ins_pipe(pipe_slow);
17640 %}
17641
17642 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17643 predicate(UseAVX > 0);
17644 match(Set dst (SubF src1 (LoadF src2)));
17645
17646 format %{ "vsubss $dst, $src1, $src2" %}
17647 ins_cost(150);
17648 ins_encode %{
17649 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17650 %}
17651 ins_pipe(pipe_slow);
17652 %}
17653
17654 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17655 predicate(UseAVX > 0);
17656 match(Set dst (SubF src con));
17657
17658 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17659 ins_cost(150);
17660 ins_encode %{
17661 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17662 %}
17663 ins_pipe(pipe_slow);
17664 %}
17665
17666 instruct subD_reg(regD dst, regD src) %{
17667 predicate(UseAVX == 0);
17668 match(Set dst (SubD dst src));
17669
17670 format %{ "subsd $dst, $src" %}
17671 ins_cost(150);
17672 ins_encode %{
17673 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17674 %}
17675 ins_pipe(pipe_slow);
17676 %}
17677
17678 instruct subD_mem(regD dst, memory src) %{
17679 predicate(UseAVX == 0);
17680 match(Set dst (SubD dst (LoadD src)));
17681
17682 format %{ "subsd $dst, $src" %}
17683 ins_cost(150);
17684 ins_encode %{
17685 __ subsd($dst$$XMMRegister, $src$$Address);
17686 %}
17687 ins_pipe(pipe_slow);
17688 %}
17689
17690 instruct subD_imm(regD dst, immD con) %{
17691 predicate(UseAVX == 0);
17692 match(Set dst (SubD dst con));
17693 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17694 ins_cost(150);
17695 ins_encode %{
17696 __ subsd($dst$$XMMRegister, $constantaddress($con));
17697 %}
17698 ins_pipe(pipe_slow);
17699 %}
17700
17701 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17702 predicate(UseAVX > 0);
17703 match(Set dst (SubD src1 src2));
17704
17705 format %{ "vsubsd $dst, $src1, $src2" %}
17706 ins_cost(150);
17707 ins_encode %{
17708 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17709 %}
17710 ins_pipe(pipe_slow);
17711 %}
17712
17713 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17714 predicate(UseAVX > 0);
17715 match(Set dst (SubD src1 (LoadD src2)));
17716
17717 format %{ "vsubsd $dst, $src1, $src2" %}
17718 ins_cost(150);
17719 ins_encode %{
17720 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17721 %}
17722 ins_pipe(pipe_slow);
17723 %}
17724
17725 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17726 predicate(UseAVX > 0);
17727 match(Set dst (SubD src con));
17728
17729 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17730 ins_cost(150);
17731 ins_encode %{
17732 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17733 %}
17734 ins_pipe(pipe_slow);
17735 %}
17736
17737 instruct mulF_reg(regF dst, regF src) %{
17738 predicate(UseAVX == 0);
17739 match(Set dst (MulF dst src));
17740
17741 format %{ "mulss $dst, $src" %}
17742 ins_cost(150);
17743 ins_encode %{
17744 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17745 %}
17746 ins_pipe(pipe_slow);
17747 %}
17748
17749 instruct mulF_mem(regF dst, memory src) %{
17750 predicate(UseAVX == 0);
17751 match(Set dst (MulF dst (LoadF src)));
17752
17753 format %{ "mulss $dst, $src" %}
17754 ins_cost(150);
17755 ins_encode %{
17756 __ mulss($dst$$XMMRegister, $src$$Address);
17757 %}
17758 ins_pipe(pipe_slow);
17759 %}
17760
17761 instruct mulF_imm(regF dst, immF con) %{
17762 predicate(UseAVX == 0);
17763 match(Set dst (MulF dst con));
17764 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17765 ins_cost(150);
17766 ins_encode %{
17767 __ mulss($dst$$XMMRegister, $constantaddress($con));
17768 %}
17769 ins_pipe(pipe_slow);
17770 %}
17771
17772 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17773 predicate(UseAVX > 0);
17774 match(Set dst (MulF src1 src2));
17775
17776 format %{ "vmulss $dst, $src1, $src2" %}
17777 ins_cost(150);
17778 ins_encode %{
17779 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17780 %}
17781 ins_pipe(pipe_slow);
17782 %}
17783
17784 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17785 predicate(UseAVX > 0);
17786 match(Set dst (MulF src1 (LoadF src2)));
17787
17788 format %{ "vmulss $dst, $src1, $src2" %}
17789 ins_cost(150);
17790 ins_encode %{
17791 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17792 %}
17793 ins_pipe(pipe_slow);
17794 %}
17795
17796 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17797 predicate(UseAVX > 0);
17798 match(Set dst (MulF src con));
17799
17800 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17801 ins_cost(150);
17802 ins_encode %{
17803 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17804 %}
17805 ins_pipe(pipe_slow);
17806 %}
17807
17808 instruct mulD_reg(regD dst, regD src) %{
17809 predicate(UseAVX == 0);
17810 match(Set dst (MulD dst src));
17811
17812 format %{ "mulsd $dst, $src" %}
17813 ins_cost(150);
17814 ins_encode %{
17815 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17816 %}
17817 ins_pipe(pipe_slow);
17818 %}
17819
17820 instruct mulD_mem(regD dst, memory src) %{
17821 predicate(UseAVX == 0);
17822 match(Set dst (MulD dst (LoadD src)));
17823
17824 format %{ "mulsd $dst, $src" %}
17825 ins_cost(150);
17826 ins_encode %{
17827 __ mulsd($dst$$XMMRegister, $src$$Address);
17828 %}
17829 ins_pipe(pipe_slow);
17830 %}
17831
17832 instruct mulD_imm(regD dst, immD con) %{
17833 predicate(UseAVX == 0);
17834 match(Set dst (MulD dst con));
17835 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17836 ins_cost(150);
17837 ins_encode %{
17838 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17839 %}
17840 ins_pipe(pipe_slow);
17841 %}
17842
17843 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17844 predicate(UseAVX > 0);
17845 match(Set dst (MulD src1 src2));
17846
17847 format %{ "vmulsd $dst, $src1, $src2" %}
17848 ins_cost(150);
17849 ins_encode %{
17850 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17851 %}
17852 ins_pipe(pipe_slow);
17853 %}
17854
17855 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17856 predicate(UseAVX > 0);
17857 match(Set dst (MulD src1 (LoadD src2)));
17858
17859 format %{ "vmulsd $dst, $src1, $src2" %}
17860 ins_cost(150);
17861 ins_encode %{
17862 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17863 %}
17864 ins_pipe(pipe_slow);
17865 %}
17866
17867 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17868 predicate(UseAVX > 0);
17869 match(Set dst (MulD src con));
17870
17871 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17872 ins_cost(150);
17873 ins_encode %{
17874 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17875 %}
17876 ins_pipe(pipe_slow);
17877 %}
17878
17879 instruct divF_reg(regF dst, regF src) %{
17880 predicate(UseAVX == 0);
17881 match(Set dst (DivF dst src));
17882
17883 format %{ "divss $dst, $src" %}
17884 ins_cost(150);
17885 ins_encode %{
17886 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17887 %}
17888 ins_pipe(pipe_slow);
17889 %}
17890
17891 instruct divF_mem(regF dst, memory src) %{
17892 predicate(UseAVX == 0);
17893 match(Set dst (DivF dst (LoadF src)));
17894
17895 format %{ "divss $dst, $src" %}
17896 ins_cost(150);
17897 ins_encode %{
17898 __ divss($dst$$XMMRegister, $src$$Address);
17899 %}
17900 ins_pipe(pipe_slow);
17901 %}
17902
17903 instruct divF_imm(regF dst, immF con) %{
17904 predicate(UseAVX == 0);
17905 match(Set dst (DivF dst con));
17906 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17907 ins_cost(150);
17908 ins_encode %{
17909 __ divss($dst$$XMMRegister, $constantaddress($con));
17910 %}
17911 ins_pipe(pipe_slow);
17912 %}
17913
17914 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17915 predicate(UseAVX > 0);
17916 match(Set dst (DivF src1 src2));
17917
17918 format %{ "vdivss $dst, $src1, $src2" %}
17919 ins_cost(150);
17920 ins_encode %{
17921 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17922 %}
17923 ins_pipe(pipe_slow);
17924 %}
17925
17926 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17927 predicate(UseAVX > 0);
17928 match(Set dst (DivF src1 (LoadF src2)));
17929
17930 format %{ "vdivss $dst, $src1, $src2" %}
17931 ins_cost(150);
17932 ins_encode %{
17933 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17934 %}
17935 ins_pipe(pipe_slow);
17936 %}
17937
17938 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17939 predicate(UseAVX > 0);
17940 match(Set dst (DivF src con));
17941
17942 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17943 ins_cost(150);
17944 ins_encode %{
17945 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17946 %}
17947 ins_pipe(pipe_slow);
17948 %}
17949
17950 instruct divD_reg(regD dst, regD src) %{
17951 predicate(UseAVX == 0);
17952 match(Set dst (DivD dst src));
17953
17954 format %{ "divsd $dst, $src" %}
17955 ins_cost(150);
17956 ins_encode %{
17957 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17958 %}
17959 ins_pipe(pipe_slow);
17960 %}
17961
17962 instruct divD_mem(regD dst, memory src) %{
17963 predicate(UseAVX == 0);
17964 match(Set dst (DivD dst (LoadD src)));
17965
17966 format %{ "divsd $dst, $src" %}
17967 ins_cost(150);
17968 ins_encode %{
17969 __ divsd($dst$$XMMRegister, $src$$Address);
17970 %}
17971 ins_pipe(pipe_slow);
17972 %}
17973
17974 instruct divD_imm(regD dst, immD con) %{
17975 predicate(UseAVX == 0);
17976 match(Set dst (DivD dst con));
17977 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17978 ins_cost(150);
17979 ins_encode %{
17980 __ divsd($dst$$XMMRegister, $constantaddress($con));
17981 %}
17982 ins_pipe(pipe_slow);
17983 %}
17984
17985 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17986 predicate(UseAVX > 0);
17987 match(Set dst (DivD src1 src2));
17988
17989 format %{ "vdivsd $dst, $src1, $src2" %}
17990 ins_cost(150);
17991 ins_encode %{
17992 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17993 %}
17994 ins_pipe(pipe_slow);
17995 %}
17996
17997 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17998 predicate(UseAVX > 0);
17999 match(Set dst (DivD src1 (LoadD src2)));
18000
18001 format %{ "vdivsd $dst, $src1, $src2" %}
18002 ins_cost(150);
18003 ins_encode %{
18004 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18005 %}
18006 ins_pipe(pipe_slow);
18007 %}
18008
18009 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18010 predicate(UseAVX > 0);
18011 match(Set dst (DivD src con));
18012
18013 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18014 ins_cost(150);
18015 ins_encode %{
18016 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18017 %}
18018 ins_pipe(pipe_slow);
18019 %}
18020
18021 instruct absF_reg(regF dst) %{
18022 predicate(UseAVX == 0);
18023 match(Set dst (AbsF dst));
18024 ins_cost(150);
18025 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18026 ins_encode %{
18027 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18028 %}
18029 ins_pipe(pipe_slow);
18030 %}
18031
18032 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18033 predicate(UseAVX > 0);
18034 match(Set dst (AbsF src));
18035 ins_cost(150);
18036 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18037 ins_encode %{
18038 int vlen_enc = Assembler::AVX_128bit;
18039 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18040 ExternalAddress(float_signmask()), vlen_enc);
18041 %}
18042 ins_pipe(pipe_slow);
18043 %}
18044
18045 instruct absD_reg(regD dst) %{
18046 predicate(UseAVX == 0);
18047 match(Set dst (AbsD dst));
18048 ins_cost(150);
18049 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18050 "# abs double by sign masking" %}
18051 ins_encode %{
18052 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18053 %}
18054 ins_pipe(pipe_slow);
18055 %}
18056
18057 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18058 predicate(UseAVX > 0);
18059 match(Set dst (AbsD src));
18060 ins_cost(150);
18061 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18062 "# abs double by sign masking" %}
18063 ins_encode %{
18064 int vlen_enc = Assembler::AVX_128bit;
18065 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18066 ExternalAddress(double_signmask()), vlen_enc);
18067 %}
18068 ins_pipe(pipe_slow);
18069 %}
18070
18071 instruct negF_reg(regF dst) %{
18072 predicate(UseAVX == 0);
18073 match(Set dst (NegF dst));
18074 ins_cost(150);
18075 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18076 ins_encode %{
18077 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18078 %}
18079 ins_pipe(pipe_slow);
18080 %}
18081
18082 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18083 predicate(UseAVX > 0);
18084 match(Set dst (NegF src));
18085 ins_cost(150);
18086 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18087 ins_encode %{
18088 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18089 ExternalAddress(float_signflip()));
18090 %}
18091 ins_pipe(pipe_slow);
18092 %}
18093
18094 instruct negD_reg(regD dst) %{
18095 predicate(UseAVX == 0);
18096 match(Set dst (NegD dst));
18097 ins_cost(150);
18098 format %{ "xorpd $dst, [0x8000000000000000]\t"
18099 "# neg double by sign flipping" %}
18100 ins_encode %{
18101 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18102 %}
18103 ins_pipe(pipe_slow);
18104 %}
18105
18106 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18107 predicate(UseAVX > 0);
18108 match(Set dst (NegD src));
18109 ins_cost(150);
18110 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18111 "# neg double by sign flipping" %}
18112 ins_encode %{
18113 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18114 ExternalAddress(double_signflip()));
18115 %}
18116 ins_pipe(pipe_slow);
18117 %}
18118
// The sqrtss instruction only writes the low element of its destination, so the
// destination register needs to be pre-initialized for best performance (otherwise
// it carries a false dependency on the register's previous contents).
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18121 instruct sqrtF_reg(regF dst) %{
18122 match(Set dst (SqrtF dst));
18123 format %{ "sqrtss $dst, $dst" %}
18124 ins_encode %{
18125 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18126 %}
18127 ins_pipe(pipe_slow);
18128 %}
18129
// Likewise, sqrtsd only writes the low element, so the destination register needs
// to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18132 instruct sqrtD_reg(regD dst) %{
18133 match(Set dst (SqrtD dst));
18134 format %{ "sqrtsd $dst, $dst" %}
18135 ins_encode %{
18136 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18137 %}
18138 ins_pipe(pipe_slow);
18139 %}
18140
18141 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18142 effect(TEMP tmp);
18143 match(Set dst (ConvF2HF src));
18144 ins_cost(125);
format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
18146 ins_encode %{
18147 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18148 %}
18149 ins_pipe( pipe_slow );
18150 %}
18151
18152 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18153 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18154 effect(TEMP ktmp, TEMP rtmp);
18155 match(Set mem (StoreC mem (ConvF2HF src)));
format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
18157 ins_encode %{
18158 __ movl($rtmp$$Register, 0x1);
18159 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18160 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18161 %}
18162 ins_pipe( pipe_slow );
18163 %}
18164
18165 instruct vconvF2HF(vec dst, vec src) %{
18166 match(Set dst (VectorCastF2HF src));
18167 format %{ "vector_conv_F2HF $dst $src" %}
18168 ins_encode %{
18169 int vlen_enc = vector_length_encoding(this, $src);
18170 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18171 %}
18172 ins_pipe( pipe_slow );
18173 %}
18174
18175 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18176 predicate(n->as_StoreVector()->memory_size() >= 16);
18177 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18178 format %{ "vcvtps2ph $mem,$src" %}
18179 ins_encode %{
18180 int vlen_enc = vector_length_encoding(this, $src);
18181 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18182 %}
18183 ins_pipe( pipe_slow );
18184 %}
18185
18186 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18187 match(Set dst (ConvHF2F src));
18188 format %{ "vcvtph2ps $dst,$src" %}
18189 ins_encode %{
18190 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18191 %}
18192 ins_pipe( pipe_slow );
18193 %}
18194
18195 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18196 match(Set dst (VectorCastHF2F (LoadVector mem)));
18197 format %{ "vcvtph2ps $dst,$mem" %}
18198 ins_encode %{
18199 int vlen_enc = vector_length_encoding(this);
18200 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18201 %}
18202 ins_pipe( pipe_slow );
18203 %}
18204
18205 instruct vconvHF2F(vec dst, vec src) %{
18206 match(Set dst (VectorCastHF2F src));
18207 ins_cost(125);
18208 format %{ "vector_conv_HF2F $dst,$src" %}
18209 ins_encode %{
18210 int vlen_enc = vector_length_encoding(this);
18211 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18212 %}
18213 ins_pipe( pipe_slow );
18214 %}
18215
18216 // ---------------------------------------- VectorReinterpret ------------------------------------
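// VectorReinterpret is a bit-pattern-preserving view change. In the rules
// below, widening a vector zeroes the new upper bytes (hence the AND with
// vector_32/64_bit_mask), narrowing keeps only the low bytes, and a
// same-size reinterpret emits no code at all.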
18217 instruct reinterpret_mask(kReg dst) %{
18218 predicate(n->bottom_type()->isa_vectmask() &&
18219 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18220 match(Set dst (VectorReinterpret dst));
18221 ins_cost(125);
18222 format %{ "vector_reinterpret $dst\t!" %}
18223 ins_encode %{
18224 // empty
18225 %}
18226 ins_pipe( pipe_slow );
18227 %}
18228
18229 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18230 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18231 n->bottom_type()->isa_vectmask() &&
18232 n->in(1)->bottom_type()->isa_vectmask() &&
18233 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18234 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18235 match(Set dst (VectorReinterpret src));
18236 effect(TEMP xtmp);
18237 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18238 ins_encode %{
18239 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18240 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18241 assert(src_sz == dst_sz , "src and dst size mismatch");
18242 int vlen_enc = vector_length_encoding(src_sz);
18243 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18244 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18245 %}
18246 ins_pipe( pipe_slow );
18247 %}
18248
18249 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18250 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18251 n->bottom_type()->isa_vectmask() &&
18252 n->in(1)->bottom_type()->isa_vectmask() &&
18253 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18254 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18255 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18256 match(Set dst (VectorReinterpret src));
18257 effect(TEMP xtmp);
18258 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18259 ins_encode %{
18260 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18261 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18262 assert(src_sz == dst_sz , "src and dst size mismatch");
18263 int vlen_enc = vector_length_encoding(src_sz);
18264 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18265 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18266 %}
18267 ins_pipe( pipe_slow );
18268 %}
18269
18270 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18271 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18272 n->bottom_type()->isa_vectmask() &&
18273 n->in(1)->bottom_type()->isa_vectmask() &&
18274 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18275 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18276 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18277 match(Set dst (VectorReinterpret src));
18278 effect(TEMP xtmp);
18279 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18280 ins_encode %{
18281 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18282 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18283 assert(src_sz == dst_sz , "src and dst size mismatch");
18284 int vlen_enc = vector_length_encoding(src_sz);
18285 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18286 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18287 %}
18288 ins_pipe( pipe_slow );
18289 %}
18290
18291 instruct reinterpret(vec dst) %{
18292 predicate(!n->bottom_type()->isa_vectmask() &&
18293 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18294 match(Set dst (VectorReinterpret dst));
18295 ins_cost(125);
18296 format %{ "vector_reinterpret $dst\t!" %}
18297 ins_encode %{
18298 // empty
18299 %}
18300 ins_pipe( pipe_slow );
18301 %}
18302
18303 instruct reinterpret_expand(vec dst, vec src) %{
18304 predicate(UseAVX == 0 &&
18305 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18306 match(Set dst (VectorReinterpret src));
18307 ins_cost(125);
18308 effect(TEMP dst);
18309 format %{ "vector_reinterpret_expand $dst,$src" %}
18310 ins_encode %{
18311 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18312 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18313
18314 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18315 if (src_vlen_in_bytes == 4) {
18316 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18317 } else {
18318 assert(src_vlen_in_bytes == 8, "");
18319 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18320 }
18321 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18322 %}
18323 ins_pipe( pipe_slow );
18324 %}
18325
18326 instruct vreinterpret_expand4(legVec dst, vec src) %{
18327 predicate(UseAVX > 0 &&
18328 !n->bottom_type()->isa_vectmask() &&
18329 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18330 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18331 match(Set dst (VectorReinterpret src));
18332 ins_cost(125);
18333 format %{ "vector_reinterpret_expand $dst,$src" %}
18334 ins_encode %{
18335 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18336 %}
18337 ins_pipe( pipe_slow );
18338 %}
18339
18340
18341 instruct vreinterpret_expand(legVec dst, vec src) %{
18342 predicate(UseAVX > 0 &&
18343 !n->bottom_type()->isa_vectmask() &&
18344 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18345 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18346 match(Set dst (VectorReinterpret src));
18347 ins_cost(125);
18348 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18349 ins_encode %{
18350 switch (Matcher::vector_length_in_bytes(this, $src)) {
18351 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18352 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18353 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18354 default: ShouldNotReachHere();
18355 }
18356 %}
18357 ins_pipe( pipe_slow );
18358 %}
18359
18360 instruct reinterpret_shrink(vec dst, legVec src) %{
18361 predicate(!n->bottom_type()->isa_vectmask() &&
18362 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18363 match(Set dst (VectorReinterpret src));
18364 ins_cost(125);
18365 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18366 ins_encode %{
18367 switch (Matcher::vector_length_in_bytes(this)) {
18368 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18369 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18370 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18371 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18372 default: ShouldNotReachHere();
18373 }
18374 %}
18375 ins_pipe( pipe_slow );
18376 %}
18377
18378 // ----------------------------------------------------------------------------------------------------
18379
18380 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18381 match(Set dst (RoundDoubleMode src rmode));
18382 format %{ "roundsd $dst,$src" %}
18383 ins_cost(150);
18384 ins_encode %{
18385 assert(UseSSE >= 4, "required");
18386 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18387 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18388 }
18389 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18390 %}
18391 ins_pipe(pipe_slow);
18392 %}
18393
18394 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18395 match(Set dst (RoundDoubleMode con rmode));
18396 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18397 ins_cost(150);
18398 ins_encode %{
18399 assert(UseSSE >= 4, "required");
18400 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18401 %}
18402 ins_pipe(pipe_slow);
18403 %}
18404
18405 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18406 predicate(Matcher::vector_length(n) < 8);
18407 match(Set dst (RoundDoubleModeV src rmode));
18408 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18409 ins_encode %{
18410 assert(UseAVX > 0, "required");
18411 int vlen_enc = vector_length_encoding(this);
18412 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18413 %}
18414 ins_pipe( pipe_slow );
18415 %}
18416
18417 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18418 predicate(Matcher::vector_length(n) == 8);
18419 match(Set dst (RoundDoubleModeV src rmode));
18420 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18421 ins_encode %{
18422 assert(UseAVX > 2, "required");
18423 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18424 %}
18425 ins_pipe( pipe_slow );
18426 %}
18427
18428 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18429 predicate(Matcher::vector_length(n) < 8);
18430 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18431 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18432 ins_encode %{
18433 assert(UseAVX > 0, "required");
18434 int vlen_enc = vector_length_encoding(this);
18435 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18436 %}
18437 ins_pipe( pipe_slow );
18438 %}
18439
18440 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18441 predicate(Matcher::vector_length(n) == 8);
18442 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18443 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18444 ins_encode %{
18445 assert(UseAVX > 2, "required");
18446 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18447 %}
18448 ins_pipe( pipe_slow );
18449 %}
18450
18451 instruct onspinwait() %{
18452 match(OnSpinWait);
18453 ins_cost(200);
18454
18455 format %{
18456 $$template
18457 $$emit$$"pause\t! membar_onspinwait"
18458 %}
18459 ins_encode %{
18460 __ pause();
18461 %}
18462 ins_pipe(pipe_slow);
18463 %}
18464
18465 // a * b + c
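// Note: fused multiply-add computes a * b + c with a single rounding, unlike
// a separate multiply followed by an add. These rules are only reached when
// C2 creates FmaD/FmaF nodes (e.g. for the Math.fma intrinsic with UseFMA),
// where that single-rounding semantics is what is requested.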
18466 instruct fmaD_reg(regD a, regD b, regD c) %{
18467 match(Set c (FmaD c (Binary a b)));
18468 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18469 ins_cost(150);
18470 ins_encode %{
18471 assert(UseFMA, "Needs FMA instructions support.");
18472 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18473 %}
18474 ins_pipe( pipe_slow );
18475 %}
18476
18477 // a * b + c
18478 instruct fmaF_reg(regF a, regF b, regF c) %{
18479 match(Set c (FmaF c (Binary a b)));
18480 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18481 ins_cost(150);
18482 ins_encode %{
18483 assert(UseFMA, "Needs FMA instructions support.");
18484 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18485 %}
18486 ins_pipe( pipe_slow );
18487 %}
18488
18489 // ====================VECTOR INSTRUCTIONS=====================================
18490
18491 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
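// (Sketch of the rationale: these give the matcher a pattern for copies
// between the general 'vec' and the legacy-encodable 'legVec' register
// classes; they are expected to be eliminated before code emission, which is
// why the encodings are ShouldNotReachHere().)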
18492 instruct MoveVec2Leg(legVec dst, vec src) %{
18493 match(Set dst src);
18494 format %{ "" %}
18495 ins_encode %{
18496 ShouldNotReachHere();
18497 %}
18498 ins_pipe( fpu_reg_reg );
18499 %}
18500
18501 instruct MoveLeg2Vec(vec dst, legVec src) %{
18502 match(Set dst src);
18503 format %{ "" %}
18504 ins_encode %{
18505 ShouldNotReachHere();
18506 %}
18507 ins_pipe( fpu_reg_reg );
18508 %}
18509
18510 // ============================================================================
18511
18512 // Load vectors generic operand pattern
18513 instruct loadV(vec dst, memory mem) %{
18514 match(Set dst (LoadVector mem));
18515 ins_cost(125);
18516 format %{ "load_vector $dst,$mem" %}
18517 ins_encode %{
18518 BasicType bt = Matcher::vector_element_basic_type(this);
18519 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18520 %}
18521 ins_pipe( pipe_slow );
18522 %}
18523
18524 // Store vectors generic operand pattern.
18525 instruct storeV(memory mem, vec src) %{
18526 match(Set mem (StoreVector mem src));
18527 ins_cost(145);
18528 format %{ "store_vector $mem,$src\n\t" %}
18529 ins_encode %{
18530 switch (Matcher::vector_length_in_bytes(this, $src)) {
18531 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18532 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18533 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18534 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18535 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18536 default: ShouldNotReachHere();
18537 }
18538 %}
18539 ins_pipe( pipe_slow );
18540 %}
18541
18542 // ---------------------------------------- Gather ------------------------------------
18543
18544 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
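// Conceptually, for each enabled lane i a gather performs
//   dst[i] = *(elem_t*)($mem + idx[i] * sizeof(elem_t));
// (illustrative sketch only). Masked variants skip lanes whose mask bit is
// clear, and the subword (byte/short) variants below assemble the result
// piecewise because the hardware gather instructions only support 32- and
// 64-bit elements.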
18545
18546 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18547 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18548 Matcher::vector_length_in_bytes(n) <= 32);
18549 match(Set dst (LoadVectorGather mem idx));
18550 effect(TEMP dst, TEMP tmp, TEMP mask);
18551 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18552 ins_encode %{
18553 int vlen_enc = vector_length_encoding(this);
18554 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18555 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18556 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18557 __ lea($tmp$$Register, $mem$$Address);
18558 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18559 %}
18560 ins_pipe( pipe_slow );
18561 %}
18562
18563
18564 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18565 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18566 !is_subword_type(Matcher::vector_element_basic_type(n)));
18567 match(Set dst (LoadVectorGather mem idx));
18568 effect(TEMP dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18570 ins_encode %{
18571 int vlen_enc = vector_length_encoding(this);
18572 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18573 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18574 __ lea($tmp$$Register, $mem$$Address);
18575 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18576 %}
18577 ins_pipe( pipe_slow );
18578 %}
18579
18580 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18581 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18582 !is_subword_type(Matcher::vector_element_basic_type(n)));
18583 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18584 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18586 ins_encode %{
18587 assert(UseAVX > 2, "sanity");
18588 int vlen_enc = vector_length_encoding(this);
18589 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18590 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
// Note: Since the gather instruction partially updates the opmask register used
// for predication, the mask operand is first copied to a temporary.
18593 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18594 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18595 __ lea($tmp$$Register, $mem$$Address);
18596 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18597 %}
18598 ins_pipe( pipe_slow );
18599 %}
18600
18601 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18602 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18603 match(Set dst (LoadVectorGather mem idx_base));
18604 effect(TEMP tmp, TEMP rtmp);
18605 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18606 ins_encode %{
18607 int vlen_enc = vector_length_encoding(this);
18608 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18609 __ lea($tmp$$Register, $mem$$Address);
18610 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18611 %}
18612 ins_pipe( pipe_slow );
18613 %}
18614
18615 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18616 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18617 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18618 match(Set dst (LoadVectorGather mem idx_base));
18619 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18620 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18621 ins_encode %{
18622 int vlen_enc = vector_length_encoding(this);
18623 int vector_len = Matcher::vector_length(this);
18624 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18625 __ lea($tmp$$Register, $mem$$Address);
18626 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18627 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18628 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18629 %}
18630 ins_pipe( pipe_slow );
18631 %}
18632
18633 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18634 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18635 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18636 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18637 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18638 ins_encode %{
18639 int vlen_enc = vector_length_encoding(this);
18640 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18641 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18642 __ lea($tmp$$Register, $mem$$Address);
18643 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18644 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18645 %}
18646 ins_pipe( pipe_slow );
18647 %}
18648
18649 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18650 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18651 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18652 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18653 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18654 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18655 ins_encode %{
18656 int vlen_enc = vector_length_encoding(this);
18657 int vector_len = Matcher::vector_length(this);
18658 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18659 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18660 __ lea($tmp$$Register, $mem$$Address);
18661 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18662 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18663 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18664 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18665 %}
18666 ins_pipe( pipe_slow );
18667 %}
18668
18669 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18670 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18671 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18672 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18673 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18674 ins_encode %{
18675 int vlen_enc = vector_length_encoding(this);
18676 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18677 __ lea($tmp$$Register, $mem$$Address);
18678 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18679 if (elem_bt == T_SHORT) {
18680 __ movl($mask_idx$$Register, 0x55555555);
18681 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18682 }
18683 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18684 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18685 %}
18686 ins_pipe( pipe_slow );
18687 %}
18688
18689 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18690 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18691 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18692 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18693 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18694 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18695 ins_encode %{
18696 int vlen_enc = vector_length_encoding(this);
18697 int vector_len = Matcher::vector_length(this);
18698 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18699 __ lea($tmp$$Register, $mem$$Address);
18700 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18701 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18702 if (elem_bt == T_SHORT) {
18703 __ movl($mask_idx$$Register, 0x55555555);
18704 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18705 }
18706 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18707 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18708 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18709 %}
18710 ins_pipe( pipe_slow );
18711 %}
18712
18713 // ====================Scatter=======================================
18714
18715 // Scatter INT, LONG, FLOAT, DOUBLE
18716
18717 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18718 predicate(UseAVX > 2);
18719 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18720 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18722 ins_encode %{
18723 int vlen_enc = vector_length_encoding(this, $src);
18724 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18725
18726 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18727 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18728
18729 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18730 __ lea($tmp$$Register, $mem$$Address);
18731 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18732 %}
18733 ins_pipe( pipe_slow );
18734 %}
18735
18736 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18737 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18738 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
18740 ins_encode %{
18741 int vlen_enc = vector_length_encoding(this, $src);
18742 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18743 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18744 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is moved to a temporary register.
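    // (The AVX-512 scatter instructions clear writemask bits as each element is stored so that
    //  the operation can be resumed after a fault; without the copy the incoming mask value
    //  would be destroyed.)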
18747 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18748 __ lea($tmp$$Register, $mem$$Address);
18749 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18750 %}
18751 ins_pipe( pipe_slow );
18752 %}
18753
18754 // ====================REPLICATE=======================================
18755
18756 // Replicate byte scalar to be vector
18757 instruct vReplB_reg(vec dst, rRegI src) %{
18758 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18759 match(Set dst (Replicate src));
18760 format %{ "replicateB $dst,$src" %}
18761 ins_encode %{
18762 uint vlen = Matcher::vector_length(this);
18763 if (UseAVX >= 2) {
18764 int vlen_enc = vector_length_encoding(this);
18765 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18766 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18767 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18768 } else {
18769 __ movdl($dst$$XMMRegister, $src$$Register);
18770 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18771 }
18772 } else {
18773 assert(UseAVX < 2, "");
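      // SSE fallback: punpcklbw duplicates the byte into both halves of the low word,
      // pshuflw(0x00) splats that word across the low 64 bits, and for 16-byte vectors
      // punpcklqdq copies the low quadword into the high quadword.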
18774 __ movdl($dst$$XMMRegister, $src$$Register);
18775 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18776 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18777 if (vlen >= 16) {
18778 assert(vlen == 16, "");
18779 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18780 }
18781 }
18782 %}
18783 ins_pipe( pipe_slow );
18784 %}
18785
18786 instruct ReplB_mem(vec dst, memory mem) %{
18787 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18788 match(Set dst (Replicate (LoadB mem)));
18789 format %{ "replicateB $dst,$mem" %}
18790 ins_encode %{
18791 int vlen_enc = vector_length_encoding(this);
18792 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18793 %}
18794 ins_pipe( pipe_slow );
18795 %}
18796
18797 // ====================ReplicateS=======================================
18798
18799 instruct vReplS_reg(vec dst, rRegI src) %{
18800 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18801 match(Set dst (Replicate src));
18802 format %{ "replicateS $dst,$src" %}
18803 ins_encode %{
18804 uint vlen = Matcher::vector_length(this);
18805 int vlen_enc = vector_length_encoding(this);
18806 if (UseAVX >= 2) {
18807 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18808 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18809 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18810 } else {
18811 __ movdl($dst$$XMMRegister, $src$$Register);
18812 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18813 }
18814 } else {
18815 assert(UseAVX < 2, "");
18816 __ movdl($dst$$XMMRegister, $src$$Register);
18817 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18818 if (vlen >= 8) {
18819 assert(vlen == 8, "");
18820 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18821 }
18822 }
18823 %}
18824 ins_pipe( pipe_slow );
18825 %}
18826
18827 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18828 match(Set dst (Replicate con));
18829 effect(TEMP rtmp);
18830 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18831 ins_encode %{
18832 int vlen_enc = vector_length_encoding(this);
18833 BasicType bt = Matcher::vector_element_basic_type(this);
18834 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18835 __ movl($rtmp$$Register, $con$$constant);
18836 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18837 %}
18838 ins_pipe( pipe_slow );
18839 %}
18840
18841 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18842 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18843 match(Set dst (Replicate src));
18844 effect(TEMP rtmp);
18845 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18846 ins_encode %{
18847 int vlen_enc = vector_length_encoding(this);
18848 __ evmovw($rtmp$$Register, $src$$XMMRegister);
18849 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18850 %}
18851 ins_pipe( pipe_slow );
18852 %}
18853
18854 instruct ReplS_mem(vec dst, memory mem) %{
18855 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18856 match(Set dst (Replicate (LoadS mem)));
18857 format %{ "replicateS $dst,$mem" %}
18858 ins_encode %{
18859 int vlen_enc = vector_length_encoding(this);
18860 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18861 %}
18862 ins_pipe( pipe_slow );
18863 %}
18864
18865 // ====================ReplicateI=======================================
18866
18867 instruct ReplI_reg(vec dst, rRegI src) %{
18868 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18869 match(Set dst (Replicate src));
18870 format %{ "replicateI $dst,$src" %}
18871 ins_encode %{
18872 uint vlen = Matcher::vector_length(this);
18873 int vlen_enc = vector_length_encoding(this);
18874 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18875 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18876 } else if (VM_Version::supports_avx2()) {
18877 __ movdl($dst$$XMMRegister, $src$$Register);
18878 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18879 } else {
18880 __ movdl($dst$$XMMRegister, $src$$Register);
18881 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18882 }
18883 %}
18884 ins_pipe( pipe_slow );
18885 %}
18886
18887 instruct ReplI_mem(vec dst, memory mem) %{
18888 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18889 match(Set dst (Replicate (LoadI mem)));
18890 format %{ "replicateI $dst,$mem" %}
18891 ins_encode %{
18892 int vlen_enc = vector_length_encoding(this);
18893 if (VM_Version::supports_avx2()) {
18894 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18895 } else if (VM_Version::supports_avx()) {
18896 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18897 } else {
18898 __ movdl($dst$$XMMRegister, $mem$$Address);
18899 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18900 }
18901 %}
18902 ins_pipe( pipe_slow );
18903 %}
18904
18905 instruct ReplI_imm(vec dst, immI con) %{
18906 predicate(Matcher::is_non_long_integral_vector(n));
18907 match(Set dst (Replicate con));
18908 format %{ "replicateI $dst,$con" %}
18909 ins_encode %{
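    // The last argument of vreplicate_imm() is the number of copies of the immediate written
    // into the constant table: a 4, 8 or 16 byte entry (assuming load_constant_vector() then
    // expands it with vbroadcastss, movddup or a plain 128-bit load, respectively), divided by
    // the element size.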
18910 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18911 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18912 type2aelembytes(Matcher::vector_element_basic_type(this))));
18913 BasicType bt = Matcher::vector_element_basic_type(this);
18914 int vlen = Matcher::vector_length_in_bytes(this);
18915 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18916 %}
18917 ins_pipe( pipe_slow );
18918 %}
18919
18920 // Replicate scalar zero to be vector
18921 instruct ReplI_zero(vec dst, immI_0 zero) %{
18922 predicate(Matcher::is_non_long_integral_vector(n));
18923 match(Set dst (Replicate zero));
18924 format %{ "replicateI $dst,$zero" %}
18925 ins_encode %{
18926 int vlen_enc = vector_length_encoding(this);
18927 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18928 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18929 } else {
18930 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18931 }
18932 %}
18933 ins_pipe( fpu_reg_reg );
18934 %}
18935
18936 instruct ReplI_M1(vec dst, immI_M1 con) %{
18937 predicate(Matcher::is_non_long_integral_vector(n));
18938 match(Set dst (Replicate con));
18939 format %{ "vallones $dst" %}
18940 ins_encode %{
18941 int vector_len = vector_length_encoding(this);
18942 __ vallones($dst$$XMMRegister, vector_len);
18943 %}
18944 ins_pipe( pipe_slow );
18945 %}
18946
18947 // ====================ReplicateL=======================================
18948
18949 // Replicate long (8 byte) scalar to be vector
18950 instruct ReplL_reg(vec dst, rRegL src) %{
18951 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18952 match(Set dst (Replicate src));
18953 format %{ "replicateL $dst,$src" %}
18954 ins_encode %{
18955 int vlen = Matcher::vector_length(this);
18956 int vlen_enc = vector_length_encoding(this);
18957 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18958 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18959 } else if (VM_Version::supports_avx2()) {
18960 __ movdq($dst$$XMMRegister, $src$$Register);
18961 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18962 } else {
18963 __ movdq($dst$$XMMRegister, $src$$Register);
18964 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18965 }
18966 %}
18967 ins_pipe( pipe_slow );
18968 %}
18969
18970 instruct ReplL_mem(vec dst, memory mem) %{
18971 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18972 match(Set dst (Replicate (LoadL mem)));
18973 format %{ "replicateL $dst,$mem" %}
18974 ins_encode %{
18975 int vlen_enc = vector_length_encoding(this);
18976 if (VM_Version::supports_avx2()) {
18977 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18978 } else if (VM_Version::supports_sse3()) {
18979 __ movddup($dst$$XMMRegister, $mem$$Address);
18980 } else {
18981 __ movq($dst$$XMMRegister, $mem$$Address);
18982 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18983 }
18984 %}
18985 ins_pipe( pipe_slow );
18986 %}
18987
18988 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18989 instruct ReplL_imm(vec dst, immL con) %{
18990 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18991 match(Set dst (Replicate con));
18992 format %{ "replicateL $dst,$con" %}
18993 ins_encode %{
18994 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18995 int vlen = Matcher::vector_length_in_bytes(this);
18996 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18997 %}
18998 ins_pipe( pipe_slow );
18999 %}
19000
19001 instruct ReplL_zero(vec dst, immL0 zero) %{
19002 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19003 match(Set dst (Replicate zero));
19004 format %{ "replicateL $dst,$zero" %}
19005 ins_encode %{
19006 int vlen_enc = vector_length_encoding(this);
19007 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19008 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19009 } else {
19010 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19011 }
19012 %}
19013 ins_pipe( fpu_reg_reg );
19014 %}
19015
19016 instruct ReplL_M1(vec dst, immL_M1 con) %{
19017 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19018 match(Set dst (Replicate con));
19019 format %{ "vallones $dst" %}
19020 ins_encode %{
19021 int vector_len = vector_length_encoding(this);
19022 __ vallones($dst$$XMMRegister, vector_len);
19023 %}
19024 ins_pipe( pipe_slow );
19025 %}
19026
19027 // ====================ReplicateF=======================================
19028
19029 instruct vReplF_reg(vec dst, vlRegF src) %{
19030 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19031 match(Set dst (Replicate src));
19032 format %{ "replicateF $dst,$src" %}
19033 ins_encode %{
19034 uint vlen = Matcher::vector_length(this);
19035 int vlen_enc = vector_length_encoding(this);
19036 if (vlen <= 4) {
19037 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19038 } else if (VM_Version::supports_avx2()) {
19039 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19040 } else {
19041 assert(vlen == 8, "sanity");
19042 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19043 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19044 }
19045 %}
19046 ins_pipe( pipe_slow );
19047 %}
19048
19049 instruct ReplF_reg(vec dst, vlRegF src) %{
19050 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19051 match(Set dst (Replicate src));
19052 format %{ "replicateF $dst,$src" %}
19053 ins_encode %{
19054 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19055 %}
19056 ins_pipe( pipe_slow );
19057 %}
19058
19059 instruct ReplF_mem(vec dst, memory mem) %{
19060 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19061 match(Set dst (Replicate (LoadF mem)));
19062 format %{ "replicateF $dst,$mem" %}
19063 ins_encode %{
19064 int vlen_enc = vector_length_encoding(this);
19065 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19066 %}
19067 ins_pipe( pipe_slow );
19068 %}
19069
19070 // Replicate float scalar immediate to be vector by loading from const table.
19071 instruct ReplF_imm(vec dst, immF con) %{
19072 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19073 match(Set dst (Replicate con));
19074 format %{ "replicateF $dst,$con" %}
19075 ins_encode %{
19076 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19077 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19078 int vlen = Matcher::vector_length_in_bytes(this);
19079 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19080 %}
19081 ins_pipe( pipe_slow );
19082 %}
19083
19084 instruct ReplF_zero(vec dst, immF0 zero) %{
19085 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19086 match(Set dst (Replicate zero));
19087 format %{ "replicateF $dst,$zero" %}
19088 ins_encode %{
19089 int vlen_enc = vector_length_encoding(this);
19090 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19091 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19092 } else {
19093 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19094 }
19095 %}
19096 ins_pipe( fpu_reg_reg );
19097 %}
19098
19099 // ====================ReplicateD=======================================
19100
19101 // Replicate double (8 bytes) scalar to be vector
19102 instruct vReplD_reg(vec dst, vlRegD src) %{
19103 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19104 match(Set dst (Replicate src));
19105 format %{ "replicateD $dst,$src" %}
19106 ins_encode %{
19107 uint vlen = Matcher::vector_length(this);
19108 int vlen_enc = vector_length_encoding(this);
19109 if (vlen <= 2) {
19110 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19111 } else if (VM_Version::supports_avx2()) {
19112 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19113 } else {
19114 assert(vlen == 4, "sanity");
19115 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19116 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19117 }
19118 %}
19119 ins_pipe( pipe_slow );
19120 %}
19121
19122 instruct ReplD_reg(vec dst, vlRegD src) %{
19123 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19124 match(Set dst (Replicate src));
19125 format %{ "replicateD $dst,$src" %}
19126 ins_encode %{
19127 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19128 %}
19129 ins_pipe( pipe_slow );
19130 %}
19131
19132 instruct ReplD_mem(vec dst, memory mem) %{
19133 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19134 match(Set dst (Replicate (LoadD mem)));
19135 format %{ "replicateD $dst,$mem" %}
19136 ins_encode %{
19137 if (Matcher::vector_length(this) >= 4) {
19138 int vlen_enc = vector_length_encoding(this);
19139 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19140 } else {
19141 __ movddup($dst$$XMMRegister, $mem$$Address);
19142 }
19143 %}
19144 ins_pipe( pipe_slow );
19145 %}
19146
19147 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19148 instruct ReplD_imm(vec dst, immD con) %{
19149 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19150 match(Set dst (Replicate con));
19151 format %{ "replicateD $dst,$con" %}
19152 ins_encode %{
19153 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19154 int vlen = Matcher::vector_length_in_bytes(this);
19155 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19156 %}
19157 ins_pipe( pipe_slow );
19158 %}
19159
19160 instruct ReplD_zero(vec dst, immD0 zero) %{
19161 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19162 match(Set dst (Replicate zero));
19163 format %{ "replicateD $dst,$zero" %}
19164 ins_encode %{
19165 int vlen_enc = vector_length_encoding(this);
19166 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19167 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19168 } else {
19169 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19170 }
19171 %}
19172 ins_pipe( fpu_reg_reg );
19173 %}
19174
19175 // ====================VECTOR INSERT=======================================
19176
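// For vectors wider than 128 bits there is no direct scalar-insert instruction, so the rules
// below split the element index into a 128-bit lane number (y_idx) and a position within that
// lane (x_idx): extract the lane, insert into it, then write the lane back.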
19177 instruct insert(vec dst, rRegI val, immU8 idx) %{
19178 predicate(Matcher::vector_length_in_bytes(n) < 32);
19179 match(Set dst (VectorInsert (Binary dst val) idx));
19180 format %{ "vector_insert $dst,$val,$idx" %}
19181 ins_encode %{
19182 assert(UseSSE >= 4, "required");
19183 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19184
19185 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19186
19187 assert(is_integral_type(elem_bt), "");
19188 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19189
19190 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19191 %}
19192 ins_pipe( pipe_slow );
19193 %}
19194
19195 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19196 predicate(Matcher::vector_length_in_bytes(n) == 32);
19197 match(Set dst (VectorInsert (Binary src val) idx));
19198 effect(TEMP vtmp);
19199 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19200 ins_encode %{
19201 int vlen_enc = Assembler::AVX_256bit;
19202 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19203 int elem_per_lane = 16/type2aelembytes(elem_bt);
19204 int log2epr = log2(elem_per_lane);
19205
19206 assert(is_integral_type(elem_bt), "sanity");
19207 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19208
19209 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19210 uint y_idx = ($idx$$constant >> log2epr) & 1;
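    // Example (hypothetical values): for a 256-bit T_INT vector, elem_per_lane = 4 and
    // log2epr = 2, so idx = 5 selects the upper 128-bit lane (y_idx = 1) and element 1
    // within that lane (x_idx = 1).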
19211 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19212 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19213 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19214 %}
19215 ins_pipe( pipe_slow );
19216 %}
19217
19218 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19219 predicate(Matcher::vector_length_in_bytes(n) == 64);
19220 match(Set dst (VectorInsert (Binary src val) idx));
19221 effect(TEMP vtmp);
19222 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19223 ins_encode %{
19224 assert(UseAVX > 2, "sanity");
19225
19226 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19227 int elem_per_lane = 16/type2aelembytes(elem_bt);
19228 int log2epr = log2(elem_per_lane);
19229
19230 assert(is_integral_type(elem_bt), "");
19231 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19232
19233 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19234 uint y_idx = ($idx$$constant >> log2epr) & 3;
19235 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19236 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19237 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19238 %}
19239 ins_pipe( pipe_slow );
19240 %}
19241
19242 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19243 predicate(Matcher::vector_length(n) == 2);
19244 match(Set dst (VectorInsert (Binary dst val) idx));
19245 format %{ "vector_insert $dst,$val,$idx" %}
19246 ins_encode %{
19247 assert(UseSSE >= 4, "required");
19248 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19249 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19250
19251 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19252 %}
19253 ins_pipe( pipe_slow );
19254 %}
19255
19256 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19257 predicate(Matcher::vector_length(n) == 4);
19258 match(Set dst (VectorInsert (Binary src val) idx));
19259 effect(TEMP vtmp);
19260 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19261 ins_encode %{
19262 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19263 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19264
19265 uint x_idx = $idx$$constant & right_n_bits(1);
19266 uint y_idx = ($idx$$constant >> 1) & 1;
19267 int vlen_enc = Assembler::AVX_256bit;
19268 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19269 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19270 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19271 %}
19272 ins_pipe( pipe_slow );
19273 %}
19274
19275 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19276 predicate(Matcher::vector_length(n) == 8);
19277 match(Set dst (VectorInsert (Binary src val) idx));
19278 effect(TEMP vtmp);
19279 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19280 ins_encode %{
19281 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19282 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19283
19284 uint x_idx = $idx$$constant & right_n_bits(1);
19285 uint y_idx = ($idx$$constant >> 1) & 3;
19286 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19287 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19288 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19289 %}
19290 ins_pipe( pipe_slow );
19291 %}
19292
19293 instruct insertF(vec dst, regF val, immU8 idx) %{
19294 predicate(Matcher::vector_length(n) < 8);
19295 match(Set dst (VectorInsert (Binary dst val) idx));
19296 format %{ "vector_insert $dst,$val,$idx" %}
19297 ins_encode %{
19298 assert(UseSSE >= 4, "sanity");
19299
19300 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19301 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19302
19303 uint x_idx = $idx$$constant & right_n_bits(2);
19304 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19305 %}
19306 ins_pipe( pipe_slow );
19307 %}
19308
19309 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19310 predicate(Matcher::vector_length(n) >= 8);
19311 match(Set dst (VectorInsert (Binary src val) idx));
19312 effect(TEMP vtmp);
19313 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19314 ins_encode %{
19315 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19316 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19317
19318 int vlen = Matcher::vector_length(this);
19319 uint x_idx = $idx$$constant & right_n_bits(2);
19320 if (vlen == 8) {
19321 uint y_idx = ($idx$$constant >> 2) & 1;
19322 int vlen_enc = Assembler::AVX_256bit;
19323 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19324 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19325 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19326 } else {
19327 assert(vlen == 16, "sanity");
19328 uint y_idx = ($idx$$constant >> 2) & 3;
19329 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19330 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19331 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19332 }
19333 %}
19334 ins_pipe( pipe_slow );
19335 %}
19336
19337 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19338 predicate(Matcher::vector_length(n) == 2);
19339 match(Set dst (VectorInsert (Binary dst val) idx));
19340 effect(TEMP tmp);
19341 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19342 ins_encode %{
19343 assert(UseSSE >= 4, "sanity");
19344 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19345 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19346
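    // pinsrq only accepts a GPR or memory source, so the double value is first moved to a GPR.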
19347 __ movq($tmp$$Register, $val$$XMMRegister);
19348 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19349 %}
19350 ins_pipe( pipe_slow );
19351 %}
19352
19353 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19354 predicate(Matcher::vector_length(n) == 4);
19355 match(Set dst (VectorInsert (Binary src val) idx));
19356 effect(TEMP vtmp, TEMP tmp);
19357 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19358 ins_encode %{
19359 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19360 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19361
19362 uint x_idx = $idx$$constant & right_n_bits(1);
19363 uint y_idx = ($idx$$constant >> 1) & 1;
19364 int vlen_enc = Assembler::AVX_256bit;
19365 __ movq($tmp$$Register, $val$$XMMRegister);
19366 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19367 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19368 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19369 %}
19370 ins_pipe( pipe_slow );
19371 %}
19372
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19374 predicate(Matcher::vector_length(n) == 8);
19375 match(Set dst (VectorInsert (Binary src val) idx));
19376 effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19378 ins_encode %{
19379 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19380 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19381
19382 uint x_idx = $idx$$constant & right_n_bits(1);
19383 uint y_idx = ($idx$$constant >> 1) & 3;
19384 __ movq($tmp$$Register, $val$$XMMRegister);
19385 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19386 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19387 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19388 %}
19389 ins_pipe( pipe_slow );
19390 %}
19391
19392 // ====================REDUCTION ARITHMETIC=======================================
19393
19394 // =======================Int Reduction==========================================
19395
19396 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19397 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19398 match(Set dst (AddReductionVI src1 src2));
19399 match(Set dst (MulReductionVI src1 src2));
19400 match(Set dst (AndReductionV src1 src2));
19401 match(Set dst ( OrReductionV src1 src2));
19402 match(Set dst (XorReductionV src1 src2));
19403 match(Set dst (MinReductionV src1 src2));
19404 match(Set dst (MaxReductionV src1 src2));
19405 match(Set dst (UMinReductionV src1 src2));
19406 match(Set dst (UMaxReductionV src1 src2));
19407 effect(TEMP vtmp1, TEMP vtmp2);
19408 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19409 ins_encode %{
19410 int opcode = this->ideal_Opcode();
19411 int vlen = Matcher::vector_length(this, $src2);
19412 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19413 %}
19414 ins_pipe( pipe_slow );
19415 %}
19416
19417 // =======================Long Reduction==========================================
19418
19419 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19420 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19421 match(Set dst (AddReductionVL src1 src2));
19422 match(Set dst (MulReductionVL src1 src2));
19423 match(Set dst (AndReductionV src1 src2));
19424 match(Set dst ( OrReductionV src1 src2));
19425 match(Set dst (XorReductionV src1 src2));
19426 match(Set dst (MinReductionV src1 src2));
19427 match(Set dst (MaxReductionV src1 src2));
19428 match(Set dst (UMinReductionV src1 src2));
19429 match(Set dst (UMaxReductionV src1 src2));
19430 effect(TEMP vtmp1, TEMP vtmp2);
19431 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19432 ins_encode %{
19433 int opcode = this->ideal_Opcode();
19434 int vlen = Matcher::vector_length(this, $src2);
19435 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19436 %}
19437 ins_pipe( pipe_slow );
19438 %}
19439
19440 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19441 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19442 match(Set dst (AddReductionVL src1 src2));
19443 match(Set dst (MulReductionVL src1 src2));
19444 match(Set dst (AndReductionV src1 src2));
19445 match(Set dst ( OrReductionV src1 src2));
19446 match(Set dst (XorReductionV src1 src2));
19447 match(Set dst (MinReductionV src1 src2));
19448 match(Set dst (MaxReductionV src1 src2));
19449 match(Set dst (UMinReductionV src1 src2));
19450 match(Set dst (UMaxReductionV src1 src2));
19451 effect(TEMP vtmp1, TEMP vtmp2);
19452 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19453 ins_encode %{
19454 int opcode = this->ideal_Opcode();
19455 int vlen = Matcher::vector_length(this, $src2);
19456 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19457 %}
19458 ins_pipe( pipe_slow );
19459 %}
19460
19461 // =======================Float Reduction==========================================
19462
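// The rules below come in two flavors: the strictly ordered ones accumulate into $dst and must
// preserve the left-to-right evaluation order (((acc + v0) + v1) + ...), while the unordered
// ones take the reduction identity in src1 and may combine lanes in any order, as the Vector
// API permits for its non-strict add/mul reductions.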
19463 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19464 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19465 match(Set dst (AddReductionVF dst src));
19466 match(Set dst (MulReductionVF dst src));
19467 effect(TEMP dst, TEMP vtmp);
19468 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19469 ins_encode %{
19470 int opcode = this->ideal_Opcode();
19471 int vlen = Matcher::vector_length(this, $src);
19472 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19473 %}
19474 ins_pipe( pipe_slow );
19475 %}
19476
19477 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19478 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19479 match(Set dst (AddReductionVF dst src));
19480 match(Set dst (MulReductionVF dst src));
19481 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19482 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19483 ins_encode %{
19484 int opcode = this->ideal_Opcode();
19485 int vlen = Matcher::vector_length(this, $src);
19486 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19487 %}
19488 ins_pipe( pipe_slow );
19489 %}
19490
19491 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19492 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19493 match(Set dst (AddReductionVF dst src));
19494 match(Set dst (MulReductionVF dst src));
19495 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19496 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19497 ins_encode %{
19498 int opcode = this->ideal_Opcode();
19499 int vlen = Matcher::vector_length(this, $src);
19500 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19501 %}
19502 ins_pipe( pipe_slow );
19503 %}
19504
19505
19506 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19507 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19508 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19509 // src1 contains reduction identity
19510 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19511 match(Set dst (AddReductionVF src1 src2));
19512 match(Set dst (MulReductionVF src1 src2));
19513 effect(TEMP dst);
19514 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19515 ins_encode %{
19516 int opcode = this->ideal_Opcode();
19517 int vlen = Matcher::vector_length(this, $src2);
19518 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19519 %}
19520 ins_pipe( pipe_slow );
19521 %}
19522
19523 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19524 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19525 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19526 // src1 contains reduction identity
19527 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19528 match(Set dst (AddReductionVF src1 src2));
19529 match(Set dst (MulReductionVF src1 src2));
19530 effect(TEMP dst, TEMP vtmp);
19531 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19532 ins_encode %{
19533 int opcode = this->ideal_Opcode();
19534 int vlen = Matcher::vector_length(this, $src2);
19535 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19536 %}
19537 ins_pipe( pipe_slow );
19538 %}
19539
19540 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19541 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19542 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19543 // src1 contains reduction identity
19544 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19545 match(Set dst (AddReductionVF src1 src2));
19546 match(Set dst (MulReductionVF src1 src2));
19547 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19548 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19549 ins_encode %{
19550 int opcode = this->ideal_Opcode();
19551 int vlen = Matcher::vector_length(this, $src2);
19552 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19553 %}
19554 ins_pipe( pipe_slow );
19555 %}
19556
19557 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19558 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19559 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19560 // src1 contains reduction identity
19561 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19562 match(Set dst (AddReductionVF src1 src2));
19563 match(Set dst (MulReductionVF src1 src2));
19564 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19565 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19566 ins_encode %{
19567 int opcode = this->ideal_Opcode();
19568 int vlen = Matcher::vector_length(this, $src2);
19569 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19570 %}
19571 ins_pipe( pipe_slow );
19572 %}
19573
19574 // =======================Double Reduction==========================================
19575
19576 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19577 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19578 match(Set dst (AddReductionVD dst src));
19579 match(Set dst (MulReductionVD dst src));
19580 effect(TEMP dst, TEMP vtmp);
19581 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19582 ins_encode %{
19583 int opcode = this->ideal_Opcode();
19584 int vlen = Matcher::vector_length(this, $src);
19585 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19586 %}
19587 ins_pipe( pipe_slow );
19588 %}
19589
19590 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19591 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19592 match(Set dst (AddReductionVD dst src));
19593 match(Set dst (MulReductionVD dst src));
19594 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19595 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19596 ins_encode %{
19597 int opcode = this->ideal_Opcode();
19598 int vlen = Matcher::vector_length(this, $src);
19599 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19600 %}
19601 ins_pipe( pipe_slow );
19602 %}
19603
19604 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19605 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19606 match(Set dst (AddReductionVD dst src));
19607 match(Set dst (MulReductionVD dst src));
19608 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19609 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19610 ins_encode %{
19611 int opcode = this->ideal_Opcode();
19612 int vlen = Matcher::vector_length(this, $src);
19613 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19614 %}
19615 ins_pipe( pipe_slow );
19616 %}
19617
19618 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19619 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19620 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19621 // src1 contains reduction identity
19622 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19623 match(Set dst (AddReductionVD src1 src2));
19624 match(Set dst (MulReductionVD src1 src2));
19625 effect(TEMP dst);
19626 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19627 ins_encode %{
19628 int opcode = this->ideal_Opcode();
19629 int vlen = Matcher::vector_length(this, $src2);
19630 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19631 %}
19632 ins_pipe( pipe_slow );
19633 %}
19634
19635 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19636 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19637 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19638 // src1 contains reduction identity
19639 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19640 match(Set dst (AddReductionVD src1 src2));
19641 match(Set dst (MulReductionVD src1 src2));
19642 effect(TEMP dst, TEMP vtmp);
19643 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19644 ins_encode %{
19645 int opcode = this->ideal_Opcode();
19646 int vlen = Matcher::vector_length(this, $src2);
19647 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19648 %}
19649 ins_pipe( pipe_slow );
19650 %}
19651
19652 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19653 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19654 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19655 // src1 contains reduction identity
19656 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19657 match(Set dst (AddReductionVD src1 src2));
19658 match(Set dst (MulReductionVD src1 src2));
19659 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19660 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19661 ins_encode %{
19662 int opcode = this->ideal_Opcode();
19663 int vlen = Matcher::vector_length(this, $src2);
19664 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19665 %}
19666 ins_pipe( pipe_slow );
19667 %}
19668
19669 // =======================Byte Reduction==========================================
19670
19671 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19672 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19673 match(Set dst (AddReductionVI src1 src2));
19674 match(Set dst (AndReductionV src1 src2));
19675 match(Set dst ( OrReductionV src1 src2));
19676 match(Set dst (XorReductionV src1 src2));
19677 match(Set dst (MinReductionV src1 src2));
19678 match(Set dst (MaxReductionV src1 src2));
19679 match(Set dst (UMinReductionV src1 src2));
19680 match(Set dst (UMaxReductionV src1 src2));
19681 effect(TEMP vtmp1, TEMP vtmp2);
19682 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19683 ins_encode %{
19684 int opcode = this->ideal_Opcode();
19685 int vlen = Matcher::vector_length(this, $src2);
19686 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19687 %}
19688 ins_pipe( pipe_slow );
19689 %}
19690
19691 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19692 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19693 match(Set dst (AddReductionVI src1 src2));
19694 match(Set dst (AndReductionV src1 src2));
19695 match(Set dst ( OrReductionV src1 src2));
19696 match(Set dst (XorReductionV src1 src2));
19697 match(Set dst (MinReductionV src1 src2));
19698 match(Set dst (MaxReductionV src1 src2));
19699 match(Set dst (UMinReductionV src1 src2));
19700 match(Set dst (UMaxReductionV src1 src2));
19701 effect(TEMP vtmp1, TEMP vtmp2);
19702 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19703 ins_encode %{
19704 int opcode = this->ideal_Opcode();
19705 int vlen = Matcher::vector_length(this, $src2);
19706 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19707 %}
19708 ins_pipe( pipe_slow );
19709 %}
19710
19711 // =======================Short Reduction==========================================
19712
19713 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19714 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19715 match(Set dst (AddReductionVI src1 src2));
19716 match(Set dst (MulReductionVI src1 src2));
19717 match(Set dst (AndReductionV src1 src2));
19718 match(Set dst ( OrReductionV src1 src2));
19719 match(Set dst (XorReductionV src1 src2));
19720 match(Set dst (MinReductionV src1 src2));
19721 match(Set dst (MaxReductionV src1 src2));
19722 match(Set dst (UMinReductionV src1 src2));
19723 match(Set dst (UMaxReductionV src1 src2));
19724 effect(TEMP vtmp1, TEMP vtmp2);
19725 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19726 ins_encode %{
19727 int opcode = this->ideal_Opcode();
19728 int vlen = Matcher::vector_length(this, $src2);
19729 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19730 %}
19731 ins_pipe( pipe_slow );
19732 %}
19733
19734 // =======================Mul Reduction==========================================
19735
19736 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19737 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19738 Matcher::vector_length(n->in(2)) <= 32); // src2
19739 match(Set dst (MulReductionVI src1 src2));
19740 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19741 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19742 ins_encode %{
19743 int opcode = this->ideal_Opcode();
19744 int vlen = Matcher::vector_length(this, $src2);
19745 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19746 %}
19747 ins_pipe( pipe_slow );
19748 %}
19749
19750 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19751 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19752 Matcher::vector_length(n->in(2)) == 64); // src2
19753 match(Set dst (MulReductionVI src1 src2));
19754 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19755 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19756 ins_encode %{
19757 int opcode = this->ideal_Opcode();
19758 int vlen = Matcher::vector_length(this, $src2);
19759 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19760 %}
19761 ins_pipe( pipe_slow );
19762 %}
19763
19764 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
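// These rules funnel through reduceFloatMinMax(); unlike a bare minps/maxps sequence it
// presumably has to special-case NaN and -0.0 vs +0.0 to match Java's Math.min/max semantics,
// hence the extra temporaries.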
19766 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19767 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19768 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19769 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19770 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19771 Matcher::vector_length(n->in(2)) == 2);
19772 match(Set dst (MinReductionV src1 src2));
19773 match(Set dst (MaxReductionV src1 src2));
19774 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19775 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19776 ins_encode %{
19777 assert(UseAVX > 0, "sanity");
19778
19779 int opcode = this->ideal_Opcode();
19780 int vlen = Matcher::vector_length(this, $src2);
19781 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19782 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19783 %}
19784 ins_pipe( pipe_slow );
19785 %}
19786
19787 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19788 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19789 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19790 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19791 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19792 Matcher::vector_length(n->in(2)) >= 4);
19793 match(Set dst (MinReductionV src1 src2));
19794 match(Set dst (MaxReductionV src1 src2));
19795 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19796 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19797 ins_encode %{
19798 assert(UseAVX > 0, "sanity");
19799
19800 int opcode = this->ideal_Opcode();
19801 int vlen = Matcher::vector_length(this, $src2);
19802 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19803 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19804 %}
19805 ins_pipe( pipe_slow );
19806 %}
19807
19808 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19809 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19810 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19811 Matcher::vector_length(n->in(2)) == 2);
19812 match(Set dst (MinReductionV dst src));
19813 match(Set dst (MaxReductionV dst src));
19814 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19815 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19816 ins_encode %{
19817 assert(UseAVX > 0, "sanity");
19818
19819 int opcode = this->ideal_Opcode();
19820 int vlen = Matcher::vector_length(this, $src);
19821 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19822 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19823 %}
19824 ins_pipe( pipe_slow );
19825 %}
19826
19827
19828 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19829 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19830 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19831 Matcher::vector_length(n->in(2)) >= 4);
19832 match(Set dst (MinReductionV dst src));
19833 match(Set dst (MaxReductionV dst src));
19834 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19835 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19836 ins_encode %{
19837 assert(UseAVX > 0, "sanity");
19838
19839 int opcode = this->ideal_Opcode();
19840 int vlen = Matcher::vector_length(this, $src);
19841 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19842 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19843 %}
19844 ins_pipe( pipe_slow );
19845 %}
19846
19847 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19848 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19849 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19850 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19851 Matcher::vector_length(n->in(2)) == 2);
19852 match(Set dst (MinReductionV src1 src2));
19853 match(Set dst (MaxReductionV src1 src2));
19854 effect(TEMP dst, TEMP xtmp1);
19855 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19856 ins_encode %{
19857 int opcode = this->ideal_Opcode();
19858 int vlen = Matcher::vector_length(this, $src2);
19859 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19860 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19861 %}
19862 ins_pipe( pipe_slow );
19863 %}
19864
19865 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19866 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19867 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19868 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19869 Matcher::vector_length(n->in(2)) >= 4);
19870 match(Set dst (MinReductionV src1 src2));
19871 match(Set dst (MaxReductionV src1 src2));
19872 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19873 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19874 ins_encode %{
19875 int opcode = this->ideal_Opcode();
19876 int vlen = Matcher::vector_length(this, $src2);
19877 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19878 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19879 %}
19880 ins_pipe( pipe_slow );
19881 %}
19882
19883 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19884 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19885 Matcher::vector_length(n->in(2)) == 2);
19886 match(Set dst (MinReductionV dst src));
19887 match(Set dst (MaxReductionV dst src));
19888 effect(TEMP dst, TEMP xtmp1);
19889 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19890 ins_encode %{
19891 int opcode = this->ideal_Opcode();
19892 int vlen = Matcher::vector_length(this, $src);
19893 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19894 $xtmp1$$XMMRegister);
19895 %}
19896 ins_pipe( pipe_slow );
19897 %}
19898
19899 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19900 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19901 Matcher::vector_length(n->in(2)) >= 4);
19902 match(Set dst (MinReductionV dst src));
19903 match(Set dst (MaxReductionV dst src));
19904 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19906 ins_encode %{
19907 int opcode = this->ideal_Opcode();
19908 int vlen = Matcher::vector_length(this, $src);
19909 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19910 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19911 %}
19912 ins_pipe( pipe_slow );
19913 %}
19914
//--------------------Min/Max Double Reduction --------------------
19916 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19917 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19918 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19919 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19920 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19921 Matcher::vector_length(n->in(2)) == 2);
19922 match(Set dst (MinReductionV src1 src2));
19923 match(Set dst (MaxReductionV src1 src2));
19924 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19925 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19926 ins_encode %{
19927 assert(UseAVX > 0, "sanity");
19928
19929 int opcode = this->ideal_Opcode();
19930 int vlen = Matcher::vector_length(this, $src2);
19931 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19932 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19933 %}
19934 ins_pipe( pipe_slow );
19935 %}
19936
19937 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19938 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19939 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19940 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19941 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19942 Matcher::vector_length(n->in(2)) >= 4);
19943 match(Set dst (MinReductionV src1 src2));
19944 match(Set dst (MaxReductionV src1 src2));
19945 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19946 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19947 ins_encode %{
19948 assert(UseAVX > 0, "sanity");
19949
19950 int opcode = this->ideal_Opcode();
19951 int vlen = Matcher::vector_length(this, $src2);
19952 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19953 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19954 %}
19955 ins_pipe( pipe_slow );
19956 %}
19957
19959 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19960 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19961 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19962 Matcher::vector_length(n->in(2)) == 2);
19963 match(Set dst (MinReductionV dst src));
19964 match(Set dst (MaxReductionV dst src));
19965 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19966 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19967 ins_encode %{
19968 assert(UseAVX > 0, "sanity");
19969
19970 int opcode = this->ideal_Opcode();
19971 int vlen = Matcher::vector_length(this, $src);
19972 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19973 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19974 %}
19975 ins_pipe( pipe_slow );
19976 %}
19977
19978 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19979 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19980 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19981 Matcher::vector_length(n->in(2)) >= 4);
19982 match(Set dst (MinReductionV dst src));
19983 match(Set dst (MaxReductionV dst src));
19984 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19985 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19986 ins_encode %{
19987 assert(UseAVX > 0, "sanity");
19988
19989 int opcode = this->ideal_Opcode();
19990 int vlen = Matcher::vector_length(this, $src);
19991 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19992 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19993 %}
19994 ins_pipe( pipe_slow );
19995 %}
19996
19997 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19998 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19999 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20000 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20001 Matcher::vector_length(n->in(2)) == 2);
20002 match(Set dst (MinReductionV src1 src2));
20003 match(Set dst (MaxReductionV src1 src2));
20004 effect(TEMP dst, TEMP xtmp1);
20005 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20006 ins_encode %{
20007 int opcode = this->ideal_Opcode();
20008 int vlen = Matcher::vector_length(this, $src2);
20009 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20010 xnoreg, xnoreg, $xtmp1$$XMMRegister);
20011 %}
20012 ins_pipe( pipe_slow );
20013 %}
20014
20015 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20016 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20017 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20018 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20019 Matcher::vector_length(n->in(2)) >= 4);
20020 match(Set dst (MinReductionV src1 src2));
20021 match(Set dst (MaxReductionV src1 src2));
20022 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20023 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20024 ins_encode %{
20025 int opcode = this->ideal_Opcode();
20026 int vlen = Matcher::vector_length(this, $src2);
20027 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20028 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20029 %}
20030 ins_pipe( pipe_slow );
20031 %}
20032
20034 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20035 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20036 Matcher::vector_length(n->in(2)) == 2);
20037 match(Set dst (MinReductionV dst src));
20038 match(Set dst (MaxReductionV dst src));
20039 effect(TEMP dst, TEMP xtmp1);
20040 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20041 ins_encode %{
20042 int opcode = this->ideal_Opcode();
20043 int vlen = Matcher::vector_length(this, $src);
20044 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20045 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20046 %}
20047 ins_pipe( pipe_slow );
20048 %}
20049
20050 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20051 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20052 Matcher::vector_length(n->in(2)) >= 4);
20053 match(Set dst (MinReductionV dst src));
20054 match(Set dst (MaxReductionV dst src));
20055 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20056 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20057 ins_encode %{
20058 int opcode = this->ideal_Opcode();
20059 int vlen = Matcher::vector_length(this, $src);
20060 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20061 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20062 %}
20063 ins_pipe( pipe_slow );
20064 %}
20065
20066 // ====================VECTOR ARITHMETIC=======================================
20067
20068 // --------------------------------- ADD --------------------------------------
20069
20070 // Bytes vector add
20071 instruct vaddB(vec dst, vec src) %{
20072 predicate(UseAVX == 0);
20073 match(Set dst (AddVB dst src));
20074 format %{ "paddb $dst,$src\t! add packedB" %}
20075 ins_encode %{
20076 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20077 %}
20078 ins_pipe( pipe_slow );
20079 %}
20080
20081 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20082 predicate(UseAVX > 0);
20083 match(Set dst (AddVB src1 src2));
20084 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20085 ins_encode %{
20086 int vlen_enc = vector_length_encoding(this);
20087 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20088 %}
20089 ins_pipe( pipe_slow );
20090 %}
20091
20092 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20093 predicate((UseAVX > 0) &&
20094 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20095 match(Set dst (AddVB src (LoadVector mem)));
20096 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20097 ins_encode %{
20098 int vlen_enc = vector_length_encoding(this);
20099 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20100 %}
20101 ins_pipe( pipe_slow );
20102 %}
20103
20104 // Shorts/Chars vector add
20105 instruct vaddS(vec dst, vec src) %{
20106 predicate(UseAVX == 0);
20107 match(Set dst (AddVS dst src));
20108 format %{ "paddw $dst,$src\t! add packedS" %}
20109 ins_encode %{
20110 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20111 %}
20112 ins_pipe( pipe_slow );
20113 %}
20114
20115 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20116 predicate(UseAVX > 0);
20117 match(Set dst (AddVS src1 src2));
20118 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20119 ins_encode %{
20120 int vlen_enc = vector_length_encoding(this);
20121 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20122 %}
20123 ins_pipe( pipe_slow );
20124 %}
20125
20126 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20127 predicate((UseAVX > 0) &&
20128 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20129 match(Set dst (AddVS src (LoadVector mem)));
20130 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20131 ins_encode %{
20132 int vlen_enc = vector_length_encoding(this);
20133 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20134 %}
20135 ins_pipe( pipe_slow );
20136 %}
20137
20138 // Integers vector add
20139 instruct vaddI(vec dst, vec src) %{
20140 predicate(UseAVX == 0);
20141 match(Set dst (AddVI dst src));
20142 format %{ "paddd $dst,$src\t! add packedI" %}
20143 ins_encode %{
20144 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20145 %}
20146 ins_pipe( pipe_slow );
20147 %}
20148
20149 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20150 predicate(UseAVX > 0);
20151 match(Set dst (AddVI src1 src2));
20152 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20153 ins_encode %{
20154 int vlen_enc = vector_length_encoding(this);
20155 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20156 %}
20157 ins_pipe( pipe_slow );
20158 %}
20159
20161 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20162 predicate((UseAVX > 0) &&
20163 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20164 match(Set dst (AddVI src (LoadVector mem)));
20165 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20166 ins_encode %{
20167 int vlen_enc = vector_length_encoding(this);
20168 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20169 %}
20170 ins_pipe( pipe_slow );
20171 %}
20172
20173 // Longs vector add
20174 instruct vaddL(vec dst, vec src) %{
20175 predicate(UseAVX == 0);
20176 match(Set dst (AddVL dst src));
20177 format %{ "paddq $dst,$src\t! add packedL" %}
20178 ins_encode %{
20179 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20180 %}
20181 ins_pipe( pipe_slow );
20182 %}
20183
20184 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20185 predicate(UseAVX > 0);
20186 match(Set dst (AddVL src1 src2));
20187 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20188 ins_encode %{
20189 int vlen_enc = vector_length_encoding(this);
20190 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20191 %}
20192 ins_pipe( pipe_slow );
20193 %}
20194
20195 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20196 predicate((UseAVX > 0) &&
20197 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20198 match(Set dst (AddVL src (LoadVector mem)));
20199 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20200 ins_encode %{
20201 int vlen_enc = vector_length_encoding(this);
20202 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20203 %}
20204 ins_pipe( pipe_slow );
20205 %}
20206
20207 // Floats vector add
20208 instruct vaddF(vec dst, vec src) %{
20209 predicate(UseAVX == 0);
20210 match(Set dst (AddVF dst src));
20211 format %{ "addps $dst,$src\t! add packedF" %}
20212 ins_encode %{
20213 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20214 %}
20215 ins_pipe( pipe_slow );
20216 %}
20217
20218 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20219 predicate(UseAVX > 0);
20220 match(Set dst (AddVF src1 src2));
20221 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20222 ins_encode %{
20223 int vlen_enc = vector_length_encoding(this);
20224 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20225 %}
20226 ins_pipe( pipe_slow );
20227 %}
20228
20229 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20230 predicate((UseAVX > 0) &&
20231 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20232 match(Set dst (AddVF src (LoadVector mem)));
20233 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20234 ins_encode %{
20235 int vlen_enc = vector_length_encoding(this);
20236 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20237 %}
20238 ins_pipe( pipe_slow );
20239 %}
20240
20241 // Doubles vector add
20242 instruct vaddD(vec dst, vec src) %{
20243 predicate(UseAVX == 0);
20244 match(Set dst (AddVD dst src));
20245 format %{ "addpd $dst,$src\t! add packedD" %}
20246 ins_encode %{
20247 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20248 %}
20249 ins_pipe( pipe_slow );
20250 %}
20251
20252 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20253 predicate(UseAVX > 0);
20254 match(Set dst (AddVD src1 src2));
20255 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20256 ins_encode %{
20257 int vlen_enc = vector_length_encoding(this);
20258 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20259 %}
20260 ins_pipe( pipe_slow );
20261 %}
20262
20263 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20264 predicate((UseAVX > 0) &&
20265 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20266 match(Set dst (AddVD src (LoadVector mem)));
20267 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20268 ins_encode %{
20269 int vlen_enc = vector_length_encoding(this);
20270 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20271 %}
20272 ins_pipe( pipe_slow );
20273 %}
20274
20275 // --------------------------------- SUB --------------------------------------
20276
20277 // Bytes vector sub
20278 instruct vsubB(vec dst, vec src) %{
20279 predicate(UseAVX == 0);
20280 match(Set dst (SubVB dst src));
20281 format %{ "psubb $dst,$src\t! sub packedB" %}
20282 ins_encode %{
20283 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20284 %}
20285 ins_pipe( pipe_slow );
20286 %}
20287
20288 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20289 predicate(UseAVX > 0);
20290 match(Set dst (SubVB src1 src2));
20291 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20292 ins_encode %{
20293 int vlen_enc = vector_length_encoding(this);
20294 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20295 %}
20296 ins_pipe( pipe_slow );
20297 %}
20298
20299 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20300 predicate((UseAVX > 0) &&
20301 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20302 match(Set dst (SubVB src (LoadVector mem)));
20303 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20304 ins_encode %{
20305 int vlen_enc = vector_length_encoding(this);
20306 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20307 %}
20308 ins_pipe( pipe_slow );
20309 %}
20310
20311 // Shorts/Chars vector sub
20312 instruct vsubS(vec dst, vec src) %{
20313 predicate(UseAVX == 0);
20314 match(Set dst (SubVS dst src));
20315 format %{ "psubw $dst,$src\t! sub packedS" %}
20316 ins_encode %{
20317 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20318 %}
20319 ins_pipe( pipe_slow );
20320 %}
20321
20323 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20324 predicate(UseAVX > 0);
20325 match(Set dst (SubVS src1 src2));
20326 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20327 ins_encode %{
20328 int vlen_enc = vector_length_encoding(this);
20329 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20330 %}
20331 ins_pipe( pipe_slow );
20332 %}
20333
20334 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20335 predicate((UseAVX > 0) &&
20336 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20337 match(Set dst (SubVS src (LoadVector mem)));
20338 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20339 ins_encode %{
20340 int vlen_enc = vector_length_encoding(this);
20341 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20342 %}
20343 ins_pipe( pipe_slow );
20344 %}
20345
20346 // Integers vector sub
20347 instruct vsubI(vec dst, vec src) %{
20348 predicate(UseAVX == 0);
20349 match(Set dst (SubVI dst src));
20350 format %{ "psubd $dst,$src\t! sub packedI" %}
20351 ins_encode %{
20352 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20353 %}
20354 ins_pipe( pipe_slow );
20355 %}
20356
20357 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20358 predicate(UseAVX > 0);
20359 match(Set dst (SubVI src1 src2));
20360 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20361 ins_encode %{
20362 int vlen_enc = vector_length_encoding(this);
20363 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20364 %}
20365 ins_pipe( pipe_slow );
20366 %}
20367
20368 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20369 predicate((UseAVX > 0) &&
20370 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20371 match(Set dst (SubVI src (LoadVector mem)));
20372 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20373 ins_encode %{
20374 int vlen_enc = vector_length_encoding(this);
20375 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20376 %}
20377 ins_pipe( pipe_slow );
20378 %}
20379
20380 // Longs vector sub
20381 instruct vsubL(vec dst, vec src) %{
20382 predicate(UseAVX == 0);
20383 match(Set dst (SubVL dst src));
20384 format %{ "psubq $dst,$src\t! sub packedL" %}
20385 ins_encode %{
20386 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20387 %}
20388 ins_pipe( pipe_slow );
20389 %}
20390
20391 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20392 predicate(UseAVX > 0);
20393 match(Set dst (SubVL src1 src2));
20394 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20395 ins_encode %{
20396 int vlen_enc = vector_length_encoding(this);
20397 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20398 %}
20399 ins_pipe( pipe_slow );
20400 %}
20401
20403 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20404 predicate((UseAVX > 0) &&
20405 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20406 match(Set dst (SubVL src (LoadVector mem)));
20407 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20408 ins_encode %{
20409 int vlen_enc = vector_length_encoding(this);
20410 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20411 %}
20412 ins_pipe( pipe_slow );
20413 %}
20414
20415 // Floats vector sub
20416 instruct vsubF(vec dst, vec src) %{
20417 predicate(UseAVX == 0);
20418 match(Set dst (SubVF dst src));
20419 format %{ "subps $dst,$src\t! sub packedF" %}
20420 ins_encode %{
20421 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20422 %}
20423 ins_pipe( pipe_slow );
20424 %}
20425
20426 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20427 predicate(UseAVX > 0);
20428 match(Set dst (SubVF src1 src2));
20429 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20430 ins_encode %{
20431 int vlen_enc = vector_length_encoding(this);
20432 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20433 %}
20434 ins_pipe( pipe_slow );
20435 %}
20436
20437 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20438 predicate((UseAVX > 0) &&
20439 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20440 match(Set dst (SubVF src (LoadVector mem)));
20441 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20442 ins_encode %{
20443 int vlen_enc = vector_length_encoding(this);
20444 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20445 %}
20446 ins_pipe( pipe_slow );
20447 %}
20448
20449 // Doubles vector sub
20450 instruct vsubD(vec dst, vec src) %{
20451 predicate(UseAVX == 0);
20452 match(Set dst (SubVD dst src));
20453 format %{ "subpd $dst,$src\t! sub packedD" %}
20454 ins_encode %{
20455 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20456 %}
20457 ins_pipe( pipe_slow );
20458 %}
20459
20460 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20461 predicate(UseAVX > 0);
20462 match(Set dst (SubVD src1 src2));
20463 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20464 ins_encode %{
20465 int vlen_enc = vector_length_encoding(this);
20466 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20467 %}
20468 ins_pipe( pipe_slow );
20469 %}
20470
20471 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20472 predicate((UseAVX > 0) &&
20473 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20474 match(Set dst (SubVD src (LoadVector mem)));
20475 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20476 ins_encode %{
20477 int vlen_enc = vector_length_encoding(this);
20478 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20479 %}
20480 ins_pipe( pipe_slow );
20481 %}
20482
20483 // --------------------------------- MUL --------------------------------------
20484
20485 // Byte vector mul
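// Note: x86 SIMD has no packed byte multiply (there is no pmullb/vpmullb), so the patterns below
// widen the bytes to 16-bit lanes, multiply with (v)pmullw, mask each 16-bit product down to its
// low byte, and re-pack with (v)packuswb.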
20486 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20487 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20488 match(Set dst (MulVB src1 src2));
20489 effect(TEMP dst, TEMP xtmp);
20490 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20491 ins_encode %{
20492 assert(UseSSE > 3, "required");
20493 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20494 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20495 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20496 __ psllw($dst$$XMMRegister, 8);
20497 __ psrlw($dst$$XMMRegister, 8);
20498 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20499 %}
20500 ins_pipe( pipe_slow );
20501 %}
20502
20503 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20504 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20505 match(Set dst (MulVB src1 src2));
20506 effect(TEMP dst, TEMP xtmp);
20507 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20508 ins_encode %{
20509 assert(UseSSE > 3, "required");
20510 // Odd-index elements
20511 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20512 __ psrlw($dst$$XMMRegister, 8);
20513 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20514 __ psrlw($xtmp$$XMMRegister, 8);
20515 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20516 __ psllw($dst$$XMMRegister, 8);
20517 // Even-index elements
20518 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20519 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20520 __ psllw($xtmp$$XMMRegister, 8);
20521 __ psrlw($xtmp$$XMMRegister, 8);
20522 // Combine
20523 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20524 %}
20525 ins_pipe( pipe_slow );
20526 %}
20527
20528 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20529 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20530 match(Set dst (MulVB src1 src2));
20531 effect(TEMP xtmp1, TEMP xtmp2);
20532 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20533 ins_encode %{
20534 int vlen_enc = vector_length_encoding(this);
20535 // Odd-index elements
20536 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20537 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20538 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20539 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20540 // Even-index elements
20541 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20542 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20543 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20544 // Combine
20545 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20546 %}
20547 ins_pipe( pipe_slow );
20548 %}
20549
20550 // Shorts/Chars vector mul
20551 instruct vmulS(vec dst, vec src) %{
20552 predicate(UseAVX == 0);
20553 match(Set dst (MulVS dst src));
20554 format %{ "pmullw $dst,$src\t! mul packedS" %}
20555 ins_encode %{
20556 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20557 %}
20558 ins_pipe( pipe_slow );
20559 %}
20560
20561 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20562 predicate(UseAVX > 0);
20563 match(Set dst (MulVS src1 src2));
20564 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20565 ins_encode %{
20566 int vlen_enc = vector_length_encoding(this);
20567 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20568 %}
20569 ins_pipe( pipe_slow );
20570 %}
20571
20572 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20573 predicate((UseAVX > 0) &&
20574 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20575 match(Set dst (MulVS src (LoadVector mem)));
20576 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20577 ins_encode %{
20578 int vlen_enc = vector_length_encoding(this);
20579 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20580 %}
20581 ins_pipe( pipe_slow );
20582 %}
20583
20584 // Integers vector mul
20585 instruct vmulI(vec dst, vec src) %{
20586 predicate(UseAVX == 0);
20587 match(Set dst (MulVI dst src));
20588 format %{ "pmulld $dst,$src\t! mul packedI" %}
20589 ins_encode %{
20590 assert(UseSSE > 3, "required");
20591 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20592 %}
20593 ins_pipe( pipe_slow );
20594 %}
20595
20596 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20597 predicate(UseAVX > 0);
20598 match(Set dst (MulVI src1 src2));
20599 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20600 ins_encode %{
20601 int vlen_enc = vector_length_encoding(this);
20602 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20603 %}
20604 ins_pipe( pipe_slow );
20605 %}
20606
20607 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20608 predicate((UseAVX > 0) &&
20609 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20610 match(Set dst (MulVI src (LoadVector mem)));
20611 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20612 ins_encode %{
20613 int vlen_enc = vector_length_encoding(this);
20614 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20615 %}
20616 ins_pipe( pipe_slow );
20617 %}
20618
20619 // Longs vector mul
20620 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20621 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20622 VM_Version::supports_avx512dq()) ||
20623 VM_Version::supports_avx512vldq());
20624 match(Set dst (MulVL src1 src2));
20625 ins_cost(500);
20626 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20627 ins_encode %{
20628 assert(UseAVX > 2, "required");
20629 int vlen_enc = vector_length_encoding(this);
20630 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20631 %}
20632 ins_pipe( pipe_slow );
20633 %}
20634
20635 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20636 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20637 VM_Version::supports_avx512dq()) ||
20638 (Matcher::vector_length_in_bytes(n) > 8 &&
20639 VM_Version::supports_avx512vldq()));
20640 match(Set dst (MulVL src (LoadVector mem)));
20641 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20642 ins_cost(500);
20643 ins_encode %{
20644 assert(UseAVX > 2, "required");
20645 int vlen_enc = vector_length_encoding(this);
20646 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20647 %}
20648 ins_pipe( pipe_slow );
20649 %}
20650
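// The two patterns below synthesize a 64x64->64-bit multiply from 32-bit multiplies.
// Writing a = a_hi * 2^32 + a_lo and b = b_hi * 2^32 + b_lo:
//   a * b mod 2^64 = ((a_hi * b_lo + a_lo * b_hi) << 32) + a_lo * b_lo
// The pshufd/pmulld (resp. vpshufd/vpmulld) sequence forms the two cross products a_hi*b_lo and
// a_lo*b_hi, which are summed and shifted left by 32, while pmuludq/vpmuludq supplies the full
// 64-bit a_lo*b_lo term.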
20651 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20652 predicate(UseAVX == 0);
20653 match(Set dst (MulVL src1 src2));
20654 ins_cost(500);
20655 effect(TEMP dst, TEMP xtmp);
20656 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20657 ins_encode %{
20658 assert(VM_Version::supports_sse4_1(), "required");
20659 // Get the lo-hi cross products; only the lower 32 bits are of concern
20660 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20661 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20662 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20663 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20664 __ psllq($dst$$XMMRegister, 32);
20665 // Get the lo-lo products
20666 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20667 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20668 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20669 %}
20670 ins_pipe( pipe_slow );
20671 %}
20672
20673 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20674 predicate(UseAVX > 0 &&
20675 ((Matcher::vector_length_in_bytes(n) == 64 &&
20676 !VM_Version::supports_avx512dq()) ||
20677 (Matcher::vector_length_in_bytes(n) < 64 &&
20678 !VM_Version::supports_avx512vldq())));
20679 match(Set dst (MulVL src1 src2));
20680 effect(TEMP xtmp1, TEMP xtmp2);
20681 ins_cost(500);
20682 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20683 ins_encode %{
20684 int vlen_enc = vector_length_encoding(this);
20685 // Get the lo-hi cross products; only the lower 32 bits are of concern
20686 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20687 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20688 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20689 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20690 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20691 // Get the lo-lo products
20692 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20693 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20694 %}
20695 ins_pipe( pipe_slow );
20696 %}
20697
20698 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20699 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20700 match(Set dst (MulVL src1 src2));
20701 ins_cost(100);
20702 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20703 ins_encode %{
20704 int vlen_enc = vector_length_encoding(this);
20705 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20706 %}
20707 ins_pipe( pipe_slow );
20708 %}
20709
20710 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20711 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20712 match(Set dst (MulVL src1 src2));
20713 ins_cost(100);
20714 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20715 ins_encode %{
20716 int vlen_enc = vector_length_encoding(this);
20717 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20718 %}
20719 ins_pipe( pipe_slow );
20720 %}
20721
20722 // Floats vector mul
20723 instruct vmulF(vec dst, vec src) %{
20724 predicate(UseAVX == 0);
20725 match(Set dst (MulVF dst src));
20726 format %{ "mulps $dst,$src\t! mul packedF" %}
20727 ins_encode %{
20728 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20729 %}
20730 ins_pipe( pipe_slow );
20731 %}
20732
20733 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20734 predicate(UseAVX > 0);
20735 match(Set dst (MulVF src1 src2));
20736 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20737 ins_encode %{
20738 int vlen_enc = vector_length_encoding(this);
20739 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20740 %}
20741 ins_pipe( pipe_slow );
20742 %}
20743
20744 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20745 predicate((UseAVX > 0) &&
20746 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20747 match(Set dst (MulVF src (LoadVector mem)));
20748 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20749 ins_encode %{
20750 int vlen_enc = vector_length_encoding(this);
20751 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20752 %}
20753 ins_pipe( pipe_slow );
20754 %}
20755
20756 // Doubles vector mul
20757 instruct vmulD(vec dst, vec src) %{
20758 predicate(UseAVX == 0);
20759 match(Set dst (MulVD dst src));
20760 format %{ "mulpd $dst,$src\t! mul packedD" %}
20761 ins_encode %{
20762 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20763 %}
20764 ins_pipe( pipe_slow );
20765 %}
20766
20767 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20768 predicate(UseAVX > 0);
20769 match(Set dst (MulVD src1 src2));
20770 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20771 ins_encode %{
20772 int vlen_enc = vector_length_encoding(this);
20773 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20774 %}
20775 ins_pipe( pipe_slow );
20776 %}
20777
20778 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20779 predicate((UseAVX > 0) &&
20780 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20781 match(Set dst (MulVD src (LoadVector mem)));
20782 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20783 ins_encode %{
20784 int vlen_enc = vector_length_encoding(this);
20785 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20786 %}
20787 ins_pipe( pipe_slow );
20788 %}
20789
20790 // --------------------------------- DIV --------------------------------------
20791
20792 // Floats vector div
20793 instruct vdivF(vec dst, vec src) %{
20794 predicate(UseAVX == 0);
20795 match(Set dst (DivVF dst src));
20796 format %{ "divps $dst,$src\t! div packedF" %}
20797 ins_encode %{
20798 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20799 %}
20800 ins_pipe( pipe_slow );
20801 %}
20802
20803 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20804 predicate(UseAVX > 0);
20805 match(Set dst (DivVF src1 src2));
20806 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20807 ins_encode %{
20808 int vlen_enc = vector_length_encoding(this);
20809 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20810 %}
20811 ins_pipe( pipe_slow );
20812 %}
20813
20814 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20815 predicate((UseAVX > 0) &&
20816 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20817 match(Set dst (DivVF src (LoadVector mem)));
20818 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20819 ins_encode %{
20820 int vlen_enc = vector_length_encoding(this);
20821 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20822 %}
20823 ins_pipe( pipe_slow );
20824 %}
20825
20826 // Doubles vector div
20827 instruct vdivD(vec dst, vec src) %{
20828 predicate(UseAVX == 0);
20829 match(Set dst (DivVD dst src));
20830 format %{ "divpd $dst,$src\t! div packedD" %}
20831 ins_encode %{
20832 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20833 %}
20834 ins_pipe( pipe_slow );
20835 %}
20836
20837 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20838 predicate(UseAVX > 0);
20839 match(Set dst (DivVD src1 src2));
20840 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20841 ins_encode %{
20842 int vlen_enc = vector_length_encoding(this);
20843 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20844 %}
20845 ins_pipe( pipe_slow );
20846 %}
20847
20848 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20849 predicate((UseAVX > 0) &&
20850 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20851 match(Set dst (DivVD src (LoadVector mem)));
20852 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20853 ins_encode %{
20854 int vlen_enc = vector_length_encoding(this);
20855 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20856 %}
20857 ins_pipe( pipe_slow );
20858 %}
20859
20860 // ------------------------------ MinMax ---------------------------------------
20861
20862 // Byte, Short, Int vector Min/Max
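// Signed min/max for byte, short and int lanes maps directly onto the pmins*/pmaxs* instructions
// (all three element sizes are available from SSE4.1, hence the UseSSE >= 4 assert), so no
// temporaries are needed; 64-bit lanes are handled separately below.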
20863 instruct minmax_reg_sse(vec dst, vec src) %{
20864 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20865 UseAVX == 0);
20866 match(Set dst (MinV dst src));
20867 match(Set dst (MaxV dst src));
20868 format %{ "vector_minmax $dst,$src\t! " %}
20869 ins_encode %{
20870 assert(UseSSE >= 4, "required");
20871
20872 int opcode = this->ideal_Opcode();
20873 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20874 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20875 %}
20876 ins_pipe( pipe_slow );
20877 %}
20878
20879 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20880 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20881 UseAVX > 0);
20882 match(Set dst (MinV src1 src2));
20883 match(Set dst (MaxV src1 src2));
20884 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20885 ins_encode %{
20886 int opcode = this->ideal_Opcode();
20887 int vlen_enc = vector_length_encoding(this);
20888 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20889
20890 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20891 %}
20892 ins_pipe( pipe_slow );
20893 %}
20894
20895 // Long vector Min/Max
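// There is no packed signed 64-bit min/max before AVX-512 (vpminsq/vpmaxsq), so the SSE and AVX
// flavors below fall back to a compare-and-blend sequence inside pminmax/vpminmax. The SSE flavor
// relies on a blend whose mask register is implicitly xmm0, which is why it reserves rxmm0 as a
// temporary.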
20896 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20897 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20898 UseAVX == 0);
20899 match(Set dst (MinV dst src));
20900 match(Set dst (MaxV src dst));
20901 effect(TEMP dst, TEMP tmp);
20902 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20903 ins_encode %{
20904 assert(UseSSE >= 4, "required");
20905
20906 int opcode = this->ideal_Opcode();
20907 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20908 assert(elem_bt == T_LONG, "sanity");
20909
20910 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20911 %}
20912 ins_pipe( pipe_slow );
20913 %}
20914
20915 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20916 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20917 UseAVX > 0 && !VM_Version::supports_avx512vl());
20918 match(Set dst (MinV src1 src2));
20919 match(Set dst (MaxV src1 src2));
20920 effect(TEMP dst);
20921 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20922 ins_encode %{
20923 int vlen_enc = vector_length_encoding(this);
20924 int opcode = this->ideal_Opcode();
20925 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20926 assert(elem_bt == T_LONG, "sanity");
20927
20928 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20929 %}
20930 ins_pipe( pipe_slow );
20931 %}
20932
20933 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20934 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20935 Matcher::vector_element_basic_type(n) == T_LONG);
20936 match(Set dst (MinV src1 src2));
20937 match(Set dst (MaxV src1 src2));
20938 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20939 ins_encode %{
20940 assert(UseAVX > 2, "required");
20941
20942 int vlen_enc = vector_length_encoding(this);
20943 int opcode = this->ideal_Opcode();
20944 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20945 assert(elem_bt == T_LONG, "sanity");
20946
20947 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20948 %}
20949 ins_pipe( pipe_slow );
20950 %}
20951
20952 // Float/Double vector Min/Max
20953 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20954 predicate(VM_Version::supports_avx10_2() &&
20955 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20956 match(Set dst (MinV a b));
20957 match(Set dst (MaxV a b));
20958 format %{ "vector_minmaxFP $dst, $a, $b" %}
20959 ins_encode %{
20960 int vlen_enc = vector_length_encoding(this);
20961 int opcode = this->ideal_Opcode();
20962 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20963 __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20964 %}
20965 ins_pipe( pipe_slow );
20966 %}
20967
20968 // Float/Double vector Min/Max
20969 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20970 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20971 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20972 UseAVX > 0);
20973 match(Set dst (MinV a b));
20974 match(Set dst (MaxV a b));
20975 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20976 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20977 ins_encode %{
20978 assert(UseAVX > 0, "required");
20979
20980 int opcode = this->ideal_Opcode();
20981 int vlen_enc = vector_length_encoding(this);
20982 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20983
20984 __ vminmax_fp(opcode, elem_bt,
20985 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20986 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20987 %}
20988 ins_pipe( pipe_slow );
20989 %}
20990
20991 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20992 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20993 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20994 match(Set dst (MinV a b));
20995 match(Set dst (MaxV a b));
20996 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20997 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20998 ins_encode %{
20999 assert(UseAVX > 2, "required");
21000
21001 int opcode = this->ideal_Opcode();
21002 int vlen_enc = vector_length_encoding(this);
21003 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21004
21005 __ evminmax_fp(opcode, elem_bt,
21006 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21007 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21008 %}
21009 ins_pipe( pipe_slow );
21010 %}
21011
21012 // ------------------------------ Unsigned vector Min/Max ----------------------
21013
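// Unsigned min/max for byte, short and int lanes uses the vpminu*/vpmaxu* instructions directly;
// the unsigned 64-bit forms (vpminuq/vpmaxuq) are EVEX-only, so without AVX512VL the T_LONG case
// is emulated with two vector temporaries (vector_uminmaxq_reg below).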
21014 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21015 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21016 match(Set dst (UMinV a b));
21017 match(Set dst (UMaxV a b));
21018 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21019 ins_encode %{
21020 int opcode = this->ideal_Opcode();
21021 int vlen_enc = vector_length_encoding(this);
21022 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21023 assert(is_integral_type(elem_bt), "");
21024 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21025 %}
21026 ins_pipe( pipe_slow );
21027 %}
21028
21029 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21030 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21031 match(Set dst (UMinV a (LoadVector b)));
21032 match(Set dst (UMaxV a (LoadVector b)));
21033 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21034 ins_encode %{
21035 int opcode = this->ideal_Opcode();
21036 int vlen_enc = vector_length_encoding(this);
21037 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21038 assert(is_integral_type(elem_bt), "");
21039 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21040 %}
21041 ins_pipe( pipe_slow );
21042 %}
21043
21044 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21045 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21046 match(Set dst (UMinV a b));
21047 match(Set dst (UMaxV a b));
21048 effect(TEMP xtmp1, TEMP xtmp2);
21049 format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21050 ins_encode %{
21051 int opcode = this->ideal_Opcode();
21052 int vlen_enc = vector_length_encoding(this);
21053 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21054 %}
21055 ins_pipe( pipe_slow );
21056 %}
21057
21058 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21059 match(Set dst (UMinV (Binary dst src2) mask));
21060 match(Set dst (UMaxV (Binary dst src2) mask));
21061 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21062 ins_encode %{
21063 int vlen_enc = vector_length_encoding(this);
21064 BasicType bt = Matcher::vector_element_basic_type(this);
21065 int opc = this->ideal_Opcode();
21066 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21067 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21068 %}
21069 ins_pipe( pipe_slow );
21070 %}
21071
21072 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21073 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21074 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21075 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21076 ins_encode %{
21077 int vlen_enc = vector_length_encoding(this);
21078 BasicType bt = Matcher::vector_element_basic_type(this);
21079 int opc = this->ideal_Opcode();
21080 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21081 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21082 %}
21083 ins_pipe( pipe_slow );
21084 %}
21085
21086 // --------------------------------- Signum/CopySign ---------------------------
21087
21088 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21089 match(Set dst (SignumF dst (Binary zero one)));
21090 effect(KILL cr);
21091 format %{ "signumF $dst, $dst" %}
21092 ins_encode %{
21093 int opcode = this->ideal_Opcode();
21094 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21095 %}
21096 ins_pipe( pipe_slow );
21097 %}
21098
21099 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21100 match(Set dst (SignumD dst (Binary zero one)));
21101 effect(KILL cr);
21102 format %{ "signumD $dst, $dst" %}
21103 ins_encode %{
21104 int opcode = this->ideal_Opcode();
21105 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21106 %}
21107 ins_pipe( pipe_slow );
21108 %}
21109
21110 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21111 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21112 match(Set dst (SignumVF src (Binary zero one)));
21113 match(Set dst (SignumVD src (Binary zero one)));
21114 effect(TEMP dst, TEMP xtmp1);
21115 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21116 ins_encode %{
21117 int opcode = this->ideal_Opcode();
21118 int vec_enc = vector_length_encoding(this);
21119 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21120 $xtmp1$$XMMRegister, vec_enc);
21121 %}
21122 ins_pipe( pipe_slow );
21123 %}
21124
21125 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21126 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21127 match(Set dst (SignumVF src (Binary zero one)));
21128 match(Set dst (SignumVD src (Binary zero one)));
21129 effect(TEMP dst, TEMP ktmp1);
21130 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21131 ins_encode %{
21132 int opcode = this->ideal_Opcode();
21133 int vec_enc = vector_length_encoding(this);
21134 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21135 $ktmp1$$KRegister, vec_enc);
21136 %}
21137 ins_pipe( pipe_slow );
21138 %}
21139
21140 // ---------------------------------------
21141 // For copySign use 0xE4 as writemask for vpternlog
21142 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21143 // C (xmm2) is set to the sign-clearing mask: 0x7FFFFFFF for float, 0x7FFFFFFFFFFFFFFF for double
21144 // Wherever xmm2 is 0, we want to pick from B (sign)
21145 // Wherever xmm2 is 1, we want to pick from A (src)
21146 //
21147 // A B C Result
21148 // 0 0 0 0
21149 // 0 0 1 0
21150 // 0 1 0 1
21151 // 0 1 1 0
21152 // 1 0 0 0
21153 // 1 0 1 1
21154 // 1 1 0 1
21155 // 1 1 1 1
21156 //
21157 // Result, read from the high bit (A=1,B=1,C=1) down to the low bit (A=0,B=0,C=0), is 0b11100100 = 0xE4
21158 // ---------------------------------------
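// Worked example of how the immediate is consumed: for input bits A, B, C the vpternlog result
// bit is bit number (A*4 + B*2 + C) of the immediate. For instance A=1, B=0, C=1 selects bit 5 of
// 0xE4 = 0b11100100, which is 1; i.e. the result is taken from A whenever the mask bit C is 1,
// matching the truth table above.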
21159
21160 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21161 match(Set dst (CopySignF dst src));
21162 effect(TEMP tmp1, TEMP tmp2);
21163 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21164 ins_encode %{
21165 __ movl($tmp2$$Register, 0x7FFFFFFF);
21166 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21167 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21168 %}
21169 ins_pipe( pipe_slow );
21170 %}
21171
21172 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21173 match(Set dst (CopySignD dst (Binary src zero)));
21174 ins_cost(100);
21175 effect(TEMP tmp1, TEMP tmp2);
21176 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21177 ins_encode %{
21178 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21179 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21180 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21181 %}
21182 ins_pipe( pipe_slow );
21183 %}
21184
21185 //----------------------------- CompressBits/ExpandBits ------------------------
21186
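// CompressBits/ExpandBits lower to the BMI2 parallel bit extract/deposit instructions: PEXT
// gathers the bits of src selected by mask into the low-order bits of the destination, and PDEP
// scatters the low-order bits of src into the bit positions selected by mask.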
21187 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21188 predicate(n->bottom_type()->isa_int());
21189 match(Set dst (CompressBits src mask));
21190 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21191 ins_encode %{
21192 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21193 %}
21194 ins_pipe( pipe_slow );
21195 %}
21196
21197 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21198 predicate(n->bottom_type()->isa_int());
21199 match(Set dst (ExpandBits src mask));
21200 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21201 ins_encode %{
21202 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21203 %}
21204 ins_pipe( pipe_slow );
21205 %}
21206
21207 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21208 predicate(n->bottom_type()->isa_int());
21209 match(Set dst (CompressBits src (LoadI mask)));
21210 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21211 ins_encode %{
21212 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21213 %}
21214 ins_pipe( pipe_slow );
21215 %}
21216
21217 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21218 predicate(n->bottom_type()->isa_int());
21219 match(Set dst (ExpandBits src (LoadI mask)));
21220 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21221 ins_encode %{
21222 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21223 %}
21224 ins_pipe( pipe_slow );
21225 %}
21226
21227 // --------------------------------- Sqrt --------------------------------------
21228
21229 instruct vsqrtF_reg(vec dst, vec src) %{
21230 match(Set dst (SqrtVF src));
21231 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21232 ins_encode %{
21233 assert(UseAVX > 0, "required");
21234 int vlen_enc = vector_length_encoding(this);
21235 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21236 %}
21237 ins_pipe( pipe_slow );
21238 %}
21239
21240 instruct vsqrtF_mem(vec dst, memory mem) %{
21241 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21242 match(Set dst (SqrtVF (LoadVector mem)));
21243 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21244 ins_encode %{
21245 assert(UseAVX > 0, "required");
21246 int vlen_enc = vector_length_encoding(this);
21247 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21248 %}
21249 ins_pipe( pipe_slow );
21250 %}
21251
21252 // Floating point vector sqrt
21253 instruct vsqrtD_reg(vec dst, vec src) %{
21254 match(Set dst (SqrtVD src));
21255 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21256 ins_encode %{
21257 assert(UseAVX > 0, "required");
21258 int vlen_enc = vector_length_encoding(this);
21259 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21260 %}
21261 ins_pipe( pipe_slow );
21262 %}
21263
21264 instruct vsqrtD_mem(vec dst, memory mem) %{
21265 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21266 match(Set dst (SqrtVD (LoadVector mem)));
21267 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21268 ins_encode %{
21269 assert(UseAVX > 0, "required");
21270 int vlen_enc = vector_length_encoding(this);
21271 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21272 %}
21273 ins_pipe( pipe_slow );
21274 %}
21275
21276 // ------------------------------ Shift ---------------------------------------
21277
21278 // Left and right shift count vectors are the same on x86
21279 // (only lowest bits of xmm reg are used for count).
21280 instruct vshiftcnt(vec dst, rRegI cnt) %{
21281 match(Set dst (LShiftCntV cnt));
21282 match(Set dst (RShiftCntV cnt));
21283 format %{ "movdl $dst,$cnt\t! load shift count" %}
21284 ins_encode %{
21285 __ movdl($dst$$XMMRegister, $cnt$$Register);
21286 %}
21287 ins_pipe( pipe_slow );
21288 %}
21289
21290 // Byte vector shift
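// x86 has no packed byte shift instructions, so the byte shifts below sign- or zero-extend the
// bytes to 16-bit lanes (vextendbw), shift the words, mask each result back down to 8 bits and
// re-pack with (v)packuswb.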
21291 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21292 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21293 match(Set dst ( LShiftVB src shift));
21294 match(Set dst ( RShiftVB src shift));
21295 match(Set dst (URShiftVB src shift));
21296 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21297 format %{"vector_byte_shift $dst,$src,$shift" %}
21298 ins_encode %{
21299 assert(UseSSE > 3, "required");
21300 int opcode = this->ideal_Opcode();
21301 bool sign = (opcode != Op_URShiftVB);
21302 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21303 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21304 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21305 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21306 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21307 %}
21308 ins_pipe( pipe_slow );
21309 %}
21310
21311 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21312 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21313 UseAVX <= 1);
21314 match(Set dst ( LShiftVB src shift));
21315 match(Set dst ( RShiftVB src shift));
21316 match(Set dst (URShiftVB src shift));
21317 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21318 format %{"vector_byte_shift $dst,$src,$shift" %}
21319 ins_encode %{
21320 assert(UseSSE > 3, "required");
21321 int opcode = this->ideal_Opcode();
21322 bool sign = (opcode != Op_URShiftVB);
21323 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21324 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21325 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21326 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21327 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21328 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21329 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21330 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21331 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21332 %}
21333 ins_pipe( pipe_slow );
21334 %}
21335
21336 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21337 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21338 UseAVX > 1);
21339 match(Set dst ( LShiftVB src shift));
21340 match(Set dst ( RShiftVB src shift));
21341 match(Set dst (URShiftVB src shift));
21342 effect(TEMP dst, TEMP tmp);
21343 format %{"vector_byte_shift $dst,$src,$shift" %}
21344 ins_encode %{
21345 int opcode = this->ideal_Opcode();
21346 bool sign = (opcode != Op_URShiftVB);
21347 int vlen_enc = Assembler::AVX_256bit;
21348 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21349 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21350 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21351 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21352 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21353 %}
21354 ins_pipe( pipe_slow );
21355 %}
21356
21357 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21358 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21359 match(Set dst ( LShiftVB src shift));
21360 match(Set dst ( RShiftVB src shift));
21361 match(Set dst (URShiftVB src shift));
21362 effect(TEMP dst, TEMP tmp);
21363 format %{ "vector_byte_shift $dst,$src,$shift" %}
21364 ins_encode %{
21365 assert(UseAVX > 1, "required");
21366 int opcode = this->ideal_Opcode();
21367 bool sign = (opcode != Op_URShiftVB);
21368 int vlen_enc = Assembler::AVX_256bit;
21369 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21370 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21371 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21372 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21373 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21374 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21375 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21376 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
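    // vpackuswb packs each 128-bit lane independently, leaving the quadwords in
    // (0,2,1,3) order; vpermq with 0xD8 restores the original element order.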
21377 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21378 %}
21379 ins_pipe( pipe_slow );
21380 %}
21381
21382 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21383 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21384 match(Set dst ( LShiftVB src shift));
21385 match(Set dst (RShiftVB src shift));
21386 match(Set dst (URShiftVB src shift));
21387 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21388 format %{ "vector_byte_shift $dst,$src,$shift" %}
21389 ins_encode %{
21390 assert(UseAVX > 2, "required");
21391 int opcode = this->ideal_Opcode();
21392 bool sign = (opcode != Op_URShiftVB);
21393 int vlen_enc = Assembler::AVX_512bit;
21394 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21395 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21396 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21397 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21398 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21399 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21400 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21401 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21402 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21403 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
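    // As above, vpackuswb packs per 128-bit lane, so a cross-lane quadword
    // permutation (table at vector_byte_perm_mask()) is needed to restore the
    // element order.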
21404 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21405 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21406 %}
21407 ins_pipe( pipe_slow );
21408 %}
21409
21410 // A logical right shift of a short vector would produce an incorrect Java result
21411 // for negative data, because Java code converts the short value to an int with
21412 // sign extension before shifting. Char vectors are fine, since chars are
21413 // unsigned values.
21414 // Shorts/Chars vector shift
21415 instruct vshiftS(vec dst, vec src, vec shift) %{
21416 predicate(!n->as_ShiftV()->is_var_shift());
21417 match(Set dst ( LShiftVS src shift));
21418 match(Set dst ( RShiftVS src shift));
21419 match(Set dst (URShiftVS src shift));
21420 effect(TEMP dst, USE src, USE shift);
21421 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21422 ins_encode %{
21423 int opcode = this->ideal_Opcode();
21424 if (UseAVX > 0) {
21425 int vlen_enc = vector_length_encoding(this);
21426 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21427 } else {
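      // Pre-AVX shifts are destructive, so copy just the live 4/8/16 bytes of
      // src into dst and shift in place.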
21428 int vlen = Matcher::vector_length(this);
21429 if (vlen == 2) {
21430 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21431 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21432 } else if (vlen == 4) {
21433 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21434 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21435 } else {
21436 assert (vlen == 8, "sanity");
21437 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21438 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21439 }
21440 }
21441 %}
21442 ins_pipe( pipe_slow );
21443 %}
21444
21445 // Integers vector shift
21446 instruct vshiftI(vec dst, vec src, vec shift) %{
21447 predicate(!n->as_ShiftV()->is_var_shift());
21448 match(Set dst ( LShiftVI src shift));
21449 match(Set dst ( RShiftVI src shift));
21450 match(Set dst (URShiftVI src shift));
21451 effect(TEMP dst, USE src, USE shift);
21452 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21453 ins_encode %{
21454 int opcode = this->ideal_Opcode();
21455 if (UseAVX > 0) {
21456 int vlen_enc = vector_length_encoding(this);
21457 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21458 } else {
21459 int vlen = Matcher::vector_length(this);
21460 if (vlen == 2) {
21461 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21462 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21463 } else {
21464 assert(vlen == 4, "sanity");
21465 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21466 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21467 }
21468 }
21469 %}
21470 ins_pipe( pipe_slow );
21471 %}
21472
21473 // Integers vector constant shift
21474 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21475 match(Set dst (LShiftVI src (LShiftCntV shift)));
21476 match(Set dst (RShiftVI src (RShiftCntV shift)));
21477 match(Set dst (URShiftVI src (RShiftCntV shift)));
21478 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21479 ins_encode %{
21480 int opcode = this->ideal_Opcode();
21481 if (UseAVX > 0) {
21482 int vector_len = vector_length_encoding(this);
21483 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21484 } else {
21485 int vlen = Matcher::vector_length(this);
21486 if (vlen == 2) {
21487 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21488 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21489 } else {
21490 assert(vlen == 4, "sanity");
21491 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21492 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21493 }
21494 }
21495 %}
21496 ins_pipe( pipe_slow );
21497 %}
21498
21499 // Longs vector shift
21500 instruct vshiftL(vec dst, vec src, vec shift) %{
21501 predicate(!n->as_ShiftV()->is_var_shift());
21502 match(Set dst ( LShiftVL src shift));
21503 match(Set dst (URShiftVL src shift));
21504 effect(TEMP dst, USE src, USE shift);
21505 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21506 ins_encode %{
21507 int opcode = this->ideal_Opcode();
21508 if (UseAVX > 0) {
21509 int vlen_enc = vector_length_encoding(this);
21510 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21511 } else {
21512 assert(Matcher::vector_length(this) == 2, "");
21513 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21514 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21515 }
21516 %}
21517 ins_pipe( pipe_slow );
21518 %}
21519
21520 // Longs vector constant shift
21521 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21522 match(Set dst (LShiftVL src (LShiftCntV shift)));
21523 match(Set dst (URShiftVL src (RShiftCntV shift)));
21524 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21525 ins_encode %{
21526 int opcode = this->ideal_Opcode();
21527 if (UseAVX > 0) {
21528 int vector_len = vector_length_encoding(this);
21529 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21530 } else {
21531 assert(Matcher::vector_length(this) == 2, "");
21532 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21533 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21534 }
21535 %}
21536 ins_pipe( pipe_slow );
21537 %}
21538
21539 // -------------------ArithmeticRightShift -----------------------------------
21540 // Long vector arithmetic right shift
21541 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21542 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21543 match(Set dst (RShiftVL src shift));
21544 effect(TEMP dst, TEMP tmp);
21545 format %{ "vshiftq $dst,$src,$shift" %}
21546 ins_encode %{
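    // Neither SSE nor AVX2 provides a 64-bit arithmetic right shift; emulate it
    // as ((x >>> n) ^ m) - m, where m is the sign-bit mask shifted right by n,
    // which re-extends the sign into the vacated high bits.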
21547 uint vlen = Matcher::vector_length(this);
21548 if (vlen == 2) {
21549 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21550 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21551 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21552 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21553 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21554 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21555 } else {
21556 assert(vlen == 4, "sanity");
21557 assert(UseAVX > 1, "required");
21558 int vlen_enc = Assembler::AVX_256bit;
21559 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21560 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21561 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21562 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21563 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21564 }
21565 %}
21566 ins_pipe( pipe_slow );
21567 %}
21568
21569 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21570 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21571 match(Set dst (RShiftVL src shift));
21572 format %{ "vshiftq $dst,$src,$shift" %}
21573 ins_encode %{
21574 int vlen_enc = vector_length_encoding(this);
21575 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21576 %}
21577 ins_pipe( pipe_slow );
21578 %}
21579
21580 // ------------------- Variable Shift -----------------------------
21581 // Byte variable shift
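// AVX2 provides per-element (variable) shifts only for 32- and 64-bit lanes,
// and AVX-512BW adds 16-bit variable shifts; there is no byte variant. The
// rules below therefore widen the elements, shift at the wider granularity and
// narrow the result back to bytes.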
21582 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21583 predicate(Matcher::vector_length(n) <= 8 &&
21584 n->as_ShiftV()->is_var_shift() &&
21585 !VM_Version::supports_avx512bw());
21586 match(Set dst ( LShiftVB src shift));
21587 match(Set dst ( RShiftVB src shift));
21588 match(Set dst (URShiftVB src shift));
21589 effect(TEMP dst, TEMP vtmp);
21590 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21591 ins_encode %{
21592 assert(UseAVX >= 2, "required");
21593
21594 int opcode = this->ideal_Opcode();
21595 int vlen_enc = Assembler::AVX_128bit;
21596 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21597 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21598 %}
21599 ins_pipe( pipe_slow );
21600 %}
21601
21602 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21603 predicate(Matcher::vector_length(n) == 16 &&
21604 n->as_ShiftV()->is_var_shift() &&
21605 !VM_Version::supports_avx512bw());
21606 match(Set dst ( LShiftVB src shift));
21607 match(Set dst ( RShiftVB src shift));
21608 match(Set dst (URShiftVB src shift));
21609 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21610 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21611 ins_encode %{
21612 assert(UseAVX >= 2, "required");
21613
21614 int opcode = this->ideal_Opcode();
21615 int vlen_enc = Assembler::AVX_128bit;
21616 // Shift lower half and get word result in dst
21617 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21618
21619 // Shift upper half and get word result in vtmp1
21620 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21621 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21622 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21623
21624 // Merge and down convert the two word results to byte in dst
21625 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21626 %}
21627 ins_pipe( pipe_slow );
21628 %}
21629
21630 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21631 predicate(Matcher::vector_length(n) == 32 &&
21632 n->as_ShiftV()->is_var_shift() &&
21633 !VM_Version::supports_avx512bw());
21634 match(Set dst ( LShiftVB src shift));
21635 match(Set dst ( RShiftVB src shift));
21636 match(Set dst (URShiftVB src shift));
21637 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21638 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21639 ins_encode %{
21640 assert(UseAVX >= 2, "required");
21641
21642 int opcode = this->ideal_Opcode();
21643 int vlen_enc = Assembler::AVX_128bit;
21644 // Process lower 128 bits and get result in dst
21645 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21646 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21647 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21648 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21649 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21650
21651 // Process higher 128 bits and get result in vtmp3
21652 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21653 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21654 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21655 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21656 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21657 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21658 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21659
21660 // Merge the two results in dst
21661 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21662 %}
21663 ins_pipe( pipe_slow );
21664 %}
21665
21666 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21667 predicate(Matcher::vector_length(n) <= 32 &&
21668 n->as_ShiftV()->is_var_shift() &&
21669 VM_Version::supports_avx512bw());
21670 match(Set dst ( LShiftVB src shift));
21671 match(Set dst ( RShiftVB src shift));
21672 match(Set dst (URShiftVB src shift));
21673 effect(TEMP dst, TEMP vtmp);
21674 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21675 ins_encode %{
21676 assert(UseAVX > 2, "required");
21677
21678 int opcode = this->ideal_Opcode();
21679 int vlen_enc = vector_length_encoding(this);
21680 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21681 %}
21682 ins_pipe( pipe_slow );
21683 %}
21684
21685 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21686 predicate(Matcher::vector_length(n) == 64 &&
21687 n->as_ShiftV()->is_var_shift() &&
21688 VM_Version::supports_avx512bw());
21689 match(Set dst ( LShiftVB src shift));
21690 match(Set dst ( RShiftVB src shift));
21691 match(Set dst (URShiftVB src shift));
21692 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21693 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21694 ins_encode %{
21695 assert(UseAVX > 2, "required");
21696
21697 int opcode = this->ideal_Opcode();
21698 int vlen_enc = Assembler::AVX_256bit;
21699 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21700 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21701 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21702 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21703 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21704 %}
21705 ins_pipe( pipe_slow );
21706 %}
21707
21708 // Short variable shift
21709 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21710 predicate(Matcher::vector_length(n) <= 8 &&
21711 n->as_ShiftV()->is_var_shift() &&
21712 !VM_Version::supports_avx512bw());
21713 match(Set dst ( LShiftVS src shift));
21714 match(Set dst ( RShiftVS src shift));
21715 match(Set dst (URShiftVS src shift));
21716 effect(TEMP dst, TEMP vtmp);
21717 format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21718 ins_encode %{
21719 assert(UseAVX >= 2, "required");
21720
21721 int opcode = this->ideal_Opcode();
21722 bool sign = (opcode != Op_URShiftVS);
21723 int vlen_enc = Assembler::AVX_256bit;
21724 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21725 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21726 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21727 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21728 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21729 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21730 %}
21731 ins_pipe( pipe_slow );
21732 %}
21733
21734 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21735 predicate(Matcher::vector_length(n) == 16 &&
21736 n->as_ShiftV()->is_var_shift() &&
21737 !VM_Version::supports_avx512bw());
21738 match(Set dst ( LShiftVS src shift));
21739 match(Set dst ( RShiftVS src shift));
21740 match(Set dst (URShiftVS src shift));
21741 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21742 format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21743 ins_encode %{
21744 assert(UseAVX >= 2, "required");
21745
21746 int opcode = this->ideal_Opcode();
21747 bool sign = (opcode != Op_URShiftVS);
21748 int vlen_enc = Assembler::AVX_256bit;
21749 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21750 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21751 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21752 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21753 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21754
21755 // Shift upper half, with result in dst using vtmp1 as TEMP
21756 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21757 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21758 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21759 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21760 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21761 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21762
21763 // Merge lower and upper half result into dst
21764 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21765 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21766 %}
21767 ins_pipe( pipe_slow );
21768 %}
21769
21770 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21771 predicate(n->as_ShiftV()->is_var_shift() &&
21772 VM_Version::supports_avx512bw());
21773 match(Set dst ( LShiftVS src shift));
21774 match(Set dst ( RShiftVS src shift));
21775 match(Set dst (URShiftVS src shift));
21776 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21777 ins_encode %{
21778 assert(UseAVX > 2, "required");
21779
21780 int opcode = this->ideal_Opcode();
21781 int vlen_enc = vector_length_encoding(this);
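    // Without AVX512VL, EVEX-encoded instructions can only operate on full
    // 512-bit vectors, so widen the encoding; the extra upper lanes are don't-care.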
21782 if (!VM_Version::supports_avx512vl()) {
21783 vlen_enc = Assembler::AVX_512bit;
21784 }
21785 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21786 %}
21787 ins_pipe( pipe_slow );
21788 %}
21789
21790 // Integer variable shift
21791 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21792 predicate(n->as_ShiftV()->is_var_shift());
21793 match(Set dst ( LShiftVI src shift));
21794 match(Set dst ( RShiftVI src shift));
21795 match(Set dst (URShiftVI src shift));
21796 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21797 ins_encode %{
21798 assert(UseAVX >= 2, "required");
21799
21800 int opcode = this->ideal_Opcode();
21801 int vlen_enc = vector_length_encoding(this);
21802 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21803 %}
21804 ins_pipe( pipe_slow );
21805 %}
21806
21807 // Long variable shift
21808 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21809 predicate(n->as_ShiftV()->is_var_shift());
21810 match(Set dst ( LShiftVL src shift));
21811 match(Set dst (URShiftVL src shift));
21812 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21813 ins_encode %{
21814 assert(UseAVX >= 2, "required");
21815
21816 int opcode = this->ideal_Opcode();
21817 int vlen_enc = vector_length_encoding(this);
21818 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21819 %}
21820 ins_pipe( pipe_slow );
21821 %}
21822
21823 // Long variable arithmetic right shift
21824 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21825 predicate(Matcher::vector_length(n) <= 4 &&
21826 n->as_ShiftV()->is_var_shift() &&
21827 UseAVX == 2);
21828 match(Set dst (RShiftVL src shift));
21829 effect(TEMP dst, TEMP vtmp);
21830 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21831 ins_encode %{
21832 int opcode = this->ideal_Opcode();
21833 int vlen_enc = vector_length_encoding(this);
21834 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21835 $vtmp$$XMMRegister);
21836 %}
21837 ins_pipe( pipe_slow );
21838 %}
21839
21840 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21841 predicate(n->as_ShiftV()->is_var_shift() &&
21842 UseAVX > 2);
21843 match(Set dst (RShiftVL src shift));
21844 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21845 ins_encode %{
21846 int opcode = this->ideal_Opcode();
21847 int vlen_enc = vector_length_encoding(this);
21848 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21849 %}
21850 ins_pipe( pipe_slow );
21851 %}
21852
21853 // --------------------------------- AND --------------------------------------
21854
21855 instruct vand(vec dst, vec src) %{
21856 predicate(UseAVX == 0);
21857 match(Set dst (AndV dst src));
21858 format %{ "pand $dst,$src\t! and vectors" %}
21859 ins_encode %{
21860 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21861 %}
21862 ins_pipe( pipe_slow );
21863 %}
21864
21865 instruct vand_reg(vec dst, vec src1, vec src2) %{
21866 predicate(UseAVX > 0);
21867 match(Set dst (AndV src1 src2));
21868 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21869 ins_encode %{
21870 int vlen_enc = vector_length_encoding(this);
21871 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21872 %}
21873 ins_pipe( pipe_slow );
21874 %}
21875
21876 instruct vand_mem(vec dst, vec src, memory mem) %{
21877 predicate((UseAVX > 0) &&
21878 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21879 match(Set dst (AndV src (LoadVector mem)));
21880 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21881 ins_encode %{
21882 int vlen_enc = vector_length_encoding(this);
21883 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21884 %}
21885 ins_pipe( pipe_slow );
21886 %}
21887
21888 // --------------------------------- OR ---------------------------------------
21889
21890 instruct vor(vec dst, vec src) %{
21891 predicate(UseAVX == 0);
21892 match(Set dst (OrV dst src));
21893 format %{ "por $dst,$src\t! or vectors" %}
21894 ins_encode %{
21895 __ por($dst$$XMMRegister, $src$$XMMRegister);
21896 %}
21897 ins_pipe( pipe_slow );
21898 %}
21899
21900 instruct vor_reg(vec dst, vec src1, vec src2) %{
21901 predicate(UseAVX > 0);
21902 match(Set dst (OrV src1 src2));
21903 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21904 ins_encode %{
21905 int vlen_enc = vector_length_encoding(this);
21906 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21907 %}
21908 ins_pipe( pipe_slow );
21909 %}
21910
21911 instruct vor_mem(vec dst, vec src, memory mem) %{
21912 predicate((UseAVX > 0) &&
21913 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21914 match(Set dst (OrV src (LoadVector mem)));
21915 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21916 ins_encode %{
21917 int vlen_enc = vector_length_encoding(this);
21918 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21919 %}
21920 ins_pipe( pipe_slow );
21921 %}
21922
21923 // --------------------------------- XOR --------------------------------------
21924
21925 instruct vxor(vec dst, vec src) %{
21926 predicate(UseAVX == 0);
21927 match(Set dst (XorV dst src));
21928 format %{ "pxor $dst,$src\t! xor vectors" %}
21929 ins_encode %{
21930 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21931 %}
21932 ins_pipe( pipe_slow );
21933 %}
21934
21935 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21936 predicate(UseAVX > 0);
21937 match(Set dst (XorV src1 src2));
21938 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21939 ins_encode %{
21940 int vlen_enc = vector_length_encoding(this);
21941 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21942 %}
21943 ins_pipe( pipe_slow );
21944 %}
21945
21946 instruct vxor_mem(vec dst, vec src, memory mem) %{
21947 predicate((UseAVX > 0) &&
21948 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21949 match(Set dst (XorV src (LoadVector mem)));
21950 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21951 ins_encode %{
21952 int vlen_enc = vector_length_encoding(this);
21953 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21954 %}
21955 ins_pipe( pipe_slow );
21956 %}
21957
21958 // --------------------------------- VectorCast --------------------------------------
21959
21960 instruct vcastBtoX(vec dst, vec src) %{
21961 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21962 match(Set dst (VectorCastB2X src));
21963 format %{ "vector_cast_b2x $dst,$src\t!" %}
21964 ins_encode %{
21965 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21966 int vlen_enc = vector_length_encoding(this);
21967 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21968 %}
21969 ins_pipe( pipe_slow );
21970 %}
21971
21972 instruct vcastBtoD(legVec dst, legVec src) %{
21973 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21974 match(Set dst (VectorCastB2X src));
21975 format %{ "vector_cast_b2x $dst,$src\t!" %}
21976 ins_encode %{
21977 int vlen_enc = vector_length_encoding(this);
21978 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21979 %}
21980 ins_pipe( pipe_slow );
21981 %}
21982
21983 instruct castStoX(vec dst, vec src) %{
21984 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21985 Matcher::vector_length(n->in(1)) <= 8 && // src
21986 Matcher::vector_element_basic_type(n) == T_BYTE);
21987 match(Set dst (VectorCastS2X src));
21988 format %{ "vector_cast_s2x $dst,$src" %}
21989 ins_encode %{
21990 assert(UseAVX > 0, "required");
21991
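    // Narrowing cast: mask each short down to its low byte, then pack pairs of
    // shorts into bytes.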
21992 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21993 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21994 %}
21995 ins_pipe( pipe_slow );
21996 %}
21997
21998 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21999 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22000 Matcher::vector_length(n->in(1)) == 16 && // src
22001 Matcher::vector_element_basic_type(n) == T_BYTE);
22002 effect(TEMP dst, TEMP vtmp);
22003 match(Set dst (VectorCastS2X src));
22004 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22005 ins_encode %{
22006 assert(UseAVX > 0, "required");
22007
22008 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22009 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22010 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22011 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22012 %}
22013 ins_pipe( pipe_slow );
22014 %}
22015
22016 instruct vcastStoX_evex(vec dst, vec src) %{
22017 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22018 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22019 match(Set dst (VectorCastS2X src));
22020 format %{ "vector_cast_s2x $dst,$src\t!" %}
22021 ins_encode %{
22022 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22023 int src_vlen_enc = vector_length_encoding(this, $src);
22024 int vlen_enc = vector_length_encoding(this);
22025 switch (to_elem_bt) {
22026 case T_BYTE:
22027 if (!VM_Version::supports_avx512vl()) {
22028 vlen_enc = Assembler::AVX_512bit;
22029 }
22030 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22031 break;
22032 case T_INT:
22033 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22034 break;
22035 case T_FLOAT:
22036 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22037 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22038 break;
22039 case T_LONG:
22040 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22041 break;
22042 case T_DOUBLE: {
22043 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22044 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22045 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22046 break;
22047 }
22048 default:
22049 ShouldNotReachHere();
22050 }
22051 %}
22052 ins_pipe( pipe_slow );
22053 %}
22054
22055 instruct castItoX(vec dst, vec src) %{
22056 predicate(UseAVX <= 2 &&
22057 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22058 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22059 match(Set dst (VectorCastI2X src));
22060 format %{ "vector_cast_i2x $dst,$src" %}
22061 ins_encode %{
22062 assert(UseAVX > 0, "required");
22063
22064 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22065 int vlen_enc = vector_length_encoding(this, $src);
22066
22067 if (to_elem_bt == T_BYTE) {
22068 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22069 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22070 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22071 } else {
22072 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22073 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22074 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22075 }
22076 %}
22077 ins_pipe( pipe_slow );
22078 %}
22079
22080 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22081 predicate(UseAVX <= 2 &&
22082 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22083 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22084 match(Set dst (VectorCastI2X src));
22085 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22086 effect(TEMP dst, TEMP vtmp);
22087 ins_encode %{
22088 assert(UseAVX > 0, "required");
22089
22090 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22091 int vlen_enc = vector_length_encoding(this, $src);
22092
22093 if (to_elem_bt == T_BYTE) {
22094 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22095 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22096 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22097 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22098 } else {
22099 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22100 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22101 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22102 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22103 }
22104 %}
22105 ins_pipe( pipe_slow );
22106 %}
22107
22108 instruct vcastItoX_evex(vec dst, vec src) %{
22109 predicate(UseAVX > 2 ||
22110 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22111 match(Set dst (VectorCastI2X src));
22112 format %{ "vector_cast_i2x $dst,$src\t!" %}
22113 ins_encode %{
22114 assert(UseAVX > 0, "required");
22115
22116 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22117 int src_vlen_enc = vector_length_encoding(this, $src);
22118 int dst_vlen_enc = vector_length_encoding(this);
22119 switch (dst_elem_bt) {
22120 case T_BYTE:
22121 if (!VM_Version::supports_avx512vl()) {
22122 src_vlen_enc = Assembler::AVX_512bit;
22123 }
22124 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22125 break;
22126 case T_SHORT:
22127 if (!VM_Version::supports_avx512vl()) {
22128 src_vlen_enc = Assembler::AVX_512bit;
22129 }
22130 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22131 break;
22132 case T_FLOAT:
22133 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22134 break;
22135 case T_LONG:
22136 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22137 break;
22138 case T_DOUBLE:
22139 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22140 break;
22141 default:
22142 ShouldNotReachHere();
22143 }
22144 %}
22145 ins_pipe( pipe_slow );
22146 %}
22147
22148 instruct vcastLtoBS(vec dst, vec src) %{
22149 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22150 UseAVX <= 2);
22151 match(Set dst (VectorCastL2X src));
22152 format %{ "vector_cast_l2x $dst,$src" %}
22153 ins_encode %{
22154 assert(UseAVX > 0, "required");
22155
22156 int vlen = Matcher::vector_length_in_bytes(this, $src);
22157 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22158 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22159 : ExternalAddress(vector_int_to_short_mask());
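    // Gather the low 32 bits of every long into the low half of the register
    // (vpshufd for 128-bit sources, vpermilps + vpermpd for 256-bit), then
    // narrow the resulting ints to short/byte with a mask and pack.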
22160 if (vlen <= 16) {
22161 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22162 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22163 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22164 } else {
22165 assert(vlen <= 32, "required");
22166 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22167 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22168 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22169 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22170 }
22171 if (to_elem_bt == T_BYTE) {
22172 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22173 }
22174 %}
22175 ins_pipe( pipe_slow );
22176 %}
22177
22178 instruct vcastLtoX_evex(vec dst, vec src) %{
22179 predicate(UseAVX > 2 ||
22180 (Matcher::vector_element_basic_type(n) == T_INT ||
22181 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22182 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22183 match(Set dst (VectorCastL2X src));
22184 format %{ "vector_cast_l2x $dst,$src\t!" %}
22185 ins_encode %{
22186 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22187 int vlen = Matcher::vector_length_in_bytes(this, $src);
22188 int vlen_enc = vector_length_encoding(this, $src);
22189 switch (to_elem_bt) {
22190 case T_BYTE:
22191 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22192 vlen_enc = Assembler::AVX_512bit;
22193 }
22194 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22195 break;
22196 case T_SHORT:
22197 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22198 vlen_enc = Assembler::AVX_512bit;
22199 }
22200 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22201 break;
22202 case T_INT:
22203 if (vlen == 8) {
22204 if ($dst$$XMMRegister != $src$$XMMRegister) {
22205 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22206 }
22207 } else if (vlen == 16) {
22208 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22209 } else if (vlen == 32) {
22210 if (UseAVX > 2) {
22211 if (!VM_Version::supports_avx512vl()) {
22212 vlen_enc = Assembler::AVX_512bit;
22213 }
22214 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22215 } else {
22216 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22217 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22218 }
22219 } else { // vlen == 64
22220 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22221 }
22222 break;
22223 case T_FLOAT:
22224 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22225 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22226 break;
22227 case T_DOUBLE:
22228 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22229 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22230 break;
22231
22232 default: assert(false, "%s", type2name(to_elem_bt));
22233 }
22234 %}
22235 ins_pipe( pipe_slow );
22236 %}
22237
22238 instruct vcastFtoD_reg(vec dst, vec src) %{
22239 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22240 match(Set dst (VectorCastF2X src));
22241 format %{ "vector_cast_f2d $dst,$src\t!" %}
22242 ins_encode %{
22243 int vlen_enc = vector_length_encoding(this);
22244 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22245 %}
22246 ins_pipe( pipe_slow );
22247 %}
22248
22249
22250 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22251 predicate(!VM_Version::supports_avx10_2() &&
22252 !VM_Version::supports_avx512vl() &&
22253 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22254 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22255 is_integral_type(Matcher::vector_element_basic_type(n)));
22256 match(Set dst (VectorCastF2X src));
22257 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22258 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22259 ins_encode %{
22260 int vlen_enc = vector_length_encoding(this, $src);
22261 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22262 // JDK-8292878 removed the need for an explicit scratch register when loading addresses
22263 // wider than 32 bits for the register-indirect addressing mode, since stub constants
22264 // are part of the code cache and ReservedCodeCacheSize is currently capped at 2G.
22265 // Targets are free to raise this limit, but a code cache larger than 2G is
22266 // unreasonable in practice; on the flip side, with the given cap we save a temporary
22267 // register allocation, which in the limiting case can prevent spilling in blocks
22268 // with high register pressure.
22269 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22270 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22271 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22272 %}
22273 ins_pipe( pipe_slow );
22274 %}
22275
22276 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22277 predicate(!VM_Version::supports_avx10_2() &&
22278 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22279 is_integral_type(Matcher::vector_element_basic_type(n)));
22280 match(Set dst (VectorCastF2X src));
22281 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22282 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22283 ins_encode %{
22284 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22285 if (to_elem_bt == T_LONG) {
22286 int vlen_enc = vector_length_encoding(this);
22287 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22288 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22289 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22290 } else {
22291 int vlen_enc = vector_length_encoding(this, $src);
22292 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22293 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22294 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22295 }
22296 %}
22297 ins_pipe( pipe_slow );
22298 %}
22299
22300 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22301 predicate(VM_Version::supports_avx10_2() &&
22302 is_integral_type(Matcher::vector_element_basic_type(n)));
22303 match(Set dst (VectorCastF2X src));
22304 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22305 ins_encode %{
22306 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22307 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22308 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22309 %}
22310 ins_pipe( pipe_slow );
22311 %}
22312
22313 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22314 predicate(VM_Version::supports_avx10_2() &&
22315 is_integral_type(Matcher::vector_element_basic_type(n)));
22316 match(Set dst (VectorCastF2X (LoadVector src)));
22317 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22318 ins_encode %{
22319 int vlen = Matcher::vector_length(this);
22320 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22321 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22322 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22323 %}
22324 ins_pipe( pipe_slow );
22325 %}
22326
22327 instruct vcastDtoF_reg(vec dst, vec src) %{
22328 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22329 match(Set dst (VectorCastD2X src));
22330 format %{ "vector_cast_d2x $dst,$src\t!" %}
22331 ins_encode %{
22332 int vlen_enc = vector_length_encoding(this, $src);
22333 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22334 %}
22335 ins_pipe( pipe_slow );
22336 %}
22337
22338 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22339 predicate(!VM_Version::supports_avx10_2() &&
22340 !VM_Version::supports_avx512vl() &&
22341 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22342 is_integral_type(Matcher::vector_element_basic_type(n)));
22343 match(Set dst (VectorCastD2X src));
22344 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22345 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22346 ins_encode %{
22347 int vlen_enc = vector_length_encoding(this, $src);
22348 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22349 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22350 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22351 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22352 %}
22353 ins_pipe( pipe_slow );
22354 %}
22355
22356 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22357 predicate(!VM_Version::supports_avx10_2() &&
22358 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22359 is_integral_type(Matcher::vector_element_basic_type(n)));
22360 match(Set dst (VectorCastD2X src));
22361 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22362 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22363 ins_encode %{
22364 int vlen_enc = vector_length_encoding(this, $src);
22365 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22366 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22367 ExternalAddress(vector_float_signflip());
22368 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22369 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22370 %}
22371 ins_pipe( pipe_slow );
22372 %}
22373
22374 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22375 predicate(VM_Version::supports_avx10_2() &&
22376 is_integral_type(Matcher::vector_element_basic_type(n)));
22377 match(Set dst (VectorCastD2X src));
22378 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22379 ins_encode %{
22380 int vlen_enc = vector_length_encoding(this, $src);
22381 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22382 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22383 %}
22384 ins_pipe( pipe_slow );
22385 %}
22386
22387 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22388 predicate(VM_Version::supports_avx10_2() &&
22389 is_integral_type(Matcher::vector_element_basic_type(n)));
22390 match(Set dst (VectorCastD2X (LoadVector src)));
22391 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22392 ins_encode %{
22393 int vlen = Matcher::vector_length(this);
22394 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22395 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22396 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22397 %}
22398 ins_pipe( pipe_slow );
22399 %}
22400
22401 instruct vucast(vec dst, vec src) %{
22402 match(Set dst (VectorUCastB2X src));
22403 match(Set dst (VectorUCastS2X src));
22404 match(Set dst (VectorUCastI2X src));
22405 format %{ "vector_ucast $dst,$src\t!" %}
22406 ins_encode %{
22407 assert(UseAVX > 0, "required");
22408
22409 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22410 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22411 int vlen_enc = vector_length_encoding(this);
22412 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22413 %}
22414 ins_pipe( pipe_slow );
22415 %}
22416
22417 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22418 predicate(!VM_Version::supports_avx512vl() &&
22419 Matcher::vector_length_in_bytes(n) < 64 &&
22420 Matcher::vector_element_basic_type(n) == T_INT);
22421 match(Set dst (RoundVF src));
22422 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22423 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22424 ins_encode %{
22425 int vlen_enc = vector_length_encoding(this);
22426 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22427 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22428 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22429 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22430 %}
22431 ins_pipe( pipe_slow );
22432 %}
22433
22434 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22435 predicate((VM_Version::supports_avx512vl() ||
22436 Matcher::vector_length_in_bytes(n) == 64) &&
22437 Matcher::vector_element_basic_type(n) == T_INT);
22438 match(Set dst (RoundVF src));
22439 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22440 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22441 ins_encode %{
22442 int vlen_enc = vector_length_encoding(this);
22443 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22444 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22445 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22446 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22447 %}
22448 ins_pipe( pipe_slow );
22449 %}
22450
22451 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22452 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22453 match(Set dst (RoundVD src));
22454 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22455 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22456 ins_encode %{
22457 int vlen_enc = vector_length_encoding(this);
22458 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22459 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22460 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22461 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22462 %}
22463 ins_pipe( pipe_slow );
22464 %}
22465
22466 // --------------------------------- VectorMaskCmp --------------------------------------
22467
22468 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22469 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22470 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22471 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22472 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22473 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22474 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22475 ins_encode %{
22476 int vlen_enc = vector_length_encoding(this, $src1);
22477 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22478 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22479 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22480 } else {
22481 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22482 }
22483 %}
22484 ins_pipe( pipe_slow );
22485 %}
22486
22487 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22488 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22489 n->bottom_type()->isa_vectmask() == nullptr &&
22490 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22491 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22492 effect(TEMP ktmp);
22493 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22494 ins_encode %{
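    // The EVEX compare writes a mask register; since this rule must produce a
    // vector of all-ones/all-zeros lanes, expand the mask via a zero-masked
    // load of vector_all_bits_set().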
22495 int vlen_enc = Assembler::AVX_512bit;
22496 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22497 KRegister mask = k0; // The comparison itself is not being masked.
22498 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22499 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22500 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22501 } else {
22502 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22503 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22504 }
22505 %}
22506 ins_pipe( pipe_slow );
22507 %}
22508
22509 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22510 predicate(n->bottom_type()->isa_vectmask() &&
22511 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22512 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22513 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22514 ins_encode %{
22515 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22516 int vlen_enc = vector_length_encoding(this, $src1);
22517 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22518 KRegister mask = k0; // The comparison itself is not being masked.
22519 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22520 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22521 } else {
22522 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22523 }
22524 %}
22525 ins_pipe( pipe_slow );
22526 %}
22527
22528 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22529 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22530 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22531 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22532 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22533 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22534 (n->in(2)->get_int() == BoolTest::eq ||
22535 n->in(2)->get_int() == BoolTest::lt ||
22536 n->in(2)->get_int() == BoolTest::gt)); // cond
22537 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22538 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22539 ins_encode %{
22540 int vlen_enc = vector_length_encoding(this, $src1);
22541 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22542 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22543 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22544 %}
22545 ins_pipe( pipe_slow );
22546 %}
22547
22548 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22549 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22550 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22551 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22552 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22553 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22554 (n->in(2)->get_int() == BoolTest::ne ||
22555 n->in(2)->get_int() == BoolTest::le ||
22556 n->in(2)->get_int() == BoolTest::ge)); // cond
22557 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22558 effect(TEMP dst, TEMP xtmp);
22559 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22560 ins_encode %{
22561 int vlen_enc = vector_length_encoding(this, $src1);
22562 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22563 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22564 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22565 %}
22566 ins_pipe( pipe_slow );
22567 %}
22568
22569 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22570 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22571 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22572 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22573 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22574 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22575 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22576 effect(TEMP dst, TEMP xtmp);
22577 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22578 ins_encode %{
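    // AVX/AVX2 have no unsigned integer compares: flip the sign bit of both
    // operands (xor with the per-element high-bit constant) so that the signed
    // compare below yields the unsigned ordering.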
22579 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22580 int vlen_enc = vector_length_encoding(this, $src1);
22581 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22582 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22583
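    // There is no unsigned packed compare below AVX-512, so flip the sign bit of both
    // operands (XOR with the per-element high-bit constant) and perform a signed
    // compare on the biased values; unsigned order maps to signed order after the flip.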
22584 if (vlen_enc == Assembler::AVX_128bit) {
22585 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22586 } else {
22587 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22588 }
22589 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22590 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22591 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22592 %}
22593 ins_pipe( pipe_slow );
22594 %}
22595
22596 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22597 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22598 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22599 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22600 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22601 effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $ktmp as TEMP" %}
22603 ins_encode %{
22604 assert(UseAVX > 2, "required");
22605
22606 int vlen_enc = vector_length_encoding(this, $src1);
22607 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22608 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22609 KRegister mask = k0; // The comparison itself is not being masked.
22610 bool merge = false;
22611 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22612
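    // Compare into the temporary mask register, then expand the mask back into a
    // vector of all-ones / all-zeros lanes via a zero-masked load of the
    // all-bits-set constant.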
22613 switch (src1_elem_bt) {
22614 case T_INT: {
22615 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22616 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22617 break;
22618 }
22619 case T_LONG: {
22620 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22621 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22622 break;
22623 }
22624 default: assert(false, "%s", type2name(src1_elem_bt));
22625 }
22626 %}
22627 ins_pipe( pipe_slow );
22628 %}
22629
22630
22631 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22632 predicate(n->bottom_type()->isa_vectmask() &&
22633 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22634 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22636 ins_encode %{
22637 assert(UseAVX > 2, "required");
22638 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22639
22640 int vlen_enc = vector_length_encoding(this, $src1);
22641 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22642 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22643 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22644
    // The comparison itself is not masked, so k0 is used as the mask register.
22646 switch (src1_elem_bt) {
22647 case T_BYTE: {
22648 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22649 break;
22650 }
22651 case T_SHORT: {
22652 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22653 break;
22654 }
22655 case T_INT: {
22656 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22657 break;
22658 }
22659 case T_LONG: {
22660 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22661 break;
22662 }
22663 default: assert(false, "%s", type2name(src1_elem_bt));
22664 }
22665 %}
22666 ins_pipe( pipe_slow );
22667 %}
22668
22669 // Extract
22670
22671 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22672 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22673 match(Set dst (ExtractI src idx));
22674 match(Set dst (ExtractS src idx));
22675 match(Set dst (ExtractB src idx));
22676 format %{ "extractI $dst,$src,$idx\t!" %}
22677 ins_encode %{
22678 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22679
22680 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22681 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22682 %}
22683 ins_pipe( pipe_slow );
22684 %}
22685
22686 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22687 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22688 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22689 match(Set dst (ExtractI src idx));
22690 match(Set dst (ExtractS src idx));
22691 match(Set dst (ExtractB src idx));
22692 effect(TEMP vtmp);
22693 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22694 ins_encode %{
22695 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22696
22697 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22698 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22699 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22700 %}
22701 ins_pipe( pipe_slow );
22702 %}
22703
22704 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22705 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22706 match(Set dst (ExtractL src idx));
22707 format %{ "extractL $dst,$src,$idx\t!" %}
22708 ins_encode %{
22709 assert(UseSSE >= 4, "required");
22710 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22711
22712 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22713 %}
22714 ins_pipe( pipe_slow );
22715 %}
22716
22717 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22718 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22719 Matcher::vector_length(n->in(1)) == 8); // src
22720 match(Set dst (ExtractL src idx));
22721 effect(TEMP vtmp);
22722 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22723 ins_encode %{
22724 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22725
22726 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22727 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22728 %}
22729 ins_pipe( pipe_slow );
22730 %}
22731
22732 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22733 predicate(Matcher::vector_length(n->in(1)) <= 4);
22734 match(Set dst (ExtractF src idx));
22735 effect(TEMP dst, TEMP vtmp);
22736 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22737 ins_encode %{
22738 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22739
22740 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22741 %}
22742 ins_pipe( pipe_slow );
22743 %}
22744
22745 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22746 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22747 Matcher::vector_length(n->in(1)/*src*/) == 16);
22748 match(Set dst (ExtractF src idx));
22749 effect(TEMP vtmp);
22750 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22751 ins_encode %{
22752 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22753
22754 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22755 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22756 %}
22757 ins_pipe( pipe_slow );
22758 %}
22759
22760 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22761 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22762 match(Set dst (ExtractD src idx));
22763 format %{ "extractD $dst,$src,$idx\t!" %}
22764 ins_encode %{
22765 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22766
22767 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22768 %}
22769 ins_pipe( pipe_slow );
22770 %}
22771
22772 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22773 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22774 Matcher::vector_length(n->in(1)) == 8); // src
22775 match(Set dst (ExtractD src idx));
22776 effect(TEMP vtmp);
22777 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22778 ins_encode %{
22779 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22780
22781 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22782 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22783 %}
22784 ins_pipe( pipe_slow );
22785 %}
22786
22787 // --------------------------------- Vector Blend --------------------------------------
22788
22789 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22790 predicate(UseAVX == 0);
22791 match(Set dst (VectorBlend (Binary dst src) mask));
22792 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22793 effect(TEMP tmp);
22794 ins_encode %{
22795 assert(UseSSE >= 4, "required");
22796
22797 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22798 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22799 }
22800 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22801 %}
22802 ins_pipe( pipe_slow );
22803 %}
22804
22805 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22806 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22807 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22808 Matcher::vector_length_in_bytes(n) <= 32 &&
22809 is_integral_type(Matcher::vector_element_basic_type(n)));
22810 match(Set dst (VectorBlend (Binary src1 src2) mask));
22811 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22812 ins_encode %{
22813 int vlen_enc = vector_length_encoding(this);
22814 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22815 %}
22816 ins_pipe( pipe_slow );
22817 %}
22818
22819 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22820 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22821 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22822 Matcher::vector_length_in_bytes(n) <= 32 &&
22823 !is_integral_type(Matcher::vector_element_basic_type(n)));
22824 match(Set dst (VectorBlend (Binary src1 src2) mask));
22825 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22826 ins_encode %{
22827 int vlen_enc = vector_length_encoding(this);
22828 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22829 %}
22830 ins_pipe( pipe_slow );
22831 %}
22832
22833 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22834 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22835 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22836 Matcher::vector_length_in_bytes(n) <= 32);
22837 match(Set dst (VectorBlend (Binary src1 src2) mask));
22838 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22839 effect(TEMP vtmp, TEMP dst);
22840 ins_encode %{
22841 int vlen_enc = vector_length_encoding(this);
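    // When EnableX86ECoreOpts is set, emulate the blend as (~mask & src1) | (mask & src2)
    // instead of using vpblendvb.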
22842 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22843 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22844 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22845 %}
22846 ins_pipe( pipe_slow );
22847 %}
22848
22849 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22850 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22851 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22852 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22854 effect(TEMP ktmp);
22855 ins_encode %{
22856 int vlen_enc = Assembler::AVX_512bit;
22857 BasicType elem_bt = Matcher::vector_element_basic_type(this);
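    // Turn the vector mask (all-bits-set lanes) into a k-register with a compare, then
    // blend src1/src2 as a merge-masked operation.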
22858 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22859 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22860 %}
22861 ins_pipe( pipe_slow );
22862 %}
22863
22864
22865 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22866 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22867 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22868 VM_Version::supports_avx512bw()));
22869 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22871 ins_encode %{
22872 int vlen_enc = vector_length_encoding(this);
22873 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22874 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22875 %}
22876 ins_pipe( pipe_slow );
22877 %}
22878
22879 // --------------------------------- ABS --------------------------------------
22880 // a = |a|
22881 instruct vabsB_reg(vec dst, vec src) %{
22882 match(Set dst (AbsVB src));
22883 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22884 ins_encode %{
22885 uint vlen = Matcher::vector_length(this);
22886 if (vlen <= 16) {
22887 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22888 } else {
22889 int vlen_enc = vector_length_encoding(this);
22890 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22891 }
22892 %}
22893 ins_pipe( pipe_slow );
22894 %}
22895
22896 instruct vabsS_reg(vec dst, vec src) %{
22897 match(Set dst (AbsVS src));
22898 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22899 ins_encode %{
22900 uint vlen = Matcher::vector_length(this);
22901 if (vlen <= 8) {
22902 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22903 } else {
22904 int vlen_enc = vector_length_encoding(this);
22905 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22906 }
22907 %}
22908 ins_pipe( pipe_slow );
22909 %}
22910
22911 instruct vabsI_reg(vec dst, vec src) %{
22912 match(Set dst (AbsVI src));
22913 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22914 ins_encode %{
22915 uint vlen = Matcher::vector_length(this);
22916 if (vlen <= 4) {
22917 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22918 } else {
22919 int vlen_enc = vector_length_encoding(this);
22920 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22921 }
22922 %}
22923 ins_pipe( pipe_slow );
22924 %}
22925
22926 instruct vabsL_reg(vec dst, vec src) %{
22927 match(Set dst (AbsVL src));
22928 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22929 ins_encode %{
22930 assert(UseAVX > 2, "required");
22931 int vlen_enc = vector_length_encoding(this);
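    // Without AVX512VL, evpabsq is only available with the 512-bit encoding.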
22932 if (!VM_Version::supports_avx512vl()) {
22933 vlen_enc = Assembler::AVX_512bit;
22934 }
22935 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22936 %}
22937 ins_pipe( pipe_slow );
22938 %}
22939
22940 // --------------------------------- ABSNEG --------------------------------------
22941
22942 instruct vabsnegF(vec dst, vec src) %{
22943 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22944 match(Set dst (AbsVF src));
22945 match(Set dst (NegVF src));
22946 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22947 ins_cost(150);
22948 ins_encode %{
22949 int opcode = this->ideal_Opcode();
22950 int vlen = Matcher::vector_length(this);
22951 if (vlen == 2) {
22952 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22953 } else {
22954 assert(vlen == 8 || vlen == 16, "required");
22955 int vlen_enc = vector_length_encoding(this);
22956 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22957 }
22958 %}
22959 ins_pipe( pipe_slow );
22960 %}
22961
22962 instruct vabsneg4F(vec dst) %{
22963 predicate(Matcher::vector_length(n) == 4);
22964 match(Set dst (AbsVF dst));
22965 match(Set dst (NegVF dst));
22966 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22967 ins_cost(150);
22968 ins_encode %{
22969 int opcode = this->ideal_Opcode();
22970 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22971 %}
22972 ins_pipe( pipe_slow );
22973 %}
22974
22975 instruct vabsnegD(vec dst, vec src) %{
22976 match(Set dst (AbsVD src));
22977 match(Set dst (NegVD src));
22978 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22979 ins_encode %{
22980 int opcode = this->ideal_Opcode();
22981 uint vlen = Matcher::vector_length(this);
22982 if (vlen == 2) {
22983 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22984 } else {
22985 int vlen_enc = vector_length_encoding(this);
22986 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22987 }
22988 %}
22989 ins_pipe( pipe_slow );
22990 %}
22991
22992 //------------------------------------- VectorTest --------------------------------------------
22993
22994 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22995 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22996 match(Set cr (VectorTest src1 src2));
22997 effect(TEMP vtmp);
22998 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22999 ins_encode %{
23000 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23001 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23002 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23003 %}
23004 ins_pipe( pipe_slow );
23005 %}
23006
23007 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23008 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23009 match(Set cr (VectorTest src1 src2));
23010 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23011 ins_encode %{
23012 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23013 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23014 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23015 %}
23016 ins_pipe( pipe_slow );
23017 %}
23018
23019 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23020 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23021 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23022 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23023 match(Set cr (VectorTest src1 src2));
23024 effect(TEMP tmp);
23025 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23026 ins_encode %{
23027 uint masklen = Matcher::vector_length(this, $src1);
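    // Move the mask bits into a GPR, keep only the low masklen bits, and compare against
    // the all-true pattern; the resulting flags encode the all-lanes-set test.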
23028 __ kmovwl($tmp$$Register, $src1$$KRegister);
23029 __ andl($tmp$$Register, (1 << masklen) - 1);
23030 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23031 %}
23032 ins_pipe( pipe_slow );
23033 %}
23034
23035 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23036 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23037 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23038 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23039 match(Set cr (VectorTest src1 src2));
23040 effect(TEMP tmp);
23041 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23042 ins_encode %{
23043 uint masklen = Matcher::vector_length(this, $src1);
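    // Keep only the low masklen bits; a non-zero result (ZF clear) means at least one lane is set.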
23044 __ kmovwl($tmp$$Register, $src1$$KRegister);
23045 __ andl($tmp$$Register, (1 << masklen) - 1);
23046 %}
23047 ins_pipe( pipe_slow );
23048 %}
23049
23050 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23051 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23052 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23053 match(Set cr (VectorTest src1 src2));
23054 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23055 ins_encode %{
23056 uint masklen = Matcher::vector_length(this, $src1);
23057 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23058 %}
23059 ins_pipe( pipe_slow );
23060 %}
23061
23062 //------------------------------------- LoadMask --------------------------------------------
23063
23064 instruct loadMask(legVec dst, legVec src) %{
23065 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23066 match(Set dst (VectorLoadMask src));
23067 effect(TEMP dst);
23068 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23069 ins_encode %{
23070 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23071 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23072 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23073 %}
23074 ins_pipe( pipe_slow );
23075 %}
23076
23077 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23078 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23079 match(Set dst (VectorLoadMask src));
23080 effect(TEMP xtmp);
23081 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23082 ins_encode %{
23083 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23084 true, Assembler::AVX_512bit);
23085 %}
23086 ins_pipe( pipe_slow );
23087 %}
23088
23089 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23090 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23091 match(Set dst (VectorLoadMask src));
23092 effect(TEMP xtmp);
23093 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23094 ins_encode %{
23095 int vlen_enc = vector_length_encoding(in(1));
23096 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23097 false, vlen_enc);
23098 %}
23099 ins_pipe( pipe_slow );
23100 %}
23101
23102 //------------------------------------- StoreMask --------------------------------------------
23103
23104 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23105 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23106 match(Set dst (VectorStoreMask src size));
23107 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23108 ins_encode %{
23109 int vlen = Matcher::vector_length(this);
23110 if (vlen <= 16 && UseAVX <= 2) {
23111 assert(UseSSE >= 3, "required");
23112 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23113 } else {
23114 assert(UseAVX > 0, "required");
23115 int src_vlen_enc = vector_length_encoding(this, $src);
23116 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23117 }
23118 %}
23119 ins_pipe( pipe_slow );
23120 %}
23121
23122 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23123 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23124 match(Set dst (VectorStoreMask src size));
23125 effect(TEMP_DEF dst, TEMP xtmp);
23126 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23127 ins_encode %{
23128 int vlen_enc = Assembler::AVX_128bit;
23129 int vlen = Matcher::vector_length(this);
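    // Narrow each two-byte mask lane to a single byte and normalize -1/0 lanes to 1/0,
    // so the stored mask uses one 0/1 byte per element.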
23130 if (vlen <= 8) {
23131 assert(UseSSE >= 3, "required");
23132 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23133 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23134 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23135 } else {
23136 assert(UseAVX > 0, "required");
23137 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23138 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23139 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23140 }
23141 %}
23142 ins_pipe( pipe_slow );
23143 %}
23144
23145 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23146 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23147 match(Set dst (VectorStoreMask src size));
23148 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23149 effect(TEMP_DEF dst, TEMP xtmp);
23150 ins_encode %{
23151 int vlen_enc = Assembler::AVX_128bit;
23152 int vlen = Matcher::vector_length(this);
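    // Same idea for four-byte mask lanes: pack down to bytes and normalize -1/0 to 1/0.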
23153 if (vlen <= 4) {
23154 assert(UseSSE >= 3, "required");
23155 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23156 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23157 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23158 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23159 } else {
23160 assert(UseAVX > 0, "required");
23161 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23162 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23163 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23164 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23165 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23166 }
23167 %}
23168 ins_pipe( pipe_slow );
23169 %}
23170
23171 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23172 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23173 match(Set dst (VectorStoreMask src size));
23174 effect(TEMP_DEF dst, TEMP xtmp);
23175 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23176 ins_encode %{
23177 assert(UseSSE >= 3, "required");
23178 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23179 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23180 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23181 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23182 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23183 %}
23184 ins_pipe( pipe_slow );
23185 %}
23186
23187 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23188 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23189 match(Set dst (VectorStoreMask src size));
23190 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23191 effect(TEMP_DEF dst, TEMP vtmp);
23192 ins_encode %{
23193 int vlen_enc = Assembler::AVX_128bit;
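    // Collect the low dword of each 64-bit mask lane into the bottom 128 bits, then
    // pack down to bytes and normalize -1/0 to 1/0.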
23194 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23195 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23196 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23197 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23198 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23199 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23200 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23201 %}
23202 ins_pipe( pipe_slow );
23203 %}
23204
23205 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23206 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23207 match(Set dst (VectorStoreMask src size));
23208 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23209 ins_encode %{
23210 int src_vlen_enc = vector_length_encoding(this, $src);
23211 int dst_vlen_enc = vector_length_encoding(this);
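    // Without AVX512VL, the narrowing evpmovdb is only available with the 512-bit encoding.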
23212 if (!VM_Version::supports_avx512vl()) {
23213 src_vlen_enc = Assembler::AVX_512bit;
23214 }
23215 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23216 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23217 %}
23218 ins_pipe( pipe_slow );
23219 %}
23220
23221 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23222 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23223 match(Set dst (VectorStoreMask src size));
23224 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23225 ins_encode %{
23226 int src_vlen_enc = vector_length_encoding(this, $src);
23227 int dst_vlen_enc = vector_length_encoding(this);
23228 if (!VM_Version::supports_avx512vl()) {
23229 src_vlen_enc = Assembler::AVX_512bit;
23230 }
23231 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23232 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23233 %}
23234 ins_pipe( pipe_slow );
23235 %}
23236
23237 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23238 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23239 match(Set dst (VectorStoreMask mask size));
23240 effect(TEMP_DEF dst);
23241 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23242 ins_encode %{
23243 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23244 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23245 false, Assembler::AVX_512bit, noreg);
23246 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23247 %}
23248 ins_pipe( pipe_slow );
23249 %}
23250
23251 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23252 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23253 match(Set dst (VectorStoreMask mask size));
23254 effect(TEMP_DEF dst);
23255 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23256 ins_encode %{
23257 int dst_vlen_enc = vector_length_encoding(this);
23258 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23259 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23260 %}
23261 ins_pipe( pipe_slow );
23262 %}
23263
23264 instruct vmaskcast_evex(kReg dst) %{
23265 match(Set dst (VectorMaskCast dst));
23266 ins_cost(0);
23267 format %{ "vector_mask_cast $dst" %}
23268 ins_encode %{
23269 // empty
23270 %}
23271 ins_pipe(empty);
23272 %}
23273
23274 instruct vmaskcast(vec dst) %{
23275 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23276 match(Set dst (VectorMaskCast dst));
23277 ins_cost(0);
23278 format %{ "vector_mask_cast $dst" %}
23279 ins_encode %{
23280 // empty
23281 %}
23282 ins_pipe(empty);
23283 %}
23284
23285 instruct vmaskcast_avx(vec dst, vec src) %{
23286 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23287 match(Set dst (VectorMaskCast src));
23288 format %{ "vector_mask_cast $dst, $src" %}
23289 ins_encode %{
23290 int vlen = Matcher::vector_length(this);
23291 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23292 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23293 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23294 %}
23295 ins_pipe(pipe_slow);
23296 %}
23297
23298 //-------------------------------- Load Iota Indices ----------------------------------
23299
23300 instruct loadIotaIndices(vec dst, immI_0 src) %{
23301 match(Set dst (VectorLoadConst src));
23302 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23303 ins_encode %{
23304 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23305 BasicType bt = Matcher::vector_element_basic_type(this);
23306 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23307 %}
23308 ins_pipe( pipe_slow );
23309 %}
23310
23311 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23312 match(Set dst (PopulateIndex src1 src2));
23313 effect(TEMP dst, TEMP vtmp);
23314 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23315 ins_encode %{
23316 assert($src2$$constant == 1, "required");
23317 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23318 int vlen_enc = vector_length_encoding(this);
23319 BasicType elem_bt = Matcher::vector_element_basic_type(this);
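    // dst[i] = src1 + i: broadcast the start value and add the iota index vector
    // ($src2, the stride, is restricted to 1 by the assert above).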
23320 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23321 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23322 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23323 %}
23324 ins_pipe( pipe_slow );
23325 %}
23326
23327 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23328 match(Set dst (PopulateIndex src1 src2));
23329 effect(TEMP dst, TEMP vtmp);
23330 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23331 ins_encode %{
23332 assert($src2$$constant == 1, "required");
23333 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23334 int vlen_enc = vector_length_encoding(this);
23335 BasicType elem_bt = Matcher::vector_element_basic_type(this);
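    // Same sequence as the int variant above, with a 64-bit start value broadcast from $src1.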
23336 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23337 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23338 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23339 %}
23340 ins_pipe( pipe_slow );
23341 %}
23342
23343 //-------------------------------- Rearrange ----------------------------------
23344
23345 // LoadShuffle/Rearrange for Byte
23346 instruct rearrangeB(vec dst, vec shuffle) %{
23347 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23348 Matcher::vector_length(n) < 32);
23349 match(Set dst (VectorRearrange dst shuffle));
23350 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23351 ins_encode %{
23352 assert(UseSSE >= 4, "required");
23353 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23354 %}
23355 ins_pipe( pipe_slow );
23356 %}
23357
23358 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23359 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23360 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23361 match(Set dst (VectorRearrange src shuffle));
23362 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23363 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23364 ins_encode %{
23365 assert(UseAVX >= 2, "required");
23366 // Swap src into vtmp1
23367 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23368 // Shuffle swapped src to get entries from other 128 bit lane
23369 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23370 // Shuffle original src to get entries from self 128 bit lane
23371 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23372 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23373 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23374 // Perform the blend
23375 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23376 %}
23377 ins_pipe( pipe_slow );
23378 %}
23379
23380
23381 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23382 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23383 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23384 match(Set dst (VectorRearrange src shuffle));
23385 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23386 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23387 ins_encode %{
23388 int vlen_enc = vector_length_encoding(this);
23389 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23390 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23391 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23392 %}
23393 ins_pipe( pipe_slow );
23394 %}
23395
23396 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23397 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23398 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23399 match(Set dst (VectorRearrange src shuffle));
23400 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23401 ins_encode %{
23402 int vlen_enc = vector_length_encoding(this);
23403 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23404 %}
23405 ins_pipe( pipe_slow );
23406 %}
23407
23408 // LoadShuffle/Rearrange for Short
23409
23410 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23411 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23412 !VM_Version::supports_avx512bw());
23413 match(Set dst (VectorLoadShuffle src));
23414 effect(TEMP dst, TEMP vtmp);
23415 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23416 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23419 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23420 if (UseAVX == 0) {
23421 assert(vlen_in_bytes <= 16, "required");
23422 // Multiply each shuffle by two to get byte index
23423 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23424 __ psllw($vtmp$$XMMRegister, 1);
23425
23426 // Duplicate to create 2 copies of byte index
23427 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23428 __ psllw($dst$$XMMRegister, 8);
23429 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23430
23431 // Add one to get alternate byte index
23432 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23433 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23434 } else {
23435 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23436 int vlen_enc = vector_length_encoding(this);
23437 // Multiply each shuffle by two to get byte index
23438 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23439
23440 // Duplicate to create 2 copies of byte index
23441 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23442 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23443
23444 // Add one to get alternate byte index
23445 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23446 }
23447 %}
23448 ins_pipe( pipe_slow );
23449 %}
23450
23451 instruct rearrangeS(vec dst, vec shuffle) %{
23452 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23453 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23454 match(Set dst (VectorRearrange dst shuffle));
23455 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23456 ins_encode %{
23457 assert(UseSSE >= 4, "required");
23458 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23459 %}
23460 ins_pipe( pipe_slow );
23461 %}
23462
23463 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23464 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23465 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23466 match(Set dst (VectorRearrange src shuffle));
23467 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23468 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23469 ins_encode %{
23470 assert(UseAVX >= 2, "required");
23471 // Swap src into vtmp1
23472 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23473 // Shuffle swapped src to get entries from other 128 bit lane
23474 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23475 // Shuffle original src to get entries from self 128 bit lane
23476 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23477 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23478 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23479 // Perform the blend
23480 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23481 %}
23482 ins_pipe( pipe_slow );
23483 %}
23484
23485 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23486 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23487 VM_Version::supports_avx512bw());
23488 match(Set dst (VectorRearrange src shuffle));
23489 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23490 ins_encode %{
23491 int vlen_enc = vector_length_encoding(this);
23492 if (!VM_Version::supports_avx512vl()) {
23493 vlen_enc = Assembler::AVX_512bit;
23494 }
23495 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23496 %}
23497 ins_pipe( pipe_slow );
23498 %}
23499
23500 // LoadShuffle/Rearrange for Integer and Float
23501
23502 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23503 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23504 Matcher::vector_length(n) == 4 && UseAVX == 0);
23505 match(Set dst (VectorLoadShuffle src));
23506 effect(TEMP dst, TEMP vtmp);
23507 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23508 ins_encode %{
23509 assert(UseSSE >= 4, "required");
23510
    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23513
23514 // Duplicate and multiply each shuffle by 4
23515 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23516 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23517 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23518 __ psllw($vtmp$$XMMRegister, 2);
23519
23520 // Duplicate again to create 4 copies of byte index
23521 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23522 __ psllw($dst$$XMMRegister, 8);
23523 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23524
23525 // Add 3,2,1,0 to get alternate byte index
23526 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23527 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23528 %}
23529 ins_pipe( pipe_slow );
23530 %}
23531
23532 instruct rearrangeI(vec dst, vec shuffle) %{
23533 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23534 UseAVX == 0);
23535 match(Set dst (VectorRearrange dst shuffle));
23536 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23537 ins_encode %{
23538 assert(UseSSE >= 4, "required");
23539 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23540 %}
23541 ins_pipe( pipe_slow );
23542 %}
23543
23544 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23545 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23546 UseAVX > 0);
23547 match(Set dst (VectorRearrange src shuffle));
23548 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23549 ins_encode %{
23550 int vlen_enc = vector_length_encoding(this);
23551 BasicType bt = Matcher::vector_element_basic_type(this);
23552 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23553 %}
23554 ins_pipe( pipe_slow );
23555 %}
23556
23557 // LoadShuffle/Rearrange for Long and Double
23558
23559 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23560 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23561 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23562 match(Set dst (VectorLoadShuffle src));
23563 effect(TEMP dst, TEMP vtmp);
23564 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23565 ins_encode %{
23566 assert(UseAVX >= 2, "required");
23567
23568 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.
23571
23572 // Multiply each shuffle by two to get double word index
23573 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23574
23575 // Duplicate each double word shuffle
23576 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23577 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23578
23579 // Add one to get alternate double word index
23580 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23581 %}
23582 ins_pipe( pipe_slow );
23583 %}
23584
23585 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23586 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23587 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23588 match(Set dst (VectorRearrange src shuffle));
23589 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23590 ins_encode %{
23591 assert(UseAVX >= 2, "required");
23592
23593 int vlen_enc = vector_length_encoding(this);
23594 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23595 %}
23596 ins_pipe( pipe_slow );
23597 %}
23598
23599 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23600 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23601 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23602 match(Set dst (VectorRearrange src shuffle));
23603 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23604 ins_encode %{
23605 assert(UseAVX > 2, "required");
23606
23607 int vlen_enc = vector_length_encoding(this);
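    // The vector-indexed form of vpermq has no 128-bit encoding, so promote to 256-bit.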
23608 if (vlen_enc == Assembler::AVX_128bit) {
23609 vlen_enc = Assembler::AVX_256bit;
23610 }
23611 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23612 %}
23613 ins_pipe( pipe_slow );
23614 %}
23615
23616 // --------------------------------- FMA --------------------------------------
23617 // a * b + c
23618
23619 instruct vfmaF_reg(vec a, vec b, vec c) %{
23620 match(Set c (FmaVF c (Binary a b)));
23621 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23622 ins_cost(150);
23623 ins_encode %{
23624 assert(UseFMA, "not enabled");
23625 int vlen_enc = vector_length_encoding(this);
23626 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23627 %}
23628 ins_pipe( pipe_slow );
23629 %}
23630
23631 instruct vfmaF_mem(vec a, memory b, vec c) %{
23632 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23633 match(Set c (FmaVF c (Binary a (LoadVector b))));
23634 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23635 ins_cost(150);
23636 ins_encode %{
23637 assert(UseFMA, "not enabled");
23638 int vlen_enc = vector_length_encoding(this);
23639 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23640 %}
23641 ins_pipe( pipe_slow );
23642 %}
23643
23644 instruct vfmaD_reg(vec a, vec b, vec c) %{
23645 match(Set c (FmaVD c (Binary a b)));
23646 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23647 ins_cost(150);
23648 ins_encode %{
23649 assert(UseFMA, "not enabled");
23650 int vlen_enc = vector_length_encoding(this);
23651 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23652 %}
23653 ins_pipe( pipe_slow );
23654 %}
23655
23656 instruct vfmaD_mem(vec a, memory b, vec c) %{
23657 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23658 match(Set c (FmaVD c (Binary a (LoadVector b))));
23659 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23660 ins_cost(150);
23661 ins_encode %{
23662 assert(UseFMA, "not enabled");
23663 int vlen_enc = vector_length_encoding(this);
23664 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23665 %}
23666 ins_pipe( pipe_slow );
23667 %}
23668
23669 // --------------------------------- Vector Multiply Add --------------------------------------
23670
23671 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23672 predicate(UseAVX == 0);
23673 match(Set dst (MulAddVS2VI dst src1));
23674 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23675 ins_encode %{
23676 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23677 %}
23678 ins_pipe( pipe_slow );
23679 %}
23680
23681 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23682 predicate(UseAVX > 0);
23683 match(Set dst (MulAddVS2VI src1 src2));
23684 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23685 ins_encode %{
23686 int vlen_enc = vector_length_encoding(this);
23687 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23688 %}
23689 ins_pipe( pipe_slow );
23690 %}
23691
23692 // --------------------------------- Vector Multiply Add Add ----------------------------------
23693
23694 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23695 predicate(VM_Version::supports_avx512_vnni());
23696 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23697 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23698 ins_encode %{
23699 assert(UseAVX > 2, "required");
23700 int vlen_enc = vector_length_encoding(this);
23701 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23702 %}
23703 ins_pipe( pipe_slow );
23704 ins_cost(10);
23705 %}
23706
23707 // --------------------------------- PopCount --------------------------------------
23708
23709 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23710 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23711 match(Set dst (PopCountVI src));
23712 match(Set dst (PopCountVL src));
23713 format %{ "vector_popcount_integral $dst, $src" %}
23714 ins_encode %{
23715 int opcode = this->ideal_Opcode();
23716 int vlen_enc = vector_length_encoding(this, $src);
23717 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23718 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23719 %}
23720 ins_pipe( pipe_slow );
23721 %}
23722
23723 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23724 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23725 match(Set dst (PopCountVI src mask));
23726 match(Set dst (PopCountVL src mask));
23727 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23728 ins_encode %{
23729 int vlen_enc = vector_length_encoding(this, $src);
23730 BasicType bt = Matcher::vector_element_basic_type(this, $src);
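    // Pre-copy $src into $dst so lanes cleared in $mask keep the source value; the
    // popcount is then applied as a merge-masked operation.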
23731 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23732 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23733 %}
23734 ins_pipe( pipe_slow );
23735 %}
23736
23737 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23738 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23739 match(Set dst (PopCountVI src));
23740 match(Set dst (PopCountVL src));
23741 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23742 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23743 ins_encode %{
23744 int opcode = this->ideal_Opcode();
23745 int vlen_enc = vector_length_encoding(this, $src);
23746 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23747 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23748 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23749 %}
23750 ins_pipe( pipe_slow );
23751 %}
23752
23753 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23754
23755 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23756 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23757 Matcher::vector_length_in_bytes(n->in(1))));
23758 match(Set dst (CountTrailingZerosV src));
23759 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23760 ins_cost(400);
23761 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
23762 ins_encode %{
23763 int vlen_enc = vector_length_encoding(this, $src);
23764 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23765 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23766 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23767 %}
23768 ins_pipe( pipe_slow );
23769 %}
23770
23771 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23772 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23773 VM_Version::supports_avx512cd() &&
23774 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23775 match(Set dst (CountTrailingZerosV src));
23776 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23777 ins_cost(400);
23778 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23779 ins_encode %{
23780 int vlen_enc = vector_length_encoding(this, $src);
23781 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23782 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23783 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23784 %}
23785 ins_pipe( pipe_slow );
23786 %}
23787
23788 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23789 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23790 match(Set dst (CountTrailingZerosV src));
23791 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23792 ins_cost(400);
23793 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23794 ins_encode %{
23795 int vlen_enc = vector_length_encoding(this, $src);
23796 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23797 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23798 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23799 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23800 %}
23801 ins_pipe( pipe_slow );
23802 %}
23803
23804 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23805 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23806 match(Set dst (CountTrailingZerosV src));
23807 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23808 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23809 ins_encode %{
23810 int vlen_enc = vector_length_encoding(this, $src);
23811 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23812 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23813 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23814 %}
23815 ins_pipe( pipe_slow );
23816 %}
23817
23818
23819 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23820
23821 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23822 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23823 effect(TEMP dst);
23824 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23825 ins_encode %{
23826 int vector_len = vector_length_encoding(this);
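    // $func is the 8-bit truth table that selects the boolean function of (dst, src2, src3)
    // applied bitwise by vpternlogd.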
23827 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23828 %}
23829 ins_pipe( pipe_slow );
23830 %}
23831
23832 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23833 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23834 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23835 effect(TEMP dst);
23836 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23837 ins_encode %{
23838 int vector_len = vector_length_encoding(this);
23839 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23840 %}
23841 ins_pipe( pipe_slow );
23842 %}
23843
23844 // --------------------------------- Rotation Operations ----------------------------------
23845 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23846 match(Set dst (RotateLeftV src shift));
23847 match(Set dst (RotateRightV src shift));
23848 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23849 ins_encode %{
23850 int opcode = this->ideal_Opcode();
23851 int vector_len = vector_length_encoding(this);
23852 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23853 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23854 %}
23855 ins_pipe( pipe_slow );
23856 %}
23857
23858 instruct vprorate(vec dst, vec src, vec shift) %{
23859 match(Set dst (RotateLeftV src shift));
23860 match(Set dst (RotateRightV src shift));
23861 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23862 ins_encode %{
23863 int opcode = this->ideal_Opcode();
23864 int vector_len = vector_length_encoding(this);
23865 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23866 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23867 %}
23868 ins_pipe( pipe_slow );
23869 %}
23870
23871 // ---------------------------------- Masked Operations ------------------------------------
23872 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23873 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23874 match(Set dst (LoadVectorMasked mem mask));
23875 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23876 ins_encode %{
23877 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23878 int vlen_enc = vector_length_encoding(this);
23879 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23880 %}
23881 ins_pipe( pipe_slow );
23882 %}
23883
23884
23885 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23886 predicate(n->in(3)->bottom_type()->isa_vectmask());
23887 match(Set dst (LoadVectorMasked mem mask));
23888 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23889 ins_encode %{
23890 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23891 int vector_len = vector_length_encoding(this);
23892 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23893 %}
23894 ins_pipe( pipe_slow );
23895 %}
23896
23897 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23898 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23899 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23900 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23901 ins_encode %{
23902 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23903 int vlen_enc = vector_length_encoding(src_node);
23904 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23905 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23906 %}
23907 ins_pipe( pipe_slow );
23908 %}
23909
23910 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23911 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23912 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23913 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23914 ins_encode %{
23915 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23916 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23917 int vlen_enc = vector_length_encoding(src_node);
23918 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23919 %}
23920 ins_pipe( pipe_slow );
23921 %}
23922
23923 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23924 match(Set addr (VerifyVectorAlignment addr mask));
23925 effect(KILL cr);
23926 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23927 ins_encode %{
23928 Label Lskip;
23929 // check if masked bits of addr are zero
23930 __ testq($addr$$Register, $mask$$constant);
23931 __ jccb(Assembler::equal, Lskip);
23932 __ stop("verify_vector_alignment found a misaligned vector memory access");
23933 __ bind(Lskip);
23934 %}
23935 ins_pipe(pipe_slow);
23936 %}
23937
23938 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23939 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23940 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23941 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23942 ins_encode %{
23943 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23944 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23945
23946 Label DONE;
23947 int vlen_enc = vector_length_encoding(this, $src1);
23948 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23949
23950 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23951 __ mov64($dst$$Register, -1L);
23952 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23953 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23954 __ jccb(Assembler::carrySet, DONE);
23955 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23956 __ notq($dst$$Register);
23957 __ tzcntq($dst$$Register, $dst$$Register);
23958 __ bind(DONE);
23959 %}
23960 ins_pipe( pipe_slow );
23961 %}
23962
23963
23964 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23965 match(Set dst (VectorMaskGen len));
23966 effect(TEMP temp, KILL cr);
23967 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23968 ins_encode %{
23969 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23970 %}
23971 ins_pipe( pipe_slow );
23972 %}
23973
23974 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23975 match(Set dst (VectorMaskGen len));
23976 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23977 effect(TEMP temp);
23978 ins_encode %{
23979 if ($len$$constant > 0) {
23980 __ mov64($temp$$Register, right_n_bits($len$$constant));
23981 __ kmovql($dst$$KRegister, $temp$$Register);
23982 } else {
23983 __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
23984 }
23985 %}
23986 ins_pipe( pipe_slow );
23987 %}
23988
23989 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23990 predicate(n->in(1)->bottom_type()->isa_vectmask());
23991 match(Set dst (VectorMaskToLong mask));
23992 effect(TEMP dst, KILL cr);
23993 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23994 ins_encode %{
23995 int opcode = this->ideal_Opcode();
23996 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23997 int mask_len = Matcher::vector_length(this, $mask);
23998 int mask_size = mask_len * type2aelembytes(mbt);
23999 int vlen_enc = vector_length_encoding(this, $mask);
24000 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24001 $dst$$Register, mask_len, mask_size, vlen_enc);
24002 %}
24003 ins_pipe( pipe_slow );
24004 %}
24005
24006 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24007 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24008 match(Set dst (VectorMaskToLong mask));
24009 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24010 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24011 ins_encode %{
24012 int opcode = this->ideal_Opcode();
24013 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24014 int mask_len = Matcher::vector_length(this, $mask);
24015 int vlen_enc = vector_length_encoding(this, $mask);
24016 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24017 $dst$$Register, mask_len, mbt, vlen_enc);
24018 %}
24019 ins_pipe( pipe_slow );
24020 %}
24021
24022 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24023 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24024 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24025 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24026 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24027 ins_encode %{
24028 int opcode = this->ideal_Opcode();
24029 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24030 int mask_len = Matcher::vector_length(this, $mask);
24031 int vlen_enc = vector_length_encoding(this, $mask);
24032 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24033 $dst$$Register, mask_len, mbt, vlen_enc);
24034 %}
24035 ins_pipe( pipe_slow );
24036 %}
24037
24038 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24039 predicate(n->in(1)->bottom_type()->isa_vectmask());
24040 match(Set dst (VectorMaskTrueCount mask));
24041 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24042 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24043 ins_encode %{
24044 int opcode = this->ideal_Opcode();
24045 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24046 int mask_len = Matcher::vector_length(this, $mask);
24047 int mask_size = mask_len * type2aelembytes(mbt);
24048 int vlen_enc = vector_length_encoding(this, $mask);
24049 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24050 $tmp$$Register, mask_len, mask_size, vlen_enc);
24051 %}
24052 ins_pipe( pipe_slow );
24053 %}
24054
24055 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24056 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24057 match(Set dst (VectorMaskTrueCount mask));
24058 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24059 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24060 ins_encode %{
24061 int opcode = this->ideal_Opcode();
24062 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24063 int mask_len = Matcher::vector_length(this, $mask);
24064 int vlen_enc = vector_length_encoding(this, $mask);
24065 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24066 $tmp$$Register, mask_len, mbt, vlen_enc);
24067 %}
24068 ins_pipe( pipe_slow );
24069 %}
24070
24071 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24072 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24073 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24074 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24075 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24076 ins_encode %{
24077 int opcode = this->ideal_Opcode();
24078 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24079 int mask_len = Matcher::vector_length(this, $mask);
24080 int vlen_enc = vector_length_encoding(this, $mask);
24081 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24082 $tmp$$Register, mask_len, mbt, vlen_enc);
24083 %}
24084 ins_pipe( pipe_slow );
24085 %}
24086
24087 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24088 predicate(n->in(1)->bottom_type()->isa_vectmask());
24089 match(Set dst (VectorMaskFirstTrue mask));
24090 match(Set dst (VectorMaskLastTrue mask));
24091 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24092 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24093 ins_encode %{
24094 int opcode = this->ideal_Opcode();
24095 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24096 int mask_len = Matcher::vector_length(this, $mask);
24097 int mask_size = mask_len * type2aelembytes(mbt);
24098 int vlen_enc = vector_length_encoding(this, $mask);
24099 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24100 $tmp$$Register, mask_len, mask_size, vlen_enc);
24101 %}
24102 ins_pipe( pipe_slow );
24103 %}
24104
24105 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24106 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24107 match(Set dst (VectorMaskFirstTrue mask));
24108 match(Set dst (VectorMaskLastTrue mask));
24109 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24110 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24111 ins_encode %{
24112 int opcode = this->ideal_Opcode();
24113 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24114 int mask_len = Matcher::vector_length(this, $mask);
24115 int vlen_enc = vector_length_encoding(this, $mask);
24116 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24117 $tmp$$Register, mask_len, mbt, vlen_enc);
24118 %}
24119 ins_pipe( pipe_slow );
24120 %}
24121
24122 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24123 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24124 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24125 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24126 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24127 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24128 ins_encode %{
24129 int opcode = this->ideal_Opcode();
24130 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24131 int mask_len = Matcher::vector_length(this, $mask);
24132 int vlen_enc = vector_length_encoding(this, $mask);
24133 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24134 $tmp$$Register, mask_len, mbt, vlen_enc);
24135 %}
24136 ins_pipe( pipe_slow );
24137 %}
24138
24139 // --------------------------------- Compress/Expand Operations ---------------------------
24140 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24141 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24142 match(Set dst (CompressV src mask));
24143 match(Set dst (ExpandV src mask));
24144 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24145 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24146 ins_encode %{
24147 int opcode = this->ideal_Opcode();
24148 int vlen_enc = vector_length_encoding(this);
24149 BasicType bt = Matcher::vector_element_basic_type(this);
24150 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24151 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24152 %}
24153 ins_pipe( pipe_slow );
24154 %}
24155
24156 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24157 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24158 match(Set dst (CompressV src mask));
24159 match(Set dst (ExpandV src mask));
24160 format %{ "vector_compress_expand $dst, $src, $mask" %}
24161 ins_encode %{
24162 int opcode = this->ideal_Opcode();
24163 int vector_len = vector_length_encoding(this);
24164 BasicType bt = Matcher::vector_element_basic_type(this);
24165 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24166 %}
24167 ins_pipe( pipe_slow );
24168 %}
24169
24170 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24171 match(Set dst (CompressM mask));
24172 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24173 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24174 ins_encode %{
24175 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24176 int mask_len = Matcher::vector_length(this);
24177 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24178 %}
24179 ins_pipe( pipe_slow );
24180 %}
24181
24182 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24183
24184 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24185 predicate(!VM_Version::supports_gfni());
24186 match(Set dst (ReverseV src));
24187 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24188 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24189 ins_encode %{
24190 int vec_enc = vector_length_encoding(this);
24191 BasicType bt = Matcher::vector_element_basic_type(this);
24192 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24193 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24194 %}
24195 ins_pipe( pipe_slow );
24196 %}
24197
24198 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24199 predicate(VM_Version::supports_gfni());
24200 match(Set dst (ReverseV src));
24201 effect(TEMP dst, TEMP xtmp);
24202 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
24203 ins_encode %{
24204 int vec_enc = vector_length_encoding(this);
24205 BasicType bt = Matcher::vector_element_basic_type(this);
24206 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24207 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24208 $xtmp$$XMMRegister);
24209 %}
24210 ins_pipe( pipe_slow );
24211 %}
24212
24213 instruct vreverse_byte_reg(vec dst, vec src) %{
24214 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24215 match(Set dst (ReverseBytesV src));
24216 effect(TEMP dst);
24217 format %{ "vector_reverse_byte $dst, $src" %}
24218 ins_encode %{
24219 int vec_enc = vector_length_encoding(this);
24220 BasicType bt = Matcher::vector_element_basic_type(this);
24221 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24222 %}
24223 ins_pipe( pipe_slow );
24224 %}
24225
24226 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24227 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24228 match(Set dst (ReverseBytesV src));
24229 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24230 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24231 ins_encode %{
24232 int vec_enc = vector_length_encoding(this);
24233 BasicType bt = Matcher::vector_element_basic_type(this);
24234 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24235 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24236 %}
24237 ins_pipe( pipe_slow );
24238 %}
24239
24240 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24241
24242 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24243 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24244 Matcher::vector_length_in_bytes(n->in(1))));
24245 match(Set dst (CountLeadingZerosV src));
24246 format %{ "vector_count_leading_zeros $dst, $src" %}
24247 ins_encode %{
24248 int vlen_enc = vector_length_encoding(this, $src);
24249 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24250 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24251 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24252 %}
24253 ins_pipe( pipe_slow );
24254 %}
24255
24256 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24257 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24258 Matcher::vector_length_in_bytes(n->in(1))));
24259 match(Set dst (CountLeadingZerosV src mask));
24260 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24261 ins_encode %{
24262 int vlen_enc = vector_length_encoding(this, $src);
24263 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24264 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24265 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24266 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24267 %}
24268 ins_pipe( pipe_slow );
24269 %}
24270
24271 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24272 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24273 VM_Version::supports_avx512cd() &&
24274 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24275 match(Set dst (CountLeadingZerosV src));
24276 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24277 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24278 ins_encode %{
24279 int vlen_enc = vector_length_encoding(this, $src);
24280 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24281 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24282 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24283 %}
24284 ins_pipe( pipe_slow );
24285 %}
24286
24287 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24288 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24289 match(Set dst (CountLeadingZerosV src));
24290 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24291 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24292 ins_encode %{
24293 int vlen_enc = vector_length_encoding(this, $src);
24294 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24295 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24296 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24297 $rtmp$$Register, true, vlen_enc);
24298 %}
24299 ins_pipe( pipe_slow );
24300 %}
24301
24302 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24303 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24304 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24305 match(Set dst (CountLeadingZerosV src));
24306 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24307 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24308 ins_encode %{
24309 int vlen_enc = vector_length_encoding(this, $src);
24310 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24311 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24312 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24313 %}
24314 ins_pipe( pipe_slow );
24315 %}
24316
24317 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24318 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24319 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24320 match(Set dst (CountLeadingZerosV src));
24321 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24322 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24323 ins_encode %{
24324 int vlen_enc = vector_length_encoding(this, $src);
24325 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24326 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24327 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24328 %}
24329 ins_pipe( pipe_slow );
24330 %}
24331
24332 // ---------------------------------- Vector Masked Operations ------------------------------------
24333
24334 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24335 match(Set dst (AddVB (Binary dst src2) mask));
24336 match(Set dst (AddVS (Binary dst src2) mask));
24337 match(Set dst (AddVI (Binary dst src2) mask));
24338 match(Set dst (AddVL (Binary dst src2) mask));
24339 match(Set dst (AddVF (Binary dst src2) mask));
24340 match(Set dst (AddVD (Binary dst src2) mask));
24341 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24342 ins_encode %{
24343 int vlen_enc = vector_length_encoding(this);
24344 BasicType bt = Matcher::vector_element_basic_type(this);
24345 int opc = this->ideal_Opcode();
24346 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24347 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24348 %}
24349 ins_pipe( pipe_slow );
24350 %}
24351
24352 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24353 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24354 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24355 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24356 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24357 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24358 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24359 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24360 ins_encode %{
24361 int vlen_enc = vector_length_encoding(this);
24362 BasicType bt = Matcher::vector_element_basic_type(this);
24363 int opc = this->ideal_Opcode();
24364 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24365 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24366 %}
24367 ins_pipe( pipe_slow );
24368 %}
24369
24370 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24371 match(Set dst (XorV (Binary dst src2) mask));
24372 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24373 ins_encode %{
24374 int vlen_enc = vector_length_encoding(this);
24375 BasicType bt = Matcher::vector_element_basic_type(this);
24376 int opc = this->ideal_Opcode();
24377 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24378 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24379 %}
24380 ins_pipe( pipe_slow );
24381 %}
24382
24383 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24384 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24385 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24386 ins_encode %{
24387 int vlen_enc = vector_length_encoding(this);
24388 BasicType bt = Matcher::vector_element_basic_type(this);
24389 int opc = this->ideal_Opcode();
24390 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24391 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24392 %}
24393 ins_pipe( pipe_slow );
24394 %}
24395
24396 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24397 match(Set dst (OrV (Binary dst src2) mask));
24398 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24399 ins_encode %{
24400 int vlen_enc = vector_length_encoding(this);
24401 BasicType bt = Matcher::vector_element_basic_type(this);
24402 int opc = this->ideal_Opcode();
24403 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24404 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24405 %}
24406 ins_pipe( pipe_slow );
24407 %}
24408
24409 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24410 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24411 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24412 ins_encode %{
24413 int vlen_enc = vector_length_encoding(this);
24414 BasicType bt = Matcher::vector_element_basic_type(this);
24415 int opc = this->ideal_Opcode();
24416 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24417 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24418 %}
24419 ins_pipe( pipe_slow );
24420 %}
24421
24422 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24423 match(Set dst (AndV (Binary dst src2) mask));
24424 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24425 ins_encode %{
24426 int vlen_enc = vector_length_encoding(this);
24427 BasicType bt = Matcher::vector_element_basic_type(this);
24428 int opc = this->ideal_Opcode();
24429 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24430 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24431 %}
24432 ins_pipe( pipe_slow );
24433 %}
24434
24435 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24436 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24437 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24438 ins_encode %{
24439 int vlen_enc = vector_length_encoding(this);
24440 BasicType bt = Matcher::vector_element_basic_type(this);
24441 int opc = this->ideal_Opcode();
24442 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24443 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24444 %}
24445 ins_pipe( pipe_slow );
24446 %}
24447
24448 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24449 match(Set dst (SubVB (Binary dst src2) mask));
24450 match(Set dst (SubVS (Binary dst src2) mask));
24451 match(Set dst (SubVI (Binary dst src2) mask));
24452 match(Set dst (SubVL (Binary dst src2) mask));
24453 match(Set dst (SubVF (Binary dst src2) mask));
24454 match(Set dst (SubVD (Binary dst src2) mask));
24455 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24456 ins_encode %{
24457 int vlen_enc = vector_length_encoding(this);
24458 BasicType bt = Matcher::vector_element_basic_type(this);
24459 int opc = this->ideal_Opcode();
24460 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24461 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24462 %}
24463 ins_pipe( pipe_slow );
24464 %}
24465
24466 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24467 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24468 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24469 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24470 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24471 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24472 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24473 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24474 ins_encode %{
24475 int vlen_enc = vector_length_encoding(this);
24476 BasicType bt = Matcher::vector_element_basic_type(this);
24477 int opc = this->ideal_Opcode();
24478 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24479 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24480 %}
24481 ins_pipe( pipe_slow );
24482 %}
24483
24484 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24485 match(Set dst (MulVS (Binary dst src2) mask));
24486 match(Set dst (MulVI (Binary dst src2) mask));
24487 match(Set dst (MulVL (Binary dst src2) mask));
24488 match(Set dst (MulVF (Binary dst src2) mask));
24489 match(Set dst (MulVD (Binary dst src2) mask));
24490 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24491 ins_encode %{
24492 int vlen_enc = vector_length_encoding(this);
24493 BasicType bt = Matcher::vector_element_basic_type(this);
24494 int opc = this->ideal_Opcode();
24495 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24496 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24497 %}
24498 ins_pipe( pipe_slow );
24499 %}
24500
24501 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24502 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24503 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24504 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24505 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24506 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24507 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24508 ins_encode %{
24509 int vlen_enc = vector_length_encoding(this);
24510 BasicType bt = Matcher::vector_element_basic_type(this);
24511 int opc = this->ideal_Opcode();
24512 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24513 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24514 %}
24515 ins_pipe( pipe_slow );
24516 %}
24517
24518 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24519 match(Set dst (SqrtVF dst mask));
24520 match(Set dst (SqrtVD dst mask));
24521 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24522 ins_encode %{
24523 int vlen_enc = vector_length_encoding(this);
24524 BasicType bt = Matcher::vector_element_basic_type(this);
24525 int opc = this->ideal_Opcode();
24526 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24527 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24528 %}
24529 ins_pipe( pipe_slow );
24530 %}
24531
24532 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24533 match(Set dst (DivVF (Binary dst src2) mask));
24534 match(Set dst (DivVD (Binary dst src2) mask));
24535 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24536 ins_encode %{
24537 int vlen_enc = vector_length_encoding(this);
24538 BasicType bt = Matcher::vector_element_basic_type(this);
24539 int opc = this->ideal_Opcode();
24540 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24541 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24542 %}
24543 ins_pipe( pipe_slow );
24544 %}
24545
24546 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24547 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24548 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24549 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24550 ins_encode %{
24551 int vlen_enc = vector_length_encoding(this);
24552 BasicType bt = Matcher::vector_element_basic_type(this);
24553 int opc = this->ideal_Opcode();
24554 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24555 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24556 %}
24557 ins_pipe( pipe_slow );
24558 %}
24559
24560
24561 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24562 match(Set dst (RotateLeftV (Binary dst shift) mask));
24563 match(Set dst (RotateRightV (Binary dst shift) mask));
24564 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24565 ins_encode %{
24566 int vlen_enc = vector_length_encoding(this);
24567 BasicType bt = Matcher::vector_element_basic_type(this);
24568 int opc = this->ideal_Opcode();
24569 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24570 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24571 %}
24572 ins_pipe( pipe_slow );
24573 %}
24574
24575 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24576 match(Set dst (RotateLeftV (Binary dst src2) mask));
24577 match(Set dst (RotateRightV (Binary dst src2) mask));
24578 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24579 ins_encode %{
24580 int vlen_enc = vector_length_encoding(this);
24581 BasicType bt = Matcher::vector_element_basic_type(this);
24582 int opc = this->ideal_Opcode();
24583 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24584 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24585 %}
24586 ins_pipe( pipe_slow );
24587 %}
24588
24589 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24590 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24591 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24592 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24593 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24594 ins_encode %{
24595 int vlen_enc = vector_length_encoding(this);
24596 BasicType bt = Matcher::vector_element_basic_type(this);
24597 int opc = this->ideal_Opcode();
24598 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24599 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24600 %}
24601 ins_pipe( pipe_slow );
24602 %}
24603
24604 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24605 predicate(!n->as_ShiftV()->is_var_shift());
24606 match(Set dst (LShiftVS (Binary dst src2) mask));
24607 match(Set dst (LShiftVI (Binary dst src2) mask));
24608 match(Set dst (LShiftVL (Binary dst src2) mask));
24609 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24610 ins_encode %{
24611 int vlen_enc = vector_length_encoding(this);
24612 BasicType bt = Matcher::vector_element_basic_type(this);
24613 int opc = this->ideal_Opcode();
24614 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24615 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24616 %}
24617 ins_pipe( pipe_slow );
24618 %}
24619
24620 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24621 predicate(n->as_ShiftV()->is_var_shift());
24622 match(Set dst (LShiftVS (Binary dst src2) mask));
24623 match(Set dst (LShiftVI (Binary dst src2) mask));
24624 match(Set dst (LShiftVL (Binary dst src2) mask));
24625 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24626 ins_encode %{
24627 int vlen_enc = vector_length_encoding(this);
24628 BasicType bt = Matcher::vector_element_basic_type(this);
24629 int opc = this->ideal_Opcode();
24630 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24631 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24632 %}
24633 ins_pipe( pipe_slow );
24634 %}
24635
24636 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24637 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24638 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24639 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24640 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24641 ins_encode %{
24642 int vlen_enc = vector_length_encoding(this);
24643 BasicType bt = Matcher::vector_element_basic_type(this);
24644 int opc = this->ideal_Opcode();
24645 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24646 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24647 %}
24648 ins_pipe( pipe_slow );
24649 %}
24650
24651 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24652 predicate(!n->as_ShiftV()->is_var_shift());
24653 match(Set dst (RShiftVS (Binary dst src2) mask));
24654 match(Set dst (RShiftVI (Binary dst src2) mask));
24655 match(Set dst (RShiftVL (Binary dst src2) mask));
24656 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24657 ins_encode %{
24658 int vlen_enc = vector_length_encoding(this);
24659 BasicType bt = Matcher::vector_element_basic_type(this);
24660 int opc = this->ideal_Opcode();
24661 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24662 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24663 %}
24664 ins_pipe( pipe_slow );
24665 %}
24666
24667 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24668 predicate(n->as_ShiftV()->is_var_shift());
24669 match(Set dst (RShiftVS (Binary dst src2) mask));
24670 match(Set dst (RShiftVI (Binary dst src2) mask));
24671 match(Set dst (RShiftVL (Binary dst src2) mask));
24672 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24673 ins_encode %{
24674 int vlen_enc = vector_length_encoding(this);
24675 BasicType bt = Matcher::vector_element_basic_type(this);
24676 int opc = this->ideal_Opcode();
24677 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24678 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24679 %}
24680 ins_pipe( pipe_slow );
24681 %}
24682
24683 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24684 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24685 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24686 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24687 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24688 ins_encode %{
24689 int vlen_enc = vector_length_encoding(this);
24690 BasicType bt = Matcher::vector_element_basic_type(this);
24691 int opc = this->ideal_Opcode();
24692 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24693 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24694 %}
24695 ins_pipe( pipe_slow );
24696 %}
24697
24698 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24699 predicate(!n->as_ShiftV()->is_var_shift());
24700 match(Set dst (URShiftVS (Binary dst src2) mask));
24701 match(Set dst (URShiftVI (Binary dst src2) mask));
24702 match(Set dst (URShiftVL (Binary dst src2) mask));
24703 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24704 ins_encode %{
24705 int vlen_enc = vector_length_encoding(this);
24706 BasicType bt = Matcher::vector_element_basic_type(this);
24707 int opc = this->ideal_Opcode();
24708 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24709 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24710 %}
24711 ins_pipe( pipe_slow );
24712 %}
24713
24714 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24715 predicate(n->as_ShiftV()->is_var_shift());
24716 match(Set dst (URShiftVS (Binary dst src2) mask));
24717 match(Set dst (URShiftVI (Binary dst src2) mask));
24718 match(Set dst (URShiftVL (Binary dst src2) mask));
24719 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24720 ins_encode %{
24721 int vlen_enc = vector_length_encoding(this);
24722 BasicType bt = Matcher::vector_element_basic_type(this);
24723 int opc = this->ideal_Opcode();
24724 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24725 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24726 %}
24727 ins_pipe( pipe_slow );
24728 %}
24729
24730 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24731 match(Set dst (MaxV (Binary dst src2) mask));
24732 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24733 ins_encode %{
24734 int vlen_enc = vector_length_encoding(this);
24735 BasicType bt = Matcher::vector_element_basic_type(this);
24736 int opc = this->ideal_Opcode();
24737 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24738 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24739 %}
24740 ins_pipe( pipe_slow );
24741 %}
24742
24743 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24744 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24745 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24746 ins_encode %{
24747 int vlen_enc = vector_length_encoding(this);
24748 BasicType bt = Matcher::vector_element_basic_type(this);
24749 int opc = this->ideal_Opcode();
24750 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24751 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24752 %}
24753 ins_pipe( pipe_slow );
24754 %}
24755
24756 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24757 match(Set dst (MinV (Binary dst src2) mask));
24758 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24759 ins_encode %{
24760 int vlen_enc = vector_length_encoding(this);
24761 BasicType bt = Matcher::vector_element_basic_type(this);
24762 int opc = this->ideal_Opcode();
24763 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24764 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24765 %}
24766 ins_pipe( pipe_slow );
24767 %}
24768
24769 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24770 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24771 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24772 ins_encode %{
24773 int vlen_enc = vector_length_encoding(this);
24774 BasicType bt = Matcher::vector_element_basic_type(this);
24775 int opc = this->ideal_Opcode();
24776 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24777 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24778 %}
24779 ins_pipe( pipe_slow );
24780 %}
24781
24782 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24783 match(Set dst (VectorRearrange (Binary dst src2) mask));
24784 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24785 ins_encode %{
24786 int vlen_enc = vector_length_encoding(this);
24787 BasicType bt = Matcher::vector_element_basic_type(this);
24788 int opc = this->ideal_Opcode();
24789 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24790 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24791 %}
24792 ins_pipe( pipe_slow );
24793 %}
24794
24795 instruct vabs_masked(vec dst, kReg mask) %{
24796 match(Set dst (AbsVB dst mask));
24797 match(Set dst (AbsVS dst mask));
24798 match(Set dst (AbsVI dst mask));
24799 match(Set dst (AbsVL dst mask));
24800 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24801 ins_encode %{
24802 int vlen_enc = vector_length_encoding(this);
24803 BasicType bt = Matcher::vector_element_basic_type(this);
24804 int opc = this->ideal_Opcode();
24805 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24806 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24807 %}
24808 ins_pipe( pipe_slow );
24809 %}
24810
24811 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24812 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24813 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24814 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24815 ins_encode %{
24816 assert(UseFMA, "Needs FMA instructions support.");
24817 int vlen_enc = vector_length_encoding(this);
24818 BasicType bt = Matcher::vector_element_basic_type(this);
24819 int opc = this->ideal_Opcode();
24820 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24821 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24822 %}
24823 ins_pipe( pipe_slow );
24824 %}
24825
24826 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24827 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24828 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24829 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24830 ins_encode %{
24831 assert(UseFMA, "Needs FMA instructions support.");
24832 int vlen_enc = vector_length_encoding(this);
24833 BasicType bt = Matcher::vector_element_basic_type(this);
24834 int opc = this->ideal_Opcode();
24835 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24836 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24837 %}
24838 ins_pipe( pipe_slow );
24839 %}
24840
24841 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24842 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24843 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24844 ins_encode %{
24845 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24846 int vlen_enc = vector_length_encoding(this, $src1);
24847 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24848
    // Dispatch on the element type of src1 to select the comparison instruction.
24850 switch (src1_elem_bt) {
24851 case T_BYTE: {
24852 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24853 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24854 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24855 break;
24856 }
24857 case T_SHORT: {
24858 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24859 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24860 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24861 break;
24862 }
24863 case T_INT: {
24864 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24865 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24866 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24867 break;
24868 }
24869 case T_LONG: {
24870 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24871 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24872 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24873 break;
24874 }
24875 case T_FLOAT: {
24876 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24877 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24878 break;
24879 }
24880 case T_DOUBLE: {
24881 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24882 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24883 break;
24884 }
24885 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24886 }
24887 %}
24888 ins_pipe( pipe_slow );
24889 %}
24890
24891 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24892 predicate(Matcher::vector_length(n) <= 32);
24893 match(Set dst (MaskAll src));
24894 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24895 ins_encode %{
24896 int mask_len = Matcher::vector_length(this);
24897 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24898 %}
24899 ins_pipe( pipe_slow );
24900 %}
24901
24902 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24903 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24904 match(Set dst (XorVMask src (MaskAll cnt)));
24905 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24906 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24907 ins_encode %{
24908 uint masklen = Matcher::vector_length(this);
24909 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24910 %}
24911 ins_pipe( pipe_slow );
24912 %}
24913
24914 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24915 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24916 (Matcher::vector_length(n) == 16) ||
24917 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24918 match(Set dst (XorVMask src (MaskAll cnt)));
24919 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24920 ins_encode %{
24921 uint masklen = Matcher::vector_length(this);
24922 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24923 %}
24924 ins_pipe( pipe_slow );
24925 %}
24926
24927 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
24928 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24929 match(Set dst (VectorLongToMask src));
24930 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
24931 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
24932 ins_encode %{
24933 int mask_len = Matcher::vector_length(this);
24934 int vec_enc = vector_length_encoding(mask_len);
24935 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24936 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24937 %}
24938 ins_pipe( pipe_slow );
24939 %}
24940
24941
24942 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24943 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24944 match(Set dst (VectorLongToMask src));
24945 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24946 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24947 ins_encode %{
24948 int mask_len = Matcher::vector_length(this);
24949 assert(mask_len <= 32, "invalid mask length");
24950 int vec_enc = vector_length_encoding(mask_len);
24951 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24952 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24953 %}
24954 ins_pipe( pipe_slow );
24955 %}
24956
24957 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24958 predicate(n->bottom_type()->isa_vectmask());
24959 match(Set dst (VectorLongToMask src));
24960 format %{ "long_to_mask_evex $dst, $src\t!" %}
24961 ins_encode %{
24962 __ kmov($dst$$KRegister, $src$$Register);
24963 %}
24964 ins_pipe( pipe_slow );
24965 %}
24966
24967 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24968 match(Set dst (AndVMask src1 src2));
24969 match(Set dst (OrVMask src1 src2));
24970 match(Set dst (XorVMask src1 src2));
24971 effect(TEMP kscratch);
24972 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24973 ins_encode %{
24974 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24975 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24976 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24977 uint masklen = Matcher::vector_length(this);
24978 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24979 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24980 %}
24981 ins_pipe( pipe_slow );
24982 %}
24983
24984 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24985 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24986 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24987 ins_encode %{
24988 int vlen_enc = vector_length_encoding(this);
24989 BasicType bt = Matcher::vector_element_basic_type(this);
24990 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24991 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24992 %}
24993 ins_pipe( pipe_slow );
24994 %}
24995
24996 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24997 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24998 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24999 ins_encode %{
25000 int vlen_enc = vector_length_encoding(this);
25001 BasicType bt = Matcher::vector_element_basic_type(this);
25002 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25003 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25004 %}
25005 ins_pipe( pipe_slow );
25006 %}
25007
25008 instruct castMM(kReg dst)
25009 %{
25010 match(Set dst (CastVV dst));
25011
25012 size(0);
25013 format %{ "# castVV of $dst" %}
25014 ins_encode(/* empty encoding */);
25015 ins_cost(0);
25016 ins_pipe(empty);
25017 %}
25018
25019 instruct castVV(vec dst)
25020 %{
25021 match(Set dst (CastVV dst));
25022
25023 size(0);
25024 format %{ "# castVV of $dst" %}
25025 ins_encode(/* empty encoding */);
25026 ins_cost(0);
25027 ins_pipe(empty);
25028 %}
25029
25030 instruct castVVLeg(legVec dst)
25031 %{
25032 match(Set dst (CastVV dst));
25033
25034 size(0);
25035 format %{ "# castVV of $dst" %}
25036 ins_encode(/* empty encoding */);
25037 ins_cost(0);
25038 ins_pipe(empty);
25039 %}
25040
25041 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25042 %{
25043 match(Set dst (IsInfiniteF src));
25044 effect(TEMP ktmp, KILL cr);
25045 format %{ "float_class_check $dst, $src" %}
25046 ins_encode %{
25047 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25048 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25049 %}
25050 ins_pipe(pipe_slow);
25051 %}
25052
25053 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25054 %{
25055 match(Set dst (IsInfiniteD src));
25056 effect(TEMP ktmp, KILL cr);
25057 format %{ "double_class_check $dst, $src" %}
25058 ins_encode %{
25059 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25060 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25061 %}
25062 ins_pipe(pipe_slow);
25063 %}
25064
25065 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25066 %{
25067 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25068 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25069 match(Set dst (SaturatingAddV src1 src2));
25070 match(Set dst (SaturatingSubV src1 src2));
25071 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25072 ins_encode %{
25073 int vlen_enc = vector_length_encoding(this);
25074 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25075 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25076 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25077 %}
25078 ins_pipe(pipe_slow);
25079 %}
25080
25081 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25082 %{
25083 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25084 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25085 match(Set dst (SaturatingAddV src1 src2));
25086 match(Set dst (SaturatingSubV src1 src2));
25087 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25088 ins_encode %{
25089 int vlen_enc = vector_length_encoding(this);
25090 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25091 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25092 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25093 %}
25094 ins_pipe(pipe_slow);
25095 %}
25096
25097 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25098 %{
25099 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25100 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25101 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25102 match(Set dst (SaturatingAddV src1 src2));
25103 match(Set dst (SaturatingSubV src1 src2));
25104 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25105 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25106 ins_encode %{
25107 int vlen_enc = vector_length_encoding(this);
25108 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25109 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25110 $src1$$XMMRegister, $src2$$XMMRegister,
25111 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25112 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25113 %}
25114 ins_pipe(pipe_slow);
25115 %}
25116
25117 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25118 %{
25119 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25120 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25121 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25122 match(Set dst (SaturatingAddV src1 src2));
25123 match(Set dst (SaturatingSubV src1 src2));
25124 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25125 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25126 ins_encode %{
25127 int vlen_enc = vector_length_encoding(this);
25128 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25129 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25130 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25131 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25132 %}
25133 ins_pipe(pipe_slow);
25134 %}
25135
25136 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25137 %{
25138 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25139 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25140 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25141 match(Set dst (SaturatingAddV src1 src2));
25142 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25143 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25144 ins_encode %{
25145 int vlen_enc = vector_length_encoding(this);
25146 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25147 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25148 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25149 %}
25150 ins_pipe(pipe_slow);
25151 %}
25152
25153 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25154 %{
25155 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25156 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25157 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25158 match(Set dst (SaturatingAddV src1 src2));
25159 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25160 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25161 ins_encode %{
25162 int vlen_enc = vector_length_encoding(this);
25163 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25164 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25165 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25166 %}
25167 ins_pipe(pipe_slow);
25168 %}
25169
25170 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25171 %{
25172 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25173 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25174 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25175 match(Set dst (SaturatingSubV src1 src2));
25176 effect(TEMP ktmp);
25177 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25178 ins_encode %{
25179 int vlen_enc = vector_length_encoding(this);
25180 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25181 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25182 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25183 %}
25184 ins_pipe(pipe_slow);
25185 %}
25186
25187 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25188 %{
25189 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25190 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25191 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25192 match(Set dst (SaturatingSubV src1 src2));
25193 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25194 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25195 ins_encode %{
25196 int vlen_enc = vector_length_encoding(this);
25197 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25198 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25199 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25200 %}
25201 ins_pipe(pipe_slow);
25202 %}
25203
25204 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25205 %{
25206 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25207 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25208 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25209 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25210 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25211 ins_encode %{
25212 int vlen_enc = vector_length_encoding(this);
25213 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25214 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25215 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25216 %}
25217 ins_pipe(pipe_slow);
25218 %}
25219
25220 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25221 %{
25222 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25223 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25224 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25225 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25226 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25227 ins_encode %{
25228 int vlen_enc = vector_length_encoding(this);
25229 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25230 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25231 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25232 %}
25233 ins_pipe(pipe_slow);
25234 %}
25235
25236 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25237 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25238 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25239 match(Set dst (SaturatingAddV (Binary dst src) mask));
25240 match(Set dst (SaturatingSubV (Binary dst src) mask));
25241 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25242 ins_encode %{
25243 int vlen_enc = vector_length_encoding(this);
25244 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25245 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25246 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25247 %}
25248 ins_pipe( pipe_slow );
25249 %}
25250
25251 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25252 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25253 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25254 match(Set dst (SaturatingAddV (Binary dst src) mask));
25255 match(Set dst (SaturatingSubV (Binary dst src) mask));
25256 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25257 ins_encode %{
25258 int vlen_enc = vector_length_encoding(this);
25259 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25260 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25261 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25262 %}
25263 ins_pipe( pipe_slow );
25264 %}
25265
25266 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25267 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25268 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25269 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25270 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25271 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25272 ins_encode %{
25273 int vlen_enc = vector_length_encoding(this);
25274 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25275 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25276 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25277 %}
25278 ins_pipe( pipe_slow );
25279 %}
25280
25281 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25282 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25283 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25284 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25285 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25286 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25287 ins_encode %{
25288 int vlen_enc = vector_length_encoding(this);
25289 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25290 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25291 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25292 %}
25293 ins_pipe( pipe_slow );
25294 %}
25295
25296 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25297 %{
25298 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25299 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25300 ins_encode %{
25301 int vlen_enc = vector_length_encoding(this);
25302 BasicType bt = Matcher::vector_element_basic_type(this);
25303 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25304 %}
25305 ins_pipe(pipe_slow);
25306 %}
25307
25308 instruct reinterpretS2HF(regF dst, rRegI src)
25309 %{
25310 match(Set dst (ReinterpretS2HF src));
25311 format %{ "evmovw $dst, $src" %}
25312 ins_encode %{
25313 __ evmovw($dst$$XMMRegister, $src$$Register);
25314 %}
25315 ins_pipe(pipe_slow);
25316 %}
25317
25318 instruct reinterpretHF2S(rRegI dst, regF src)
25319 %{
25320 match(Set dst (ReinterpretHF2S src));
25321 format %{ "evmovw $dst, $src" %}
25322 ins_encode %{
25323 __ evmovw($dst$$Register, $src$$XMMRegister);
25324 __ narrow_subword_type($dst$$Register, T_SHORT);
25325 %}
25326 ins_pipe(pipe_slow);
25327 %}
25328
25329 instruct convF2HFAndS2HF(regF dst, regF src)
25330 %{
25331 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25332 format %{ "convF2HFAndS2HF $dst, $src" %}
25333 ins_encode %{
25334 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25335 %}
25336 ins_pipe(pipe_slow);
25337 %}
25338
25339 instruct convHF2SAndHF2F(regF dst, regF src)
25340 %{
25341 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25342 format %{ "convHF2SAndHF2F $dst, $src" %}
25343 ins_encode %{
25344 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25345 %}
25346 ins_pipe(pipe_slow);
25347 %}
25348
25349 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25350 %{
25351 match(Set dst (SqrtHF src));
25352 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25353 ins_encode %{
25354 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25355 %}
25356 ins_pipe(pipe_slow);
25357 %}
25358
25359 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25360 %{
25361 match(Set dst (AddHF src1 src2));
25362 match(Set dst (DivHF src1 src2));
25363 match(Set dst (MulHF src1 src2));
25364 match(Set dst (SubHF src1 src2));
25365 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25366 ins_encode %{
25367 int opcode = this->ideal_Opcode();
25368 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25369 %}
25370 ins_pipe(pipe_slow);
25371 %}
25372
25373 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25374 %{
25375 predicate(VM_Version::supports_avx10_2());
25376 match(Set dst (MaxHF src1 src2));
25377 match(Set dst (MinHF src1 src2));
25378
25379 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25380 ins_encode %{
25381 int opcode = this->ideal_Opcode();
25382 __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25383 %}
25384 ins_pipe( pipe_slow );
25385 %}
25386
25387 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25388 %{
25389 predicate(!VM_Version::supports_avx10_2());
25390 match(Set dst (MaxHF src1 src2));
25391 match(Set dst (MinHF src1 src2));
25392 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25393
25394 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25395 ins_encode %{
25396 int opcode = this->ideal_Opcode();
25397 __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25398 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25399 %}
25400 ins_pipe( pipe_slow );
25401 %}
25402
25403 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25404 %{
25405 match(Set dst (FmaHF src2 (Binary dst src1)));
25406 effect(DEF dst);
25407 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25408 ins_encode %{
25409 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25410 %}
25411 ins_pipe( pipe_slow );
25412 %}
25413
25414
25415 instruct vector_sqrt_HF_reg(vec dst, vec src)
25416 %{
25417 match(Set dst (SqrtVHF src));
25418 format %{ "vector_sqrt_fp16 $dst, $src" %}
25419 ins_encode %{
25420 int vlen_enc = vector_length_encoding(this);
25421 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25422 %}
25423 ins_pipe(pipe_slow);
25424 %}
25425
25426 instruct vector_sqrt_HF_mem(vec dst, memory src)
25427 %{
25428 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25429 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25430 ins_encode %{
25431 int vlen_enc = vector_length_encoding(this);
25432 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25433 %}
25434 ins_pipe(pipe_slow);
25435 %}
25436
25437 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25438 %{
25439 match(Set dst (AddVHF src1 src2));
25440 match(Set dst (DivVHF src1 src2));
25441 match(Set dst (MulVHF src1 src2));
25442 match(Set dst (SubVHF src1 src2));
25443 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25444 ins_encode %{
25445 int vlen_enc = vector_length_encoding(this);
25446 int opcode = this->ideal_Opcode();
25447 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25448 %}
25449 ins_pipe(pipe_slow);
25450 %}
25451
25452
25453 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25454 %{
25455 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25456 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25457 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25458 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25459 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25460 ins_encode %{
25461 int vlen_enc = vector_length_encoding(this);
25462 int opcode = this->ideal_Opcode();
25463 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25464 %}
25465 ins_pipe(pipe_slow);
25466 %}
25467
25468 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25469 %{
25470 match(Set dst (FmaVHF src2 (Binary dst src1)));
25471 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25472 ins_encode %{
25473 int vlen_enc = vector_length_encoding(this);
25474 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25475 %}
25476 ins_pipe( pipe_slow );
25477 %}
25478
25479 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25480 %{
25481 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25482 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25483 ins_encode %{
25484 int vlen_enc = vector_length_encoding(this);
25485 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25486 %}
25487 ins_pipe( pipe_slow );
25488 %}
25489
25490 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25491 %{
25492 predicate(VM_Version::supports_avx10_2());
25493 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25494 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25495 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25496 ins_encode %{
25497 int vlen_enc = vector_length_encoding(this);
25498 int opcode = this->ideal_Opcode();
25499 __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25500 k0, vlen_enc);
25501 %}
25502 ins_pipe( pipe_slow );
25503 %}
25504
25505 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25506 %{
25507 predicate(VM_Version::supports_avx10_2());
25508 match(Set dst (MinVHF src1 src2));
25509 match(Set dst (MaxVHF src1 src2));
25510 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25511 ins_encode %{
25512 int vlen_enc = vector_length_encoding(this);
25513 int opcode = this->ideal_Opcode();
25514 __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25515 k0, vlen_enc);
25516 %}
25517 ins_pipe( pipe_slow );
25518 %}
25519
25520 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25521 %{
25522 predicate(!VM_Version::supports_avx10_2());
25523 match(Set dst (MinVHF src1 src2));
25524 match(Set dst (MaxVHF src1 src2));
25525 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25526 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25527 ins_encode %{
25528 int vlen_enc = vector_length_encoding(this);
25529 int opcode = this->ideal_Opcode();
25530 __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25531 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25532 %}
25533 ins_pipe( pipe_slow );
25534 %}
25535
25536 //----------PEEPHOLE RULES-----------------------------------------------------
25537 // These must follow all instruction definitions as they use the names
25538 // defined in the instructions definitions.
25539 //
25540 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25542 //
25543 // peepmatch ( root_instr_name [preceding_instruction]* );
25544 //
25545 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...), where
// // the arguments are the basic block, the current node index inside the
// // block, the register allocator, function pointers that, when invoked,
// // create the new nodes defined in peepreplace, and the rule numbers of the
// // nodes appearing in the corresponding peepmatch; the procedure returns
// // true if the transformation succeeded, else false
25554 //
25555 // peepconstraint %{
25556 // (instruction_number.operand_name relational_op instruction_number.operand_name
25557 // [, ...] );
25558 // // instruction numbers are zero-based using left to right order in peepmatch
25559 //
25560 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25561 // // provide an instruction_number.operand_name for each operand that appears
25562 // // in the replacement instruction's match rule
25563 //
25564 // ---------VM FLAGS---------------------------------------------------------
25565 //
25566 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25567 //
25568 // Each peephole rule is given an identifying number starting with zero and
25569 // increasing by one in the order seen by the parser. An individual peephole
25570 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25571 // on the command-line.
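//
// For example (illustrative usage only; these are develop flags, so they are
// typically available only in debug builds, and the rule numbers depend on the
// order in which the parser sees the rules in this file):
//
//   java -XX:-OptoPeephole ...        # disable all peephole rules
//   java -XX:OptoPeepholeAt=3 ...     # enable only peephole rule number 3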
25572 //
25573 // ---------CURRENT LIMITATIONS----------------------------------------------
25574 //
// Only transformations inside a basic block (do we need more for peephole?)
25576 //
25577 // ---------EXAMPLE----------------------------------------------------------
25578 //
25579 // // pertinent parts of existing instructions in architecture description
25580 // instruct movI(rRegI dst, rRegI src)
25581 // %{
25582 // match(Set dst (CopyI src));
25583 // %}
25584 //
25585 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25586 // %{
25587 // match(Set dst (AddI dst src));
25588 // effect(KILL cr);
25589 // %}
25590 //
25591 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25592 // %{
25593 // match(Set dst (AddI dst src));
25594 // %}
25595 //
25596 // 1. Simple replacement
25597 // - Only match adjacent instructions in same basic block
25598 // - Only equality constraints
25599 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25600 // - Only one replacement instruction
25601 //
25602 // // Change (inc mov) to lea
25603 // peephole %{
25604 // // lea should only be emitted when beneficial
25605 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25606 // // increment preceded by register-register move
25607 // peepmatch ( incI_rReg movI );
25608 // // require that the destination register of the increment
25609 // // match the destination register of the move
25610 // peepconstraint ( 0.dst == 1.dst );
25611 // // construct a replacement instruction that sets
25612 // // the destination to ( move's source register + one )
25613 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25614 // %}
25615 //
25616 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25618 // - More flexible constraints
25619 // - More flexible transformations
25620 // - May utilise architecture-dependent API more effectively
25621 // - Currently only one replacement instruction due to adlc parsing capabilities
25622 //
25623 // // Change (inc mov) to lea
25624 // peephole %{
25625 // // lea should only be emitted when beneficial
25626 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25627 // // the rule numbers of these nodes inside are passed into the function below
25628 // peepmatch ( incI_rReg movI );
25629 // // the method that takes the responsibility of transformation
25630 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
// // this node is passed into the function above
25633 // peepreplace ( leaI_rReg_immI() );
25634 // %}
25635
// These instructions are not matched by the matcher but are used by the peephole rules below
25637 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25638 %{
25639 predicate(false);
25640 match(Set dst (AddI src1 src2));
25641 format %{ "leal $dst, [$src1 + $src2]" %}
25642 ins_encode %{
25643 Register dst = $dst$$Register;
25644 Register src1 = $src1$$Register;
25645 Register src2 = $src2$$Register;
25646 if (src1 != rbp && src1 != r13) {
25647 __ leal(dst, Address(src1, src2, Address::times_1));
25648 } else {
25649 assert(src2 != rbp && src2 != r13, "");
25650 __ leal(dst, Address(src2, src1, Address::times_1));
25651 }
25652 %}
25653 ins_pipe(ialu_reg_reg);
25654 %}
25655
25656 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25657 %{
25658 predicate(false);
25659 match(Set dst (AddI src1 src2));
25660 format %{ "leal $dst, [$src1 + $src2]" %}
25661 ins_encode %{
25662 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25663 %}
25664 ins_pipe(ialu_reg_reg);
25665 %}
25666
25667 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25668 %{
25669 predicate(false);
25670 match(Set dst (LShiftI src shift));
25671 format %{ "leal $dst, [$src << $shift]" %}
25672 ins_encode %{
25673 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25674 Register src = $src$$Register;
25675 if (scale == Address::times_2 && src != rbp && src != r13) {
25676 __ leal($dst$$Register, Address(src, src, Address::times_1));
25677 } else {
25678 __ leal($dst$$Register, Address(noreg, src, scale));
25679 }
25680 %}
25681 ins_pipe(ialu_reg_reg);
25682 %}
25683
25684 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25685 %{
25686 predicate(false);
25687 match(Set dst (AddL src1 src2));
25688 format %{ "leaq $dst, [$src1 + $src2]" %}
25689 ins_encode %{
25690 Register dst = $dst$$Register;
25691 Register src1 = $src1$$Register;
25692 Register src2 = $src2$$Register;
25693 if (src1 != rbp && src1 != r13) {
25694 __ leaq(dst, Address(src1, src2, Address::times_1));
25695 } else {
25696 assert(src2 != rbp && src2 != r13, "");
25697 __ leaq(dst, Address(src2, src1, Address::times_1));
25698 }
25699 %}
25700 ins_pipe(ialu_reg_reg);
25701 %}
25702
25703 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25704 %{
25705 predicate(false);
25706 match(Set dst (AddL src1 src2));
25707 format %{ "leaq $dst, [$src1 + $src2]" %}
25708 ins_encode %{
25709 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25710 %}
25711 ins_pipe(ialu_reg_reg);
25712 %}
25713
25714 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25715 %{
25716 predicate(false);
25717 match(Set dst (LShiftL src shift));
25718 format %{ "leaq $dst, [$src << $shift]" %}
25719 ins_encode %{
25720 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25721 Register src = $src$$Register;
25722 if (scale == Address::times_2 && src != rbp && src != r13) {
25723 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25724 } else {
25725 __ leaq($dst$$Register, Address(noreg, src, scale));
25726 }
25727 %}
25728 ins_pipe(ialu_reg_reg);
25729 %}
25730
25731 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25732 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25733 // processors with at least partial ALU support for lea
25734 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25735 // beneficial for processors with full ALU support
25736 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
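//
// As an illustrative sketch (register names are hypothetical; the actual
// rewriting is performed by the lea_coalesce_* procedures named below), the
// register-register rule turns a move followed by an add
//
//   movl  edx, esi
//   addl  edx, edi
//
// into a single three-operand lea
//
//   leal  edx, [esi + edi]
//
// and the shift rule turns a move followed by a left shift
//
//   movl  edx, esi
//   sall  edx, 2
//
// into
//
//   leal  edx, [esi << 2]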
25737
25738 peephole
25739 %{
25740 peeppredicate(VM_Version::supports_fast_2op_lea());
25741 peepmatch (addI_rReg);
25742 peepprocedure (lea_coalesce_reg);
25743 peepreplace (leaI_rReg_rReg_peep());
25744 %}
25745
25746 peephole
25747 %{
25748 peeppredicate(VM_Version::supports_fast_2op_lea());
25749 peepmatch (addI_rReg_imm);
25750 peepprocedure (lea_coalesce_imm);
25751 peepreplace (leaI_rReg_immI_peep());
25752 %}
25753
25754 peephole
25755 %{
25756 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25757 VM_Version::is_intel_cascade_lake());
25758 peepmatch (incI_rReg);
25759 peepprocedure (lea_coalesce_imm);
25760 peepreplace (leaI_rReg_immI_peep());
25761 %}
25762
25763 peephole
25764 %{
25765 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25766 VM_Version::is_intel_cascade_lake());
25767 peepmatch (decI_rReg);
25768 peepprocedure (lea_coalesce_imm);
25769 peepreplace (leaI_rReg_immI_peep());
25770 %}
25771
25772 peephole
25773 %{
25774 peeppredicate(VM_Version::supports_fast_2op_lea());
25775 peepmatch (salI_rReg_immI2);
25776 peepprocedure (lea_coalesce_imm);
25777 peepreplace (leaI_rReg_immI2_peep());
25778 %}
25779
25780 peephole
25781 %{
25782 peeppredicate(VM_Version::supports_fast_2op_lea());
25783 peepmatch (addL_rReg);
25784 peepprocedure (lea_coalesce_reg);
25785 peepreplace (leaL_rReg_rReg_peep());
25786 %}
25787
25788 peephole
25789 %{
25790 peeppredicate(VM_Version::supports_fast_2op_lea());
25791 peepmatch (addL_rReg_imm);
25792 peepprocedure (lea_coalesce_imm);
25793 peepreplace (leaL_rReg_immL32_peep());
25794 %}
25795
25796 peephole
25797 %{
25798 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25799 VM_Version::is_intel_cascade_lake());
25800 peepmatch (incL_rReg);
25801 peepprocedure (lea_coalesce_imm);
25802 peepreplace (leaL_rReg_immL32_peep());
25803 %}
25804
25805 peephole
25806 %{
25807 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25808 VM_Version::is_intel_cascade_lake());
25809 peepmatch (decL_rReg);
25810 peepprocedure (lea_coalesce_imm);
25811 peepreplace (leaL_rReg_immL32_peep());
25812 %}
25813
25814 peephole
25815 %{
25816 peeppredicate(VM_Version::supports_fast_2op_lea());
25817 peepmatch (salL_rReg_immI2);
25818 peepprocedure (lea_coalesce_imm);
25819 peepreplace (leaL_rReg_immI2_peep());
25820 %}
25821
25822 peephole
25823 %{
25824 peepmatch (leaPCompressedOopOffset);
25825 peepprocedure (lea_remove_redundant);
25826 %}
25827
25828 peephole
25829 %{
25830 peepmatch (leaP8Narrow);
25831 peepprocedure (lea_remove_redundant);
25832 %}
25833
25834 peephole
25835 %{
25836 peepmatch (leaP32Narrow);
25837 peepprocedure (lea_remove_redundant);
25838 %}
25839
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
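//
// For example (an illustrative sketch, not a rule-specific guarantee):
//
//   andl  eax, ebx    ; already sets ZF and SF from the result
//   testl eax, eax    ; candidate for removal
//   je    done
//
// The testl can be dropped because the branch only consumes flags that the
// preceding andl has already set; test_may_remove checks that this holds for
// the actual flag producer and consumers involved.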
25842
// int variant
25844 peephole
25845 %{
25846 peepmatch (testI_reg);
25847 peepprocedure (test_may_remove);
25848 %}
25849
// long variant
25851 peephole
25852 %{
25853 peepmatch (testL_reg);
25854 peepprocedure (test_may_remove);
25855 %}
25856
25857
25858 //----------SMARTSPILL RULES---------------------------------------------------
25859 // These must follow all instruction definitions as they use the names
25860 // defined in the instructions definitions.