1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def"  name (register save type, C convention save type,
36 //                  ideal register type, encoding, concrete VM register);
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
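//
// For example, a hypothetical entry such as
//   reg_def RFOO(SOC, SOE, Op_RegI, 3, rfoo->as_VMReg());
// (RFOO/rfoo are illustrative names only; the real entries below use rax,
// rcx, etc.) would declare a register that is save-on-call for compiled
// Java code, save-on-entry under the C calling convention, spilled and
// reloaded with LoadI/StoreI, and emitted into opcodes with encoding 3.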
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // RBX, RSI, and RDI were previously set as save-on-entry for Java code.
67 // SOE was turned off in Java code due to frequent use of uncommon traps.
68 // Now that the allocator is better, RSI and RDI are turned on as SOE registers.
69
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
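//
// For example, in chunk0 below the scratch registers R10 and R11 come
// first, while RSP, which takes part in every call and stack-frame
// sequence, comes last.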
188
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-(p).
223 // Word a in each register holds a Float; words a-b hold a Double.
224 // The whole registers are used in SSE4.2 intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre-EVEX architectures:
228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
229 // For EVEX-enabled architectures:
230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI:   no XMM registers are preserved across function calls;
233 //              XMM0-XMM7 may hold parameters.
234 // Windows ABI: XMM6-XMM15 are preserved across function calls;
235 //              XMM0-XMM3 may hold parameters.
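//
// The 16 words a-p cover the full register: 16 x 32 bits = 512 bits.  A
// 128-bit (XMM) vector value therefore occupies words a-d, a 256-bit (YMM)
// value words a-h, and a 512-bit (ZMM) value words a-p.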
236
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
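// (k0 is not listed: in EVEX encodings the k0 specifier means "no masking",
// so it is not available as a general predicate operand.)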
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
813 // Empty register class.
814 reg_class no_reg();
815
816 // Class for all pointer/long registers including APX extended GPRs.
817 reg_class all_reg(RAX, RAX_H,
818 RDX, RDX_H,
819 RBP, RBP_H,
820 RDI, RDI_H,
821 RSI, RSI_H,
822 RCX, RCX_H,
823 RBX, RBX_H,
824 RSP, RSP_H,
825 R8, R8_H,
826 R9, R9_H,
827 R10, R10_H,
828 R11, R11_H,
829 R12, R12_H,
830 R13, R13_H,
831 R14, R14_H,
832 R15, R15_H,
833 R16, R16_H,
834 R17, R17_H,
835 R18, R18_H,
836 R19, R19_H,
837 R20, R20_H,
838 R21, R21_H,
839 R22, R22_H,
840 R23, R23_H,
841 R24, R24_H,
842 R25, R25_H,
843 R26, R26_H,
844 R27, R27_H,
845 R28, R28_H,
846 R29, R29_H,
847 R30, R30_H,
848 R31, R31_H);
849
850 // Class for all int registers including APX extended GPRs.
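// (RSP and R15 are omitted: RSP is the stack pointer and R15 is reserved
// as the thread/TLS register, see ptr_r15_reg below.)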
851 reg_class all_int_reg(RAX,
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
881
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
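// This and the following mask-valued classes (%{ return ..._mask; %}) are
// backed by RegMask values computed when the VM starts up rather than fixed
// at build time.  This presumably lets the allocatable set depend on runtime
// configuration, for example whether the APX extended GPRs (R16-R31) are
// available or whether RBP is reserved as a frame pointer.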
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // The flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre-EVEX float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for EVEX float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
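// A reg_class_dynamic selects between its two underlying classes: the first
// class is used when the predicate holds, the second otherwise.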
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
1226 // Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
1244 // Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
1281 // Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
1299 // Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
1336 // Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
1354 // Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
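// Returns true when each bound of the CastLL's long type is either unbounded
// (min_jlong / max_jlong) or fits in a signed 32-bit immediate.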
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
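// R12 is reserved as the heap base register when compressed oops are in use,
// so it must be removed from the allocatable register masks.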
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
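// Intel APX extended GPRs (r16-r31); they are stripped from the masks below
// when UseAPX is off.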
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
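// Emit vzeroupper when compiled code may leave upper YMM/ZMM bits dirty, to
// avoid the AVX <-> SSE transition penalty around calls and returns.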
1626 static bool generate_vzeroupper(Compile* C) {
1627 return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is 3 bytes
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
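// The movq of the inline cache data is a 10-byte mov r64, imm64, so together
// with the 1-byte call opcode this accounts for the 11 bytes skipped above.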
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 // Floating-point compares set CF=1 for the unordered case (at least one input is NaN),
1703 // so the 'below' branch also covers NaN. Result: dst = -1 if less or unordered, 0 if equal, 1 if greater.
1704 Label done;
1705 __ movl(dst, -1);
1706 __ jcc(Assembler::below, done);
1707 __ setcc(Assembler::notEqual, dst);
1708 __ bind(done);
1709 }
1710
1711 // Math.min() # Math.max()
1712 // --------------------------
1713 // ucomis[s/d] #
1714 // ja -> b # a
1715 // jp -> NaN # NaN
1716 // jb -> a # b
1717 // je #
1718 // |-jz -> a | b # a & b
1719 // | -> a #
1720 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1721 XMMRegister a, XMMRegister b,
1722 XMMRegister xmmt, Register rt,
1723 bool min, bool single) {
1724
1725 Label nan, zero, below, above, done;
1726
1727 if (single)
1728 __ ucomiss(a, b);
1729 else
1730 __ ucomisd(a, b);
1731
1732 if (dst->encoding() != (min ? b : a)->encoding())
1733 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1734 else
1735 __ jccb(Assembler::above, done);
1736
1737 __ jccb(Assembler::parity, nan); // PF=1
1738 __ jccb(Assembler::below, below); // CF=1
1739
1740 // equal
1741 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1742 if (single) {
1743 __ ucomiss(a, xmmt);
1744 __ jccb(Assembler::equal, zero);
1745
1746 __ movflt(dst, a);
1747 __ jmp(done);
1748 }
1749 else {
1750 __ ucomisd(a, xmmt);
1751 __ jccb(Assembler::equal, zero);
1752
1753 __ movdbl(dst, a);
1754 __ jmp(done);
1755 }
1756
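// Equal compare: the inputs may still differ in sign (+0.0 vs -0.0). OR-ing the
// bit patterns yields -0.0 if either input is -0.0 (correct for min); AND-ing
// yields +0.0 unless both inputs are -0.0 (correct for max).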
1757 __ bind(zero);
1758 if (min)
1759 __ vpor(dst, a, b, Assembler::AVX_128bit);
1760 else
1761 __ vpand(dst, a, b, Assembler::AVX_128bit);
1762
1763 __ jmp(done);
1764
1765 __ bind(above);
1766 if (single)
1767 __ movflt(dst, min ? b : a);
1768 else
1769 __ movdbl(dst, min ? b : a);
1770
1771 __ jmp(done);
1772
1773 __ bind(nan);
1774 if (single) {
1775 __ movl(rt, 0x7fc00000); // Float.NaN
1776 __ movdl(dst, rt);
1777 }
1778 else {
1779 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1780 __ movdq(dst, rt);
1781 }
1782 __ jmp(done);
1783
1784 __ bind(below);
1785 if (single)
1786 __ movflt(dst, min ? a : b);
1787 else
1788 __ movdbl(dst, min ? a : b);
1789
1790 __ bind(done);
1791 }
1792
1793 //=============================================================================
1794 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1795
1796 int ConstantTable::calculate_table_base_offset() const {
1797 return 0; // absolute addressing, no offset
1798 }
1799
1800 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1801 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1802 ShouldNotReachHere();
1803 }
1804
1805 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1806 // Empty encoding
1807 }
1808
1809 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1810 return 0;
1811 }
1812
1813 #ifndef PRODUCT
1814 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1815 st->print("# MachConstantBaseNode (empty encoding)");
1816 }
1817 #endif
1818
1819
1820 //=============================================================================
1821 #ifndef PRODUCT
1822 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1823 Compile* C = ra_->C;
1824
1825 int framesize = C->output()->frame_size_in_bytes();
1826 int bangsize = C->output()->bang_size_in_bytes();
1827 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1828 // Remove wordSize for return addr which is already pushed.
1829 framesize -= wordSize;
1830
1831 if (C->output()->need_stack_bang(bangsize)) {
1832 framesize -= wordSize;
1833 st->print("# stack bang (%d bytes)", bangsize);
1834 st->print("\n\t");
1835 st->print("pushq rbp\t# Save rbp");
1836 if (PreserveFramePointer) {
1837 st->print("\n\t");
1838 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1839 }
1840 if (framesize) {
1841 st->print("\n\t");
1842 st->print("subq rsp, #%d\t# Create frame",framesize);
1843 }
1844 } else {
1845 st->print("subq rsp, #%d\t# Create frame",framesize);
1846 st->print("\n\t");
1847 framesize -= wordSize;
1848 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1849 if (PreserveFramePointer) {
1850 st->print("\n\t");
1851 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1852 if (framesize > 0) {
1853 st->print("\n\t");
1854 st->print("addq rbp, #%d", framesize);
1855 }
1856 }
1857 }
1858
1859 if (VerifyStackAtCalls) {
1860 st->print("\n\t");
1861 framesize -= wordSize;
1862 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1863 #ifdef ASSERT
1864 st->print("\n\t");
1865 st->print("# stack alignment check");
1866 #endif
1867 }
1868 if (C->stub_function() != nullptr) {
1869 st->print("\n\t");
1870 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1871 st->print("\n\t");
1872 st->print("je fast_entry\t");
1873 st->print("\n\t");
1874 st->print("call #nmethod_entry_barrier_stub\t");
1875 st->print("\n\tfast_entry:");
1876 }
1877 st->cr();
1878 }
1879 #endif
1880
1881 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1882 Compile* C = ra_->C;
1883
1884 int framesize = C->output()->frame_size_in_bytes();
1885 int bangsize = C->output()->bang_size_in_bytes();
1886
1887 if (C->clinit_barrier_on_entry()) {
1888 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1889 assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");
1890
1891 Label L_skip_barrier;
1892 Register klass = rscratch1;
1893
1894 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1895 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1896
1897 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1898
1899 __ bind(L_skip_barrier);
1900 }
1901
1902 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1903
1904 C->output()->set_frame_complete(__ offset());
1905
1906 if (C->has_mach_constant_base_node()) {
1907 // NOTE: We set the table base offset here because constant table users
1908 // might be emitted before the MachConstantBaseNode.
1909 ConstantTable& constant_table = C->output()->constant_table();
1910 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1911 }
1912 }
1913
1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1915 {
1916 return MachNode::size(ra_); // too many variables; just compute it
1917 // the hard way
1918 }
1919
1920 int MachPrologNode::reloc() const
1921 {
1922 return 0; // a large enough number
1923 }
1924
1925 //=============================================================================
1926 #ifndef PRODUCT
1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1928 {
1929 Compile* C = ra_->C;
1930 if (generate_vzeroupper(C)) {
1931 st->print("vzeroupper");
1932 st->cr(); st->print("\t");
1933 }
1934
1935 int framesize = C->output()->frame_size_in_bytes();
1936 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1937 // Remove word for return adr already pushed
1938 // and RBP
1939 framesize -= 2*wordSize;
1940
1941 if (framesize) {
1942 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1943 st->print("\t");
1944 }
1945
1946 st->print_cr("popq rbp");
1947 if (do_polling() && C->is_method_compilation()) {
1948 st->print("\t");
1949 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1950 "ja #safepoint_stub\t"
1951 "# Safepoint: poll for GC");
1952 }
1953 }
1954 #endif
1955
1956 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1957 {
1958 Compile* C = ra_->C;
1959
1960 if (generate_vzeroupper(C)) {
1961 // Clear upper bits of YMM registers when current compiled code uses
1962 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1963 __ vzeroupper();
1964 }
1965
1966 int framesize = C->output()->frame_size_in_bytes();
1967 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1968 // Remove word for return adr already pushed
1969 // and RBP
1970 framesize -= 2*wordSize;
1971
1972 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1973
1974 if (framesize) {
1975 __ addq(rsp, framesize);
1976 }
1977
1978 __ popq(rbp);
1979
1980 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1981 __ reserved_stack_check();
1982 }
1983
1984 if (do_polling() && C->is_method_compilation()) {
1985 Label dummy_label;
1986 Label* code_stub = &dummy_label;
1987 if (!C->output()->in_scratch_emit_size()) {
1988 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1989 C->output()->add_stub(stub);
1990 code_stub = &stub->entry();
1991 }
1992 __ relocate(relocInfo::poll_return_type);
1993 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1994 }
1995 }
1996
1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1998 {
1999 return MachNode::size(ra_); // too many variables; just compute it
2000 // the hard way
2001 }
2002
2003 int MachEpilogNode::reloc() const
2004 {
2005 return 2; // a large enough number
2006 }
2007
2008 const Pipeline* MachEpilogNode::pipeline() const
2009 {
2010 return MachNode::pipeline_class();
2011 }
2012
2013 //=============================================================================
2014
2015 enum RC {
2016 rc_bad,
2017 rc_int,
2018 rc_kreg,
2019 rc_float,
2020 rc_stack
2021 };
2022
2023 static enum RC rc_class(OptoReg::Name reg)
2024 {
2025 if( !OptoReg::is_valid(reg) ) return rc_bad;
2026
2027 if (OptoReg::is_stack(reg)) return rc_stack;
2028
2029 VMReg r = OptoReg::as_VMReg(reg);
2030
2031 if (r->is_Register()) return rc_int;
2032
2033 if (r->is_KRegister()) return rc_kreg;
2034
2035 assert(r->is_XMMRegister(), "must be");
2036 return rc_float;
2037 }
2038
2039 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2040 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2041 int src_hi, int dst_hi, uint ireg, outputStream* st);
2042
2043 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2044 int stack_offset, int reg, uint ireg, outputStream* st);
2045
2046 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2047 int dst_offset, uint ireg, outputStream* st) {
2048 if (masm) {
2049 switch (ireg) {
2050 case Op_VecS:
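// 32-bit stack-to-stack move via rax; rax is preserved in the slot just below rsp.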
2051 __ movq(Address(rsp, -8), rax);
2052 __ movl(rax, Address(rsp, src_offset));
2053 __ movl(Address(rsp, dst_offset), rax);
2054 __ movq(rax, Address(rsp, -8));
2055 break;
2056 case Op_VecD:
2057 __ pushq(Address(rsp, src_offset));
2058 __ popq (Address(rsp, dst_offset));
2059 break;
2060 case Op_VecX:
2061 __ pushq(Address(rsp, src_offset));
2062 __ popq (Address(rsp, dst_offset));
2063 __ pushq(Address(rsp, src_offset+8));
2064 __ popq (Address(rsp, dst_offset+8));
2065 break;
2066 case Op_VecY:
2067 __ vmovdqu(Address(rsp, -32), xmm0);
2068 __ vmovdqu(xmm0, Address(rsp, src_offset));
2069 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2070 __ vmovdqu(xmm0, Address(rsp, -32));
2071 break;
2072 case Op_VecZ:
2073 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2074 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2075 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2076 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2077 break;
2078 default:
2079 ShouldNotReachHere();
2080 }
2081 #ifndef PRODUCT
2082 } else {
2083 switch (ireg) {
2084 case Op_VecS:
2085 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2086 "movl rax, [rsp + #%d]\n\t"
2087 "movl [rsp + #%d], rax\n\t"
2088 "movq rax, [rsp - #8]",
2089 src_offset, dst_offset);
2090 break;
2091 case Op_VecD:
2092 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2093 "popq [rsp + #%d]",
2094 src_offset, dst_offset);
2095 break;
2096 case Op_VecX:
2097 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2098 "popq [rsp + #%d]\n\t"
2099 "pushq [rsp + #%d]\n\t"
2100 "popq [rsp + #%d]",
2101 src_offset, dst_offset, src_offset+8, dst_offset+8);
2102 break;
2103 case Op_VecY:
2104 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2105 "vmovdqu xmm0, [rsp + #%d]\n\t"
2106 "vmovdqu [rsp + #%d], xmm0\n\t"
2107 "vmovdqu xmm0, [rsp - #32]",
2108 src_offset, dst_offset);
2109 break;
2110 case Op_VecZ:
2111 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2112 "vmovdqu xmm0, [rsp + #%d]\n\t"
2113 "vmovdqu [rsp + #%d], xmm0\n\t"
2114 "vmovdqu xmm0, [rsp - #64]",
2115 src_offset, dst_offset);
2116 break;
2117 default:
2118 ShouldNotReachHere();
2119 }
2120 #endif
2121 }
2122 }
2123
2124 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2125 PhaseRegAlloc* ra_,
2126 bool do_size,
2127 outputStream* st) const {
2128 assert(masm != nullptr || st != nullptr, "sanity");
2129 // Get registers to move
2130 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2131 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2132 OptoReg::Name dst_second = ra_->get_reg_second(this);
2133 OptoReg::Name dst_first = ra_->get_reg_first(this);
2134
2135 enum RC src_second_rc = rc_class(src_second);
2136 enum RC src_first_rc = rc_class(src_first);
2137 enum RC dst_second_rc = rc_class(dst_second);
2138 enum RC dst_first_rc = rc_class(dst_first);
2139
2140 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2141 "must move at least 1 register" );
2142
2143 if (src_first == dst_first && src_second == dst_second) {
2144 // Self copy, no move
2145 return 0;
2146 }
2147 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2148 uint ireg = ideal_reg();
2149 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2150 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
2151 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
2152 // mem -> mem
2153 int src_offset = ra_->reg2offset(src_first);
2154 int dst_offset = ra_->reg2offset(dst_first);
2155 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2156 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2157 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2158 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2159 int stack_offset = ra_->reg2offset(dst_first);
2160 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2161 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2162 int stack_offset = ra_->reg2offset(src_first);
2163 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2164 } else {
2165 ShouldNotReachHere();
2166 }
2167 return 0;
2168 }
2169 if (src_first_rc == rc_stack) {
2170 // mem ->
2171 if (dst_first_rc == rc_stack) {
2172 // mem -> mem
2173 assert(src_second != dst_first, "overlap");
2174 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2175 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2176 // 64-bit
2177 int src_offset = ra_->reg2offset(src_first);
2178 int dst_offset = ra_->reg2offset(dst_first);
2179 if (masm) {
2180 __ pushq(Address(rsp, src_offset));
2181 __ popq (Address(rsp, dst_offset));
2182 #ifndef PRODUCT
2183 } else {
2184 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2185 "popq [rsp + #%d]",
2186 src_offset, dst_offset);
2187 #endif
2188 }
2189 } else {
2190 // 32-bit
2191 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2192 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2193 // No pushl/popl, so:
2194 int src_offset = ra_->reg2offset(src_first);
2195 int dst_offset = ra_->reg2offset(dst_first);
2196 if (masm) {
2197 __ movq(Address(rsp, -8), rax);
2198 __ movl(rax, Address(rsp, src_offset));
2199 __ movl(Address(rsp, dst_offset), rax);
2200 __ movq(rax, Address(rsp, -8));
2201 #ifndef PRODUCT
2202 } else {
2203 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2204 "movl rax, [rsp + #%d]\n\t"
2205 "movl [rsp + #%d], rax\n\t"
2206 "movq rax, [rsp - #8]",
2207 src_offset, dst_offset);
2208 #endif
2209 }
2210 }
2211 return 0;
2212 } else if (dst_first_rc == rc_int) {
2213 // mem -> gpr
2214 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2215 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2216 // 64-bit
2217 int offset = ra_->reg2offset(src_first);
2218 if (masm) {
2219 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2220 #ifndef PRODUCT
2221 } else {
2222 st->print("movq %s, [rsp + #%d]\t# spill",
2223 Matcher::regName[dst_first],
2224 offset);
2225 #endif
2226 }
2227 } else {
2228 // 32-bit
2229 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2230 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2231 int offset = ra_->reg2offset(src_first);
2232 if (masm) {
2233 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2234 #ifndef PRODUCT
2235 } else {
2236 st->print("movl %s, [rsp + #%d]\t# spill",
2237 Matcher::regName[dst_first],
2238 offset);
2239 #endif
2240 }
2241 }
2242 return 0;
2243 } else if (dst_first_rc == rc_float) {
2244 // mem -> xmm
2245 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2246 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2247 // 64-bit
2248 int offset = ra_->reg2offset(src_first);
2249 if (masm) {
2250 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2251 #ifndef PRODUCT
2252 } else {
2253 st->print("%s %s, [rsp + #%d]\t# spill",
2254 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2255 Matcher::regName[dst_first],
2256 offset);
2257 #endif
2258 }
2259 } else {
2260 // 32-bit
2261 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2262 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2263 int offset = ra_->reg2offset(src_first);
2264 if (masm) {
2265 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2266 #ifndef PRODUCT
2267 } else {
2268 st->print("movss %s, [rsp + #%d]\t# spill",
2269 Matcher::regName[dst_first],
2270 offset);
2271 #endif
2272 }
2273 }
2274 return 0;
2275 } else if (dst_first_rc == rc_kreg) {
2276 // mem -> kreg
2277 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2278 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2279 // 64-bit
2280 int offset = ra_->reg2offset(src_first);
2281 if (masm) {
2282 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2283 #ifndef PRODUCT
2284 } else {
2285 st->print("kmovq %s, [rsp + #%d]\t# spill",
2286 Matcher::regName[dst_first],
2287 offset);
2288 #endif
2289 }
2290 }
2291 return 0;
2292 }
2293 } else if (src_first_rc == rc_int) {
2294 // gpr ->
2295 if (dst_first_rc == rc_stack) {
2296 // gpr -> mem
2297 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2298 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2299 // 64-bit
2300 int offset = ra_->reg2offset(dst_first);
2301 if (masm) {
2302 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2303 #ifndef PRODUCT
2304 } else {
2305 st->print("movq [rsp + #%d], %s\t# spill",
2306 offset,
2307 Matcher::regName[src_first]);
2308 #endif
2309 }
2310 } else {
2311 // 32-bit
2312 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2313 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2314 int offset = ra_->reg2offset(dst_first);
2315 if (masm) {
2316 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2317 #ifndef PRODUCT
2318 } else {
2319 st->print("movl [rsp + #%d], %s\t# spill",
2320 offset,
2321 Matcher::regName[src_first]);
2322 #endif
2323 }
2324 }
2325 return 0;
2326 } else if (dst_first_rc == rc_int) {
2327 // gpr -> gpr
2328 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2329 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2330 // 64-bit
2331 if (masm) {
2332 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2333 as_Register(Matcher::_regEncode[src_first]));
2334 #ifndef PRODUCT
2335 } else {
2336 st->print("movq %s, %s\t# spill",
2337 Matcher::regName[dst_first],
2338 Matcher::regName[src_first]);
2339 #endif
2340 }
2341 return 0;
2342 } else {
2343 // 32-bit
2344 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2345 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2346 if (masm) {
2347 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2348 as_Register(Matcher::_regEncode[src_first]));
2349 #ifndef PRODUCT
2350 } else {
2351 st->print("movl %s, %s\t# spill",
2352 Matcher::regName[dst_first],
2353 Matcher::regName[src_first]);
2354 #endif
2355 }
2356 return 0;
2357 }
2358 } else if (dst_first_rc == rc_float) {
2359 // gpr -> xmm
2360 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2361 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2362 // 64-bit
2363 if (masm) {
2364 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2365 #ifndef PRODUCT
2366 } else {
2367 st->print("movdq %s, %s\t# spill",
2368 Matcher::regName[dst_first],
2369 Matcher::regName[src_first]);
2370 #endif
2371 }
2372 } else {
2373 // 32-bit
2374 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2375 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2376 if (masm) {
2377 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2378 #ifndef PRODUCT
2379 } else {
2380 st->print("movdl %s, %s\t# spill",
2381 Matcher::regName[dst_first],
2382 Matcher::regName[src_first]);
2383 #endif
2384 }
2385 }
2386 return 0;
2387 } else if (dst_first_rc == rc_kreg) {
2388 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2389 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2390 // 64-bit
2391 if (masm) {
2392 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2393 #ifndef PRODUCT
2394 } else {
2395 st->print("kmovq %s, %s\t# spill",
2396 Matcher::regName[dst_first],
2397 Matcher::regName[src_first]);
2398 #endif
2399 }
2400 }
2401 Unimplemented();
2402 return 0;
2403 }
2404 } else if (src_first_rc == rc_float) {
2405 // xmm ->
2406 if (dst_first_rc == rc_stack) {
2407 // xmm -> mem
2408 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2409 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2410 // 64-bit
2411 int offset = ra_->reg2offset(dst_first);
2412 if (masm) {
2413 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2414 #ifndef PRODUCT
2415 } else {
2416 st->print("movsd [rsp + #%d], %s\t# spill",
2417 offset,
2418 Matcher::regName[src_first]);
2419 #endif
2420 }
2421 } else {
2422 // 32-bit
2423 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2424 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2425 int offset = ra_->reg2offset(dst_first);
2426 if (masm) {
2427 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2428 #ifndef PRODUCT
2429 } else {
2430 st->print("movss [rsp + #%d], %s\t# spill",
2431 offset,
2432 Matcher::regName[src_first]);
2433 #endif
2434 }
2435 }
2436 return 0;
2437 } else if (dst_first_rc == rc_int) {
2438 // xmm -> gpr
2439 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2440 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2441 // 64-bit
2442 if (masm) {
2443 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2444 #ifndef PRODUCT
2445 } else {
2446 st->print("movdq %s, %s\t# spill",
2447 Matcher::regName[dst_first],
2448 Matcher::regName[src_first]);
2449 #endif
2450 }
2451 } else {
2452 // 32-bit
2453 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2454 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2455 if (masm) {
2456 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2457 #ifndef PRODUCT
2458 } else {
2459 st->print("movdl %s, %s\t# spill",
2460 Matcher::regName[dst_first],
2461 Matcher::regName[src_first]);
2462 #endif
2463 }
2464 }
2465 return 0;
2466 } else if (dst_first_rc == rc_float) {
2467 // xmm -> xmm
2468 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2469 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2470 // 64-bit
2471 if (masm) {
2472 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2473 #ifndef PRODUCT
2474 } else {
2475 st->print("%s %s, %s\t# spill",
2476 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2477 Matcher::regName[dst_first],
2478 Matcher::regName[src_first]);
2479 #endif
2480 }
2481 } else {
2482 // 32-bit
2483 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2484 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2485 if (masm) {
2486 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2487 #ifndef PRODUCT
2488 } else {
2489 st->print("%s %s, %s\t# spill",
2490 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2491 Matcher::regName[dst_first],
2492 Matcher::regName[src_first]);
2493 #endif
2494 }
2495 }
2496 return 0;
2497 } else if (dst_first_rc == rc_kreg) {
2498 assert(false, "Illegal spilling");
2499 return 0;
2500 }
2501 } else if (src_first_rc == rc_kreg) {
2502 if (dst_first_rc == rc_stack) {
2503 // kreg -> mem
2504 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2505 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2506 // 64-bit
2507 int offset = ra_->reg2offset(dst_first);
2508 if (masm) {
2509 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2510 #ifndef PRODUCT
2511 } else {
2512 st->print("kmovq [rsp + #%d] , %s\t# spill",
2513 offset,
2514 Matcher::regName[src_first]);
2515 #endif
2516 }
2517 }
2518 return 0;
2519 } else if (dst_first_rc == rc_int) {
2520 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2521 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2522 // 64-bit
2523 if (masm) {
2524 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2525 #ifndef PRODUCT
2526 } else {
2527 st->print("kmovq %s, %s\t# spill",
2528 Matcher::regName[dst_first],
2529 Matcher::regName[src_first]);
2530 #endif
2531 }
2532 }
2533 Unimplemented();
2534 return 0;
2535 } else if (dst_first_rc == rc_kreg) {
2536 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2537 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2538 // 64-bit
2539 if (masm) {
2540 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2541 #ifndef PRODUCT
2542 } else {
2543 st->print("kmovq %s, %s\t# spill",
2544 Matcher::regName[dst_first],
2545 Matcher::regName[src_first]);
2546 #endif
2547 }
2548 }
2549 return 0;
2550 } else if (dst_first_rc == rc_float) {
2551 assert(false, "Illegal spill");
2552 return 0;
2553 }
2554 }
2555
2556 assert(false, "unexpected register class combination in spill copy");
2557 Unimplemented();
2558 return 0;
2559 }
2560
2561 #ifndef PRODUCT
2562 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2563 implementation(nullptr, ra_, false, st);
2564 }
2565 #endif
2566
2567 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2568 implementation(masm, ra_, false, nullptr);
2569 }
2570
2571 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2572 return MachNode::size(ra_);
2573 }
2574
2575 //=============================================================================
2576 #ifndef PRODUCT
2577 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2578 {
2579 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2580 int reg = ra_->get_reg_first(this);
2581 st->print("leaq %s, [rsp + #%d]\t# box lock",
2582 Matcher::regName[reg], offset);
2583 }
2584 #endif
2585
2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2587 {
2588 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2589 int reg = ra_->get_encode(this);
2590
2591 __ lea(as_Register(reg), Address(rsp, offset));
2592 }
2593
2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2595 {
2596 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
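// lea reg, [rsp + offset] encodes as prefix + opcode + ModRM + SIB + disp8/disp32;
// the two-byte REX2 prefix (needed for encodings above 15) adds one byte over REX.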
2597 if (ra_->get_encode(this) > 15) {
2598 return (offset < 0x80) ? 6 : 9; // REX2
2599 } else {
2600 return (offset < 0x80) ? 5 : 8; // REX
2601 }
2602 }
2603
2604 //=============================================================================
2605 #ifndef PRODUCT
2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2607 {
2608 if (UseCompressedClassPointers) {
2609 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2610 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2611 } else {
2612 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2613 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2614 }
2615 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2616 }
2617 #endif
2618
2619 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2620 {
2621 __ ic_check(InteriorEntryAlignment);
2622 }
2623
2624 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2625 {
2626 return MachNode::size(ra_); // too many variables; just compute it
2627 // the hard way
2628 }
2629
2630
2631 //=============================================================================
2632
2633 bool Matcher::supports_vector_calling_convention(void) {
2634 return EnableVectorSupport;
2635 }
2636
2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2638 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2639 }
2640
2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2642 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2643 }
2644
2645 #ifdef ASSERT
2646 static bool is_ndd_demotable(const MachNode* mdef) {
2647 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2648 }
2649 #endif
2650
2651 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2652 int oper_index) {
2653 if (mdef == nullptr) {
2654 return false;
2655 }
2656
2657 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2658 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2659 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2660 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2661 return false;
2662 }
2663
// A complex memory operand covers multiple incoming edges needed for
// address computation. Biasing the def towards any address component will
// not result in NDD demotion by the assembler.
2667 if (mdef->operand_num_edges(oper_index) != 1) {
2668 return false;
2669 }
2670
// The demotion candidate must be register-mask compatible with the definition.
2672 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2673 if (!oper_mask.overlap(mdef->out_RegMask())) {
2674 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2675 return false;
2676 }
2677
2678 switch (oper_index) {
// The first operand of a MachNode corresponding to an Intel APX NDD selection
// pattern can share its assigned register with the definition operand if
// their live ranges do not overlap. In such a scenario we can demote it to a
// legacy map0/map1 instruction by replacing its 4-byte extended
// EVEX prefix with a shorter REX/REX2 encoding. Demotion candidates
// are decorated with a special flag by the instruction selector.
2685 case 1:
2686 return is_ndd_demotable_opr1(mdef);
2687
// The definition operand of a commutative operation can be biased towards the
// second operand.
2690 case 2:
2691 return is_ndd_demotable_opr2(mdef);
2692
// The current scheme only selects up to two biasing candidates
2694 default:
2695 assert(false, "unhandled operand index: %s", mdef->Name());
2696 break;
2697 }
2698
2699 return false;
2700 }
2701
2702 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2703 assert(EnableVectorSupport, "sanity");
2704 int lo = XMM0_num;
2705 int hi = XMM0b_num;
2706 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2707 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2708 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2709 return OptoRegPair(hi, lo);
2710 }
2711
2712 // Is this branch offset short enough that a short branch can be used?
2713 //
2714 // NOTE: If the platform does not provide any short branch variants, then
2715 // this method should return false for offset 0.
2716 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
// The passed offset is relative to the address of the branch.
// On x86 a branch displacement is calculated relative to the address
// of the next instruction.
2720 offset -= br_size;
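// For example, a 2-byte short branch whose target lies 129 bytes past the
// branch address is only 127 bytes past the next instruction, so it still
// fits in an 8-bit displacement.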
2721
// The short version of jmpConUCF2 contains multiple branches,
// making the reach slightly shorter.
2724 if (rule == jmpConUCF2_rule)
2725 return (-126 <= offset && offset <= 125);
2726 return (-128 <= offset && offset <= 127);
2727 }
2728
2729 // Return whether or not this register is ever used as an argument.
2730 // This function is used on startup to build the trampoline stubs in
2731 // generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
2734 bool Matcher::can_be_java_arg(int reg)
2735 {
2736 return
2737 reg == RDI_num || reg == RDI_H_num ||
2738 reg == RSI_num || reg == RSI_H_num ||
2739 reg == RDX_num || reg == RDX_H_num ||
2740 reg == RCX_num || reg == RCX_H_num ||
2741 reg == R8_num || reg == R8_H_num ||
2742 reg == R9_num || reg == R9_H_num ||
2743 reg == R12_num || reg == R12_H_num ||
2744 reg == XMM0_num || reg == XMM0b_num ||
2745 reg == XMM1_num || reg == XMM1b_num ||
2746 reg == XMM2_num || reg == XMM2b_num ||
2747 reg == XMM3_num || reg == XMM3b_num ||
2748 reg == XMM4_num || reg == XMM4b_num ||
2749 reg == XMM5_num || reg == XMM5b_num ||
2750 reg == XMM6_num || reg == XMM6b_num ||
2751 reg == XMM7_num || reg == XMM7b_num;
2752 }
2753
2754 bool Matcher::is_spillable_arg(int reg)
2755 {
2756 return can_be_java_arg(reg);
2757 }
2758
2759 uint Matcher::int_pressure_limit()
2760 {
2761 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2762 }
2763
2764 uint Matcher::float_pressure_limit()
2765 {
// After experimenting with different values, the following default threshold
// works best for LCM's register pressure scheduling on x64.
2768 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2769 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2770 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2771 }
2772
2773 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
// In 64-bit mode, code which uses multiplication when the
// divisor is constant is faster than the hardware
// DIV instruction (it uses MulHiL).
2777 return false;
2778 }
2779
2780 // Register for DIVI projection of divmodI
2781 const RegMask& Matcher::divI_proj_mask() {
2782 return INT_RAX_REG_mask();
2783 }
2784
2785 // Register for MODI projection of divmodI
2786 const RegMask& Matcher::modI_proj_mask() {
2787 return INT_RDX_REG_mask();
2788 }
2789
2790 // Register for DIVL projection of divmodL
2791 const RegMask& Matcher::divL_proj_mask() {
2792 return LONG_RAX_REG_mask();
2793 }
2794
2795 // Register for MODL projection of divmodL
2796 const RegMask& Matcher::modL_proj_mask() {
2797 return LONG_RDX_REG_mask();
2798 }
2799
2800 %}
2801
2802 source_hpp %{
2803 // Header information of the source block.
2804 // Method declarations/definitions which are used outside
2805 // the ad-scope can conveniently be defined here.
2806 //
2807 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
2809
2810 #include "runtime/vm_version.hpp"
2811
2812 class NativeJump;
2813
2814 class CallStubImpl {
2815
2816 //--------------------------------------------------------------
2817 //---< Used for optimization in Compile::shorten_branches >---
2818 //--------------------------------------------------------------
2819
2820 public:
2821 // Size of call trampoline stub.
2822 static uint size_call_trampoline() {
2823 return 0; // no call trampolines on this platform
2824 }
2825
2826 // number of relocations needed by a call trampoline stub
2827 static uint reloc_call_trampoline() {
2828 return 0; // no call trampolines on this platform
2829 }
2830 };
2831
2832 class HandlerImpl {
2833
2834 public:
2835
2836 static int emit_deopt_handler(C2_MacroAssembler* masm);
2837
2838 static uint size_deopt_handler() {
// one 5-byte call and one 2-byte short jmp.
2840 return 7;
2841 }
2842 };
2843
2844 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2845 switch(bytes) {
2846 case 4: // fall-through
2847 case 8: // fall-through
2848 case 16: return Assembler::AVX_128bit;
2849 case 32: return Assembler::AVX_256bit;
2850 case 64: return Assembler::AVX_512bit;
2851
2852 default: {
2853 ShouldNotReachHere();
2854 return Assembler::AVX_NoVec;
2855 }
2856 }
2857 }
2858
2859 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2860 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2861 }
2862
2863 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2864 uint def_idx = use->operand_index(opnd);
2865 Node* def = use->in(def_idx);
2866 return vector_length_encoding(def);
2867 }
2868
2869 static inline bool is_vector_popcount_predicate(BasicType bt) {
2870 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2871 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2872 }
2873
2874 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2875 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2876 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2877 }
2878
2879 class Node::PD {
2880 public:
2881 enum NodeFlags : uint64_t {
2882 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2883 Flag_sets_carry_flag = Node::_last_flag << 2,
2884 Flag_sets_parity_flag = Node::_last_flag << 3,
2885 Flag_sets_zero_flag = Node::_last_flag << 4,
2886 Flag_sets_overflow_flag = Node::_last_flag << 5,
2887 Flag_sets_sign_flag = Node::_last_flag << 6,
2888 Flag_clears_carry_flag = Node::_last_flag << 7,
2889 Flag_clears_parity_flag = Node::_last_flag << 8,
2890 Flag_clears_zero_flag = Node::_last_flag << 9,
2891 Flag_clears_overflow_flag = Node::_last_flag << 10,
2892 Flag_clears_sign_flag = Node::_last_flag << 11,
2893 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2894 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2895 _last_flag = Flag_ndd_demotable_opr2
2896 };
2897 };
2898
2899 %} // end source_hpp
2900
2901 source %{
2902
2903 #include "opto/addnode.hpp"
2904 #include "c2_intelJccErratum_x86.hpp"
2905
2906 void PhaseOutput::pd_perform_mach_node_analysis() {
2907 if (VM_Version::has_intel_jcc_erratum()) {
2908 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2909 _buf_sizes._code += extra_padding;
2910 }
2911 }
2912
2913 int MachNode::pd_alignment_required() const {
2914 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2915 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2916 return IntelJccErratum::largest_jcc_size() + 1;
2917 } else {
2918 return 1;
2919 }
2920 }
2921
2922 int MachNode::compute_padding(int current_offset) const {
2923 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2924 Compile* C = Compile::current();
2925 PhaseOutput* output = C->output();
2926 Block* block = output->block();
2927 int index = output->index();
2928 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2929 } else {
2930 return 0;
2931 }
2932 }
2933
2934 // Emit deopt handler code.
2935 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2936
2937 // Note that the code buffer's insts_mark is always relative to insts.
2938 // That's why we must use the macroassembler to generate a handler.
2939 address base = __ start_a_stub(size_deopt_handler());
2940 if (base == nullptr) {
2941 ciEnv::current()->record_failure("CodeCache is full");
2942 return 0; // CodeBuffer::expand failed
2943 }
2944 int offset = __ offset();
2945
2946 Label start;
2947 __ bind(start);
2948
2949 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2950
2951 int entry_offset = __ offset();
2952
2953 __ jmp(start);
2954
2955 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2956 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2957 "out of bounds read in post-call NOP check");
2958 __ end_a_stub();
2959 return entry_offset;
2960 }
2961
2962 static Assembler::Width widthForType(BasicType bt) {
2963 if (bt == T_BYTE) {
2964 return Assembler::B;
2965 } else if (bt == T_SHORT) {
2966 return Assembler::W;
2967 } else if (bt == T_INT) {
2968 return Assembler::D;
2969 } else {
2970 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2971 return Assembler::Q;
2972 }
2973 }
2974
2975 //=============================================================================
2976
2977 // Float masks come from different places depending on platform.
2978 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2979 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2980 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2981 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2982 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2983 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2984 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2985 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2986 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2987 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2988 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2989 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2990 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2991 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2992 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2993 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2994 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2995 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2996 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2997
2998 //=============================================================================
2999 bool Matcher::match_rule_supported(int opcode) {
3000 if (!has_match_rule(opcode)) {
3001 return false; // no match rule present
3002 }
3003 switch (opcode) {
3004 case Op_AbsVL:
3005 case Op_StoreVectorScatter:
3006 if (UseAVX < 3) {
3007 return false;
3008 }
3009 break;
3010 case Op_PopCountI:
3011 case Op_PopCountL:
3012 if (!UsePopCountInstruction) {
3013 return false;
3014 }
3015 break;
3016 case Op_PopCountVI:
3017 if (UseAVX < 2) {
3018 return false;
3019 }
3020 break;
3021 case Op_CompressV:
3022 case Op_ExpandV:
3023 case Op_PopCountVL:
3024 if (UseAVX < 2) {
3025 return false;
3026 }
3027 break;
3028 case Op_MulVI:
3029 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3030 return false;
3031 }
3032 break;
3033 case Op_MulVL:
3034 if (UseSSE < 4) { // only with SSE4_1 or AVX
3035 return false;
3036 }
3037 break;
3038 case Op_MulReductionVL:
3039 if (VM_Version::supports_avx512dq() == false) {
3040 return false;
3041 }
3042 break;
3043 case Op_AbsVB:
3044 case Op_AbsVS:
3045 case Op_AbsVI:
3046 case Op_AddReductionVI:
3047 case Op_AndReductionV:
3048 case Op_OrReductionV:
3049 case Op_XorReductionV:
3050 if (UseSSE < 3) { // requires at least SSSE3
3051 return false;
3052 }
3053 break;
3054 case Op_MaxHF:
3055 case Op_MinHF:
3056 if (!VM_Version::supports_avx512vlbw()) {
3057 return false;
3058 } // fallthrough
3059 case Op_AddHF:
3060 case Op_DivHF:
3061 case Op_FmaHF:
3062 case Op_MulHF:
3063 case Op_ReinterpretS2HF:
3064 case Op_ReinterpretHF2S:
3065 case Op_SubHF:
3066 case Op_SqrtHF:
3067 if (!VM_Version::supports_avx512_fp16()) {
3068 return false;
3069 }
3070 break;
3071 case Op_VectorLoadShuffle:
3072 case Op_VectorRearrange:
3073 case Op_MulReductionVI:
3074 if (UseSSE < 4) { // requires at least SSE4
3075 return false;
3076 }
3077 break;
3078 case Op_IsInfiniteF:
3079 case Op_IsInfiniteD:
3080 if (!VM_Version::supports_avx512dq()) {
3081 return false;
3082 }
3083 break;
3084 case Op_SqrtVD:
3085 case Op_SqrtVF:
3086 case Op_VectorMaskCmp:
3087 case Op_VectorCastB2X:
3088 case Op_VectorCastS2X:
3089 case Op_VectorCastI2X:
3090 case Op_VectorCastL2X:
3091 case Op_VectorCastF2X:
3092 case Op_VectorCastD2X:
3093 case Op_VectorUCastB2X:
3094 case Op_VectorUCastS2X:
3095 case Op_VectorUCastI2X:
3096 case Op_VectorMaskCast:
3097 if (UseAVX < 1) { // enabled for AVX only
3098 return false;
3099 }
3100 break;
3101 case Op_PopulateIndex:
3102 if (UseAVX < 2) {
3103 return false;
3104 }
3105 break;
3106 case Op_RoundVF:
3107 if (UseAVX < 2) { // enabled for AVX2 only
3108 return false;
3109 }
3110 break;
3111 case Op_RoundVD:
3112 if (UseAVX < 3) {
3113 return false; // enabled for AVX3 only
3114 }
3115 break;
3116 case Op_CompareAndSwapL:
3117 case Op_CompareAndSwapP:
3118 break;
3119 case Op_StrIndexOf:
3120 if (!UseSSE42Intrinsics) {
3121 return false;
3122 }
3123 break;
3124 case Op_StrIndexOfChar:
3125 if (!UseSSE42Intrinsics) {
3126 return false;
3127 }
3128 break;
3129 case Op_OnSpinWait:
3130 if (VM_Version::supports_on_spin_wait() == false) {
3131 return false;
3132 }
3133 break;
3134 case Op_MulVB:
3135 case Op_LShiftVB:
3136 case Op_RShiftVB:
3137 case Op_URShiftVB:
3138 case Op_VectorInsert:
3139 case Op_VectorLoadMask:
3140 case Op_VectorStoreMask:
3141 case Op_VectorBlend:
3142 if (UseSSE < 4) {
3143 return false;
3144 }
3145 break;
3146 case Op_MaxD:
3147 case Op_MaxF:
3148 case Op_MinD:
3149 case Op_MinF:
3150 if (UseAVX < 1) { // enabled for AVX only
3151 return false;
3152 }
3153 break;
3154 case Op_CacheWB:
3155 case Op_CacheWBPreSync:
3156 case Op_CacheWBPostSync:
3157 if (!VM_Version::supports_data_cache_line_flush()) {
3158 return false;
3159 }
3160 break;
3161 case Op_ExtractB:
3162 case Op_ExtractL:
3163 case Op_ExtractI:
3164 case Op_RoundDoubleMode:
3165 if (UseSSE < 4) {
3166 return false;
3167 }
3168 break;
3169 case Op_RoundDoubleModeV:
3170 if (VM_Version::supports_avx() == false) {
3171 return false; // 128bit vroundpd is not available
3172 }
3173 break;
3174 case Op_LoadVectorGather:
3175 case Op_LoadVectorGatherMasked:
3176 if (UseAVX < 2) {
3177 return false;
3178 }
3179 break;
3180 case Op_FmaF:
3181 case Op_FmaD:
3182 case Op_FmaVD:
3183 case Op_FmaVF:
3184 if (!UseFMA) {
3185 return false;
3186 }
3187 break;
3188 case Op_MacroLogicV:
3189 if (UseAVX < 3 || !UseVectorMacroLogic) {
3190 return false;
3191 }
3192 break;
3193
3194 case Op_VectorCmpMasked:
3195 case Op_VectorMaskGen:
3196 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3197 return false;
3198 }
3199 break;
3200 case Op_VectorMaskFirstTrue:
3201 case Op_VectorMaskLastTrue:
3202 case Op_VectorMaskTrueCount:
3203 case Op_VectorMaskToLong:
3204 if (UseAVX < 1) {
3205 return false;
3206 }
3207 break;
3208 case Op_RoundF:
3209 case Op_RoundD:
3210 break;
3211 case Op_CopySignD:
3212 case Op_CopySignF:
3213 if (UseAVX < 3) {
3214 return false;
3215 }
3216 if (!VM_Version::supports_avx512vl()) {
3217 return false;
3218 }
3219 break;
3220 case Op_CompressBits:
3221 case Op_ExpandBits:
3222 if (!VM_Version::supports_bmi2()) {
3223 return false;
3224 }
3225 break;
3226 case Op_CompressM:
3227 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3228 return false;
3229 }
3230 break;
3231 case Op_ConvF2HF:
3232 case Op_ConvHF2F:
3233 if (!VM_Version::supports_float16()) {
3234 return false;
3235 }
3236 break;
3237 case Op_VectorCastF2HF:
3238 case Op_VectorCastHF2F:
3239 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3240 return false;
3241 }
3242 break;
3243 }
3244 return true; // Match rules are supported by default.
3245 }
3246
3247 //------------------------------------------------------------------------
3248
3249 static inline bool is_pop_count_instr_target(BasicType bt) {
3250 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3251 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3252 }
3253
3254 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3255 return match_rule_supported_vector(opcode, vlen, bt);
3256 }
3257
3258 // Identify extra cases that we might want to provide match rules for vector nodes and
3259 // other intrinsics guarded with vector length (vlen) and element type (bt).
3260 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3261 if (!match_rule_supported(opcode)) {
3262 return false;
3263 }
3264 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3265 // * SSE2 supports 128bit vectors for all types;
3266 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3267 // * AVX2 supports 256bit vectors for all types;
3268 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3269 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3270 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3271 // And MaxVectorSize is taken into account as well.
3272 if (!vector_size_supported(bt, vlen)) {
3273 return false;
3274 }
3275 // Special cases which require vector length follow:
3276 // * implementation limitations
3277 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3278 // * 128bit vroundpd instruction is present only in AVX1
3279 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
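// For example, vlen == 8 with bt == T_INT gives 8 * 4 * 8 = 256 bits.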
3280 switch (opcode) {
3281 case Op_MaxVHF:
3282 case Op_MinVHF:
3283 if (!VM_Version::supports_avx512bw()) {
3284 return false;
} // fallthrough
3286 case Op_AddVHF:
3287 case Op_DivVHF:
3288 case Op_FmaVHF:
3289 case Op_MulVHF:
3290 case Op_SubVHF:
3291 case Op_SqrtVHF:
3292 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3293 return false;
3294 }
3295 if (!VM_Version::supports_avx512_fp16()) {
3296 return false;
3297 }
3298 break;
3299 case Op_AbsVF:
3300 case Op_NegVF:
3301 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3302 return false; // 512bit vandps and vxorps are not available
3303 }
3304 break;
3305 case Op_AbsVD:
3306 case Op_NegVD:
3307 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3308 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3309 }
3310 break;
3311 case Op_RotateRightV:
3312 case Op_RotateLeftV:
3313 if (bt != T_INT && bt != T_LONG) {
3314 return false;
3315 } // fallthrough
3316 case Op_MacroLogicV:
3317 if (!VM_Version::supports_evex() ||
3318 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3319 return false;
3320 }
3321 break;
3322 case Op_ClearArray:
3323 case Op_VectorMaskGen:
3324 case Op_VectorCmpMasked:
3325 if (!VM_Version::supports_avx512bw()) {
3326 return false;
3327 }
3328 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3329 return false;
3330 }
3331 break;
3332 case Op_LoadVectorMasked:
3333 case Op_StoreVectorMasked:
3334 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3335 return false;
3336 }
3337 break;
3338 case Op_UMinV:
3339 case Op_UMaxV:
3340 if (UseAVX == 0) {
3341 return false;
3342 }
3343 break;
3344 case Op_MaxV:
3345 case Op_MinV:
3346 if (UseSSE < 4 && is_integral_type(bt)) {
3347 return false;
3348 }
3349 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3350 // Float/Double intrinsics are enabled for AVX family currently.
3351 if (UseAVX == 0) {
3352 return false;
3353 }
3354 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3355 return false;
3356 }
3357 }
3358 break;
3359 case Op_CallLeafVector:
3360 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3361 return false;
3362 }
3363 break;
3364 case Op_AddReductionVI:
3365 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3366 return false;
3367 }
3368 // fallthrough
3369 case Op_AndReductionV:
3370 case Op_OrReductionV:
3371 case Op_XorReductionV:
3372 if (is_subword_type(bt) && (UseSSE < 4)) {
3373 return false;
3374 }
3375 break;
3376 case Op_MinReductionV:
3377 case Op_MaxReductionV:
3378 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3379 return false;
3380 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3381 return false;
3382 }
3383 // Float/Double intrinsics enabled for AVX family.
3384 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3385 return false;
3386 }
3387 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3388 return false;
3389 }
3390 break;
3391 case Op_VectorBlend:
3392 if (UseAVX == 0 && size_in_bits < 128) {
3393 return false;
3394 }
3395 break;
3396 case Op_VectorTest:
3397 if (UseSSE < 4) {
3398 return false; // Implementation limitation
3399 } else if (size_in_bits < 32) {
3400 return false; // Implementation limitation
3401 }
3402 break;
3403 case Op_VectorLoadShuffle:
3404 case Op_VectorRearrange:
3405 if(vlen == 2) {
3406 return false; // Implementation limitation due to how shuffle is loaded
3407 } else if (size_in_bits == 256 && UseAVX < 2) {
3408 return false; // Implementation limitation
3409 }
3410 break;
3411 case Op_VectorLoadMask:
3412 case Op_VectorMaskCast:
3413 if (size_in_bits == 256 && UseAVX < 2) {
3414 return false; // Implementation limitation
3415 }
3416 // fallthrough
3417 case Op_VectorStoreMask:
3418 if (vlen == 2) {
3419 return false; // Implementation limitation
3420 }
3421 break;
3422 case Op_PopulateIndex:
3423 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3424 return false;
3425 }
3426 break;
3427 case Op_VectorCastB2X:
3428 case Op_VectorCastS2X:
3429 case Op_VectorCastI2X:
3430 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3431 return false;
3432 }
3433 break;
3434 case Op_VectorCastL2X:
3435 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3436 return false;
3437 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3438 return false;
3439 }
3440 break;
3441 case Op_VectorCastF2X: {
// As per JLS section 5.1.3, a narrowing conversion to a sub-word type
// happens after an intermediate conversion to integer, and the special
// handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3445 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3446 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3447 return false;
3448 }
3449 }
3450 // fallthrough
3451 case Op_VectorCastD2X:
3452 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3453 return false;
3454 }
3455 break;
3456 case Op_VectorCastF2HF:
3457 case Op_VectorCastHF2F:
3458 if (!VM_Version::supports_f16c() &&
3459 ((!VM_Version::supports_evex() ||
3460 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3461 return false;
3462 }
3463 break;
3464 case Op_RoundVD:
3465 if (!VM_Version::supports_avx512dq()) {
3466 return false;
3467 }
3468 break;
3469 case Op_MulReductionVI:
3470 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3471 return false;
3472 }
3473 break;
3474 case Op_LoadVectorGatherMasked:
3475 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3476 return false;
3477 }
3478 if (is_subword_type(bt) &&
3479 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3480 (size_in_bits < 64) ||
3481 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3482 return false;
3483 }
3484 break;
3485 case Op_StoreVectorScatterMasked:
3486 case Op_StoreVectorScatter:
3487 if (is_subword_type(bt)) {
3488 return false;
3489 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3490 return false;
3491 }
3492 // fallthrough
3493 case Op_LoadVectorGather:
3494 if (!is_subword_type(bt) && size_in_bits == 64) {
3495 return false;
3496 }
3497 if (is_subword_type(bt) && size_in_bits < 64) {
3498 return false;
3499 }
3500 break;
3501 case Op_SaturatingAddV:
3502 case Op_SaturatingSubV:
3503 if (UseAVX < 1) {
3504 return false; // Implementation limitation
3505 }
3506 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3507 return false;
3508 }
3509 break;
3510 case Op_SelectFromTwoVector:
3511 if (size_in_bits < 128) {
3512 return false;
3513 }
3514 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3515 return false;
3516 }
3517 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3518 return false;
3519 }
3520 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3521 return false;
3522 }
3523 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3524 return false;
3525 }
3526 break;
3527 case Op_MaskAll:
3528 if (!VM_Version::supports_evex()) {
3529 return false;
3530 }
3531 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3532 return false;
3533 }
3534 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3535 return false;
3536 }
3537 break;
3538 case Op_VectorMaskCmp:
3539 if (vlen < 2 || size_in_bits < 32) {
3540 return false;
3541 }
3542 break;
3543 case Op_CompressM:
3544 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3545 return false;
3546 }
3547 break;
3548 case Op_CompressV:
3549 case Op_ExpandV:
3550 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3551 return false;
3552 }
3553 if (size_in_bits < 128 ) {
3554 return false;
3555 }
3556 case Op_VectorLongToMask:
3557 if (UseAVX < 1) {
3558 return false;
3559 }
3560 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3561 return false;
3562 }
3563 break;
3564 case Op_SignumVD:
3565 case Op_SignumVF:
3566 if (UseAVX < 1) {
3567 return false;
3568 }
3569 break;
3570 case Op_PopCountVI:
3571 case Op_PopCountVL: {
3572 if (!is_pop_count_instr_target(bt) &&
3573 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3574 return false;
3575 }
3576 }
3577 break;
3578 case Op_ReverseV:
3579 case Op_ReverseBytesV:
3580 if (UseAVX < 2) {
3581 return false;
3582 }
3583 break;
3584 case Op_CountTrailingZerosV:
3585 case Op_CountLeadingZerosV:
3586 if (UseAVX < 2) {
3587 return false;
3588 }
3589 break;
3590 }
3591 return true; // Per default match rules are supported.
3592 }
3593
3594 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
// The ADLC-based match_rule_supported routine checks for the existence of a
// pattern based on the IR opcode. Most of the unary/binary/ternary masked
// operations share the IR nodes of their non-masked counterparts, with the
// mask edge being the differentiator.
// This routine does a strict check on the existence of masked operation patterns
// by returning a default false value for all the other opcodes apart from the
// ones whose masked instruction patterns are defined in this file.
3601 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3602 return false;
3603 }
3604
3605 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3606 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3607 return false;
3608 }
3609 switch(opcode) {
3610 // Unary masked operations
3611 case Op_AbsVB:
3612 case Op_AbsVS:
3613 if(!VM_Version::supports_avx512bw()) {
3614 return false; // Implementation limitation
3615 }
3616 case Op_AbsVI:
3617 case Op_AbsVL:
3618 return true;
3619
3620 // Ternary masked operations
3621 case Op_FmaVF:
3622 case Op_FmaVD:
3623 return true;
3624
3625 case Op_MacroLogicV:
3626 if(bt != T_INT && bt != T_LONG) {
3627 return false;
3628 }
3629 return true;
3630
3631 // Binary masked operations
3632 case Op_AddVB:
3633 case Op_AddVS:
3634 case Op_SubVB:
3635 case Op_SubVS:
3636 case Op_MulVS:
3637 case Op_LShiftVS:
3638 case Op_RShiftVS:
3639 case Op_URShiftVS:
3640 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3641 if (!VM_Version::supports_avx512bw()) {
3642 return false; // Implementation limitation
3643 }
3644 return true;
3645
3646 case Op_MulVL:
3647 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3648 if (!VM_Version::supports_avx512dq()) {
3649 return false; // Implementation limitation
3650 }
3651 return true;
3652
3653 case Op_AndV:
3654 case Op_OrV:
3655 case Op_XorV:
3656 case Op_RotateRightV:
3657 case Op_RotateLeftV:
3658 if (bt != T_INT && bt != T_LONG) {
3659 return false; // Implementation limitation
3660 }
3661 return true;
3662
3663 case Op_VectorLoadMask:
3664 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3665 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3666 return false;
3667 }
3668 return true;
3669
3670 case Op_AddVI:
3671 case Op_AddVL:
3672 case Op_AddVF:
3673 case Op_AddVD:
3674 case Op_SubVI:
3675 case Op_SubVL:
3676 case Op_SubVF:
3677 case Op_SubVD:
3678 case Op_MulVI:
3679 case Op_MulVF:
3680 case Op_MulVD:
3681 case Op_DivVF:
3682 case Op_DivVD:
3683 case Op_SqrtVF:
3684 case Op_SqrtVD:
3685 case Op_LShiftVI:
3686 case Op_LShiftVL:
3687 case Op_RShiftVI:
3688 case Op_RShiftVL:
3689 case Op_URShiftVI:
3690 case Op_URShiftVL:
3691 case Op_LoadVectorMasked:
3692 case Op_StoreVectorMasked:
3693 case Op_LoadVectorGatherMasked:
3694 case Op_StoreVectorScatterMasked:
3695 return true;
3696
3697 case Op_UMinV:
3698 case Op_UMaxV:
3699 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3700 return false;
3701 } // fallthrough
3702 case Op_MaxV:
3703 case Op_MinV:
3704 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3705 return false; // Implementation limitation
3706 }
3707 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3708 return false; // Implementation limitation
3709 }
3710 return true;
3711 case Op_SaturatingAddV:
3712 case Op_SaturatingSubV:
3713 if (!is_subword_type(bt)) {
3714 return false;
3715 }
3716 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3717 return false; // Implementation limitation
3718 }
3719 return true;
3720
3721 case Op_VectorMaskCmp:
3722 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3723 return false; // Implementation limitation
3724 }
3725 return true;
3726
3727 case Op_VectorRearrange:
3728 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3729 return false; // Implementation limitation
3730 }
3731 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3732 return false; // Implementation limitation
3733 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3734 return false; // Implementation limitation
3735 }
3736 return true;
3737
3738 // Binary Logical operations
3739 case Op_AndVMask:
3740 case Op_OrVMask:
3741 case Op_XorVMask:
3742 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3743 return false; // Implementation limitation
3744 }
3745 return true;
3746
3747 case Op_PopCountVI:
3748 case Op_PopCountVL:
3749 if (!is_pop_count_instr_target(bt)) {
3750 return false;
3751 }
3752 return true;
3753
3754 case Op_MaskAll:
3755 return true;
3756
3757 case Op_CountLeadingZerosV:
3758 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3759 return true;
3760 }
3761 default:
3762 return false;
3763 }
3764 }
3765
3766 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3767 return false;
3768 }
3769
3770 // Return true if Vector::rearrange needs preparation of the shuffle argument
3771 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3772 switch (elem_bt) {
3773 case T_BYTE: return false;
3774 case T_SHORT: return !VM_Version::supports_avx512bw();
3775 case T_INT: return !VM_Version::supports_avx();
3776 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3777 default:
3778 ShouldNotReachHere();
3779 return false;
3780 }
3781 }
3782
3783 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3784 // Prefer predicate if the mask type is "TypeVectMask".
3785 return vt->isa_vectmask() != nullptr;
3786 }
3787
3788 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3789 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3790 bool legacy = (generic_opnd->opcode() == LEGVEC);
3791 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3792 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3793 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3794 return new legVecZOper();
3795 }
3796 if (legacy) {
3797 switch (ideal_reg) {
3798 case Op_VecS: return new legVecSOper();
3799 case Op_VecD: return new legVecDOper();
3800 case Op_VecX: return new legVecXOper();
3801 case Op_VecY: return new legVecYOper();
3802 case Op_VecZ: return new legVecZOper();
3803 }
3804 } else {
3805 switch (ideal_reg) {
3806 case Op_VecS: return new vecSOper();
3807 case Op_VecD: return new vecDOper();
3808 case Op_VecX: return new vecXOper();
3809 case Op_VecY: return new vecYOper();
3810 case Op_VecZ: return new vecZOper();
3811 }
3812 }
3813 ShouldNotReachHere();
3814 return nullptr;
3815 }
3816
3817 bool Matcher::is_reg2reg_move(MachNode* m) {
3818 switch (m->rule()) {
3819 case MoveVec2Leg_rule:
3820 case MoveLeg2Vec_rule:
3821 case MoveF2VL_rule:
3822 case MoveF2LEG_rule:
3823 case MoveVL2F_rule:
3824 case MoveLEG2F_rule:
3825 case MoveD2VL_rule:
3826 case MoveD2LEG_rule:
3827 case MoveVL2D_rule:
3828 case MoveLEG2D_rule:
3829 return true;
3830 default:
3831 return false;
3832 }
3833 }
3834
3835 bool Matcher::is_generic_vector(MachOper* opnd) {
3836 switch (opnd->opcode()) {
3837 case VEC:
3838 case LEGVEC:
3839 return true;
3840 default:
3841 return false;
3842 }
3843 }
3844
3845 //------------------------------------------------------------------------
3846
3847 const RegMask* Matcher::predicate_reg_mask(void) {
3848 return &_VECTMASK_REG_mask;
3849 }
3850
3851 // Max vector size in bytes. 0 if not supported.
3852 int Matcher::vector_width_in_bytes(BasicType bt) {
3853 assert(is_java_primitive(bt), "only primitive type vectors");
// SSE2 supports 128bit vectors for all types.
// AVX2 supports 256bit vectors for all types.
// AVX-512/EVEX supports 512bit vectors for all types.
3857 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
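// UseAVX == 2 gives (1 << 2) * 8 = 32 bytes, UseAVX == 3 gives 64 bytes.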
3858 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3859 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3860 size = (UseAVX > 2) ? 64 : 32;
3861 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3862 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3863 // Use flag to limit vector size.
3864 size = MIN2(size,(int)MaxVectorSize);
3865 // Minimum 2 values in vector (or 4 for bytes).
3866 switch (bt) {
3867 case T_DOUBLE:
3868 case T_LONG:
3869 if (size < 16) return 0;
3870 break;
3871 case T_FLOAT:
3872 case T_INT:
3873 if (size < 8) return 0;
3874 break;
3875 case T_BOOLEAN:
3876 if (size < 4) return 0;
3877 break;
3878 case T_CHAR:
3879 if (size < 4) return 0;
3880 break;
3881 case T_BYTE:
3882 if (size < 4) return 0;
3883 break;
3884 case T_SHORT:
3885 if (size < 4) return 0;
3886 break;
3887 default:
3888 ShouldNotReachHere();
3889 }
3890 return size;
3891 }
3892
3893 // Limits on vector size (number of elements) loaded into vector.
3894 int Matcher::max_vector_size(const BasicType bt) {
3895 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3896 }
3897 int Matcher::min_vector_size(const BasicType bt) {
3898 int max_size = max_vector_size(bt);
3899 // Min size which can be loaded into vector is 4 bytes.
3900 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
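// That is, at least 4 elements for byte-sized types and 2 elements otherwise.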
3901 // Support for calling svml double64 vectors
3902 if (bt == T_DOUBLE) {
3903 size = 1;
3904 }
3905 return MIN2(size,max_size);
3906 }
3907
3908 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3909 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3910 // by default on Cascade Lake
3911 if (VM_Version::is_default_intel_cascade_lake()) {
3912 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3913 }
3914 return Matcher::max_vector_size(bt);
3915 }
3916
3917 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3918 return -1;
3919 }
3920
3921 // Vector ideal reg corresponding to specified size in bytes
3922 uint Matcher::vector_ideal_reg(int size) {
3923 assert(MaxVectorSize >= size, "");
3924 switch(size) {
3925 case 4: return Op_VecS;
3926 case 8: return Op_VecD;
3927 case 16: return Op_VecX;
3928 case 32: return Op_VecY;
3929 case 64: return Op_VecZ;
3930 }
3931 ShouldNotReachHere();
3932 return 0;
3933 }
3934
// Check for shift by a small constant as well. x86 addressing modes support
// scale factors of 1, 2, 4 and 8, so only shift counts of at most 3 can be
// folded into an address expression.
3936 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3937 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3938 shift->in(2)->get_int() <= 3 &&
3939 // Are there other uses besides address expressions?
3940 !matcher->is_visited(shift)) {
3941 address_visited.set(shift->_idx); // Flag as address_visited
3942 mstack.push(shift->in(2), Matcher::Visit);
3943 Node *conv = shift->in(1);
// Allow the Matcher to match the rule which bypasses
// the ConvI2L operation for an array index on LP64
// if the index value is positive.
3947 if (conv->Opcode() == Op_ConvI2L &&
3948 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3949 // Are there other uses besides address expressions?
3950 !matcher->is_visited(conv)) {
3951 address_visited.set(conv->_idx); // Flag as address_visited
3952 mstack.push(conv->in(1), Matcher::Pre_Visit);
3953 } else {
3954 mstack.push(conv, Matcher::Pre_Visit);
3955 }
3956 return true;
3957 }
3958 return false;
3959 }
3960
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the graph can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
3967 //
3968 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3969 // This is a temporary solution until we make DAGs expressible in ADL.
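// For example, in is_bmi_pattern() below, bmii.match(Op_AndI, -1, Op_SubI, 1, 0)
// recognizes (AndI (SubI 0 LoadI) LoadI), i.e. a[i] & -a[i] with a single load,
// which maps to the one-instruction blsi form.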
3970 template<typename ConType>
3971 class FusedPatternMatcher {
3972 Node* _op1_node;
3973 Node* _mop_node;
3974 int _con_op;
3975
3976 static int match_next(Node* n, int next_op, int next_op_idx) {
3977 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3978 return -1;
3979 }
3980
3981 if (next_op_idx == -1) { // n is commutative, try rotations
3982 if (n->in(1)->Opcode() == next_op) {
3983 return 1;
3984 } else if (n->in(2)->Opcode() == next_op) {
3985 return 2;
3986 }
3987 } else {
3988 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3989 if (n->in(next_op_idx)->Opcode() == next_op) {
3990 return next_op_idx;
3991 }
3992 }
3993 return -1;
3994 }
3995
3996 public:
3997 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3998 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3999
4000 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4001 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4002 typename ConType::NativeType con_value) {
4003 if (_op1_node->Opcode() != op1) {
4004 return false;
4005 }
4006 if (_mop_node->outcnt() > 2) {
4007 return false;
4008 }
4009 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4010 if (op1_op2_idx == -1) {
4011 return false;
4012 }
4013 // Memory operation must be the other edge
4014 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4015
4016 // Check that the mop node is really what we want
4017 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4018 Node* op2_node = _op1_node->in(op1_op2_idx);
4019 if (op2_node->outcnt() > 1) {
4020 return false;
4021 }
4022 assert(op2_node->Opcode() == op2, "Should be");
4023 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4024 if (op2_con_idx == -1) {
4025 return false;
4026 }
4027 // Memory operation must be the other edge
4028 int op2_mop_idx = (op2_con_idx & 1) + 1;
4029 // Check that the memory operation is the same node
4030 if (op2_node->in(op2_mop_idx) == _mop_node) {
4031 // Now check the constant
4032 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4033 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4034 return true;
4035 }
4036 }
4037 }
4038 return false;
4039 }
4040 };
4041
4042 static bool is_bmi_pattern(Node* n, Node* m) {
4043 assert(UseBMI1Instructions, "sanity");
4044 if (n != nullptr && m != nullptr) {
4045 if (m->Opcode() == Op_LoadI) {
4046 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4047 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4048 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4049 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4050 } else if (m->Opcode() == Op_LoadL) {
4051 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4052 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4053 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4054 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4055 }
4056 }
4057 return false;
4058 }
4059
4060 // Should the matcher clone input 'm' of node 'n'?
4061 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4062 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4063 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4064 mstack.push(m, Visit);
4065 return true;
4066 }
4067 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4068 mstack.push(m, Visit); // m = ShiftCntV
4069 return true;
4070 }
4071 if (is_encode_and_store_pattern(n, m)) {
4072 mstack.push(m, Visit);
4073 return true;
4074 }
4075 return false;
4076 }
4077
4078 // Should the Matcher clone shifts on addressing modes, expecting them
4079 // to be subsumed into complex addressing expressions or compute them
4080 // into registers?
4081 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4082 Node *off = m->in(AddPNode::Offset);
4083 if (off->is_Con()) {
4084 address_visited.test_set(m->_idx); // Flag as address_visited
4085 Node *adr = m->in(AddPNode::Address);
4086
4087 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4088 // AtomicAdd is not an addressing expression.
4089 // Cheap to find it by looking for screwy base.
4090 if (adr->is_AddP() &&
4091 !adr->in(AddPNode::Base)->is_top() &&
4092 !adr->in(AddPNode::Offset)->is_Con() &&
4093 off->get_long() == (int) (off->get_long()) && // immL32
4094 // Are there other uses besides address expressions?
4095 !is_visited(adr)) {
4096 address_visited.set(adr->_idx); // Flag as address_visited
4097 Node *shift = adr->in(AddPNode::Offset);
4098 if (!clone_shift(shift, this, mstack, address_visited)) {
4099 mstack.push(shift, Pre_Visit);
4100 }
4101 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4102 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4103 } else {
4104 mstack.push(adr, Pre_Visit);
4105 }
4106
4107 // Clone X+offset as it also folds into most addressing expressions
4108 mstack.push(off, Visit);
4109 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4110 return true;
4111 } else if (clone_shift(off, this, mstack, address_visited)) {
4112 address_visited.test_set(m->_idx); // Flag as address_visited
4113 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4114 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4115 return true;
4116 }
4117 return false;
4118 }
4119
4120 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4121 switch (bt) {
4122 case BoolTest::eq:
4123 return Assembler::eq;
4124 case BoolTest::ne:
4125 return Assembler::neq;
4126 case BoolTest::le:
4127 case BoolTest::ule:
4128 return Assembler::le;
4129 case BoolTest::ge:
4130 case BoolTest::uge:
4131 return Assembler::nlt;
4132 case BoolTest::lt:
4133 case BoolTest::ult:
4134 return Assembler::lt;
4135 case BoolTest::gt:
4136 case BoolTest::ugt:
4137 return Assembler::nle;
4138 default : ShouldNotReachHere(); return Assembler::_false;
4139 }
4140 }
4141
4142 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4143 switch (bt) {
4144 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4145 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4146 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4147 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4148 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4149 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4150 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4151 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4152 }
4153 }
4154
4155 // Helper methods for MachSpillCopyNode::implementation().
4156 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4157 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4158 assert(ireg == Op_VecS || // 32bit vector
4159 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4160 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4161 "no non-adjacent vector moves" );
4162 if (masm) {
4163 switch (ireg) {
4164 case Op_VecS: // copy whole register
4165 case Op_VecD:
4166 case Op_VecX:
4167 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4168 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4169 } else {
4170 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4171 }
4172 break;
4173 case Op_VecY:
4174 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4175 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4176 } else {
4177 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4178 }
4179 break;
4180 case Op_VecZ:
4181 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4182 break;
4183 default:
4184 ShouldNotReachHere();
4185 }
4186 #ifndef PRODUCT
4187 } else {
4188 switch (ireg) {
4189 case Op_VecS:
4190 case Op_VecD:
4191 case Op_VecX:
4192 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4193 break;
4194 case Op_VecY:
4195 case Op_VecZ:
4196 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4197 break;
4198 default:
4199 ShouldNotReachHere();
4200 }
4201 #endif
4202 }
4203 }
4204
4205 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4206 int stack_offset, int reg, uint ireg, outputStream* st) {
4207 if (masm) {
4208 if (is_load) {
4209 switch (ireg) {
4210 case Op_VecS:
4211 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4212 break;
4213 case Op_VecD:
4214 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4215 break;
4216 case Op_VecX:
4217 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4218 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4219 } else {
4220 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4221 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4222 }
4223 break;
4224 case Op_VecY:
4225 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4226 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4227 } else {
4228 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4229 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4230 }
4231 break;
4232 case Op_VecZ:
4233 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4234 break;
4235 default:
4236 ShouldNotReachHere();
4237 }
4238 } else { // store
4239 switch (ireg) {
4240 case Op_VecS:
4241 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4242 break;
4243 case Op_VecD:
4244 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4245 break;
4246 case Op_VecX:
4247 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4248 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4249 }
4250 else {
4251 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4252 }
4253 break;
4254 case Op_VecY:
4255 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4256 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4257 }
4258 else {
4259 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4260 }
4261 break;
4262 case Op_VecZ:
4263 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4264 break;
4265 default:
4266 ShouldNotReachHere();
4267 }
4268 }
4269 #ifndef PRODUCT
4270 } else {
4271 if (is_load) {
4272 switch (ireg) {
4273 case Op_VecS:
4274 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4275 break;
4276 case Op_VecD:
4277 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4278 break;
4279 case Op_VecX:
4280 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4281 break;
4282 case Op_VecY:
4283 case Op_VecZ:
4284 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4285 break;
4286 default:
4287 ShouldNotReachHere();
4288 }
4289 } else { // store
4290 switch (ireg) {
4291 case Op_VecS:
4292 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4293 break;
4294 case Op_VecD:
4295 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4296 break;
4297 case Op_VecX:
4298 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4299 break;
4300 case Op_VecY:
4301 case Op_VecZ:
4302 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4303 break;
4304 default:
4305 ShouldNotReachHere();
4306 }
4307 }
4308 #endif
4309 }
4310 }
4311
4312 template <class T>
4313 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4314 int size = type2aelembytes(bt) * len;
4315 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4316 for (int i = 0; i < len; i++) {
4317 int offset = i * type2aelembytes(bt);
4318 switch (bt) {
4319 case T_BYTE: val->at(i) = con; break;
4320 case T_SHORT: {
4321 jshort c = con;
4322 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4323 break;
4324 }
4325 case T_INT: {
4326 jint c = con;
4327 memcpy(val->adr_at(offset), &c, sizeof(jint));
4328 break;
4329 }
4330 case T_LONG: {
4331 jlong c = con;
4332 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4333 break;
4334 }
4335 case T_FLOAT: {
4336 jfloat c = con;
4337 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4338 break;
4339 }
4340 case T_DOUBLE: {
4341 jdouble c = con;
4342 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4343 break;
4344 }
4345 default: assert(false, "%s", type2name(bt));
4346 }
4347 }
4348 return val;
4349 }
4350
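// Returns a 64-bit pattern with only the per-element sign bit set in each lane
// of the given type, e.g. 0x80 replicated in every byte for T_BYTE.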
4351 static inline jlong high_bit_set(BasicType bt) {
4352 switch (bt) {
4353 case T_BYTE: return 0x8080808080808080;
4354 case T_SHORT: return 0x8000800080008000;
4355 case T_INT: return 0x8000000080000000;
4356 case T_LONG: return 0x8000000000000000;
4357 default:
4358 ShouldNotReachHere();
4359 return 0;
4360 }
4361 }
4362
4363 #ifndef PRODUCT
4364 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4365 st->print("nop \t# %d bytes pad for loops and calls", _count);
4366 }
4367 #endif
4368
4369 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4370 __ nop(_count);
4371 }
4372
4373 uint MachNopNode::size(PhaseRegAlloc*) const {
4374 return _count;
4375 }
4376
4377 #ifndef PRODUCT
4378 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4379 st->print("# breakpoint");
4380 }
4381 #endif
4382
4383 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4384 __ int3();
4385 }
4386
4387 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4388 return MachNode::size(ra_);
4389 }
4390
4391 %}
4392
4393 //----------ENCODING BLOCK-----------------------------------------------------
4394 // This block specifies the encoding classes used by the compiler to
4395 // output byte streams. Encoding classes are parameterized macros
4396 // used by Machine Instruction Nodes in order to generate the bit
4397 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER. REG_INTER causes an operand to generate a function
4401 // which returns its register number when queried. CONST_INTER causes
4402 // an operand to generate a function which returns the value of the
4403 // constant when queried. MEMORY_INTER causes an operand to generate
4404 // four functions which return the Base Register, the Index Register,
4405 // the Scale Value, and the Offset Value of the operand when queried.
4406 // COND_INTER causes an operand to generate six functions which return
4407 // the encoding code (ie - encoding bits for the instruction)
4408 // associated with each basic boolean condition for a conditional
4409 // instruction.
4410 //
4411 // Instructions specify two basic values for encoding. Again, a
4412 // function is available to check if the constant displacement is an
4413 // oop. They use the ins_encode keyword to specify their encoding
4414 // classes (which must be a sequence of enc_class names, and their
4415 // parameters, specified in the encoding block), and they use the
4416 // opcode keyword to specify, in order, their primary, secondary, and
4417 // tertiary opcode. Only the opcode sections which a particular
4418 // instruction needs for encoding need to be specified.
4419 encode %{
4420 enc_class cdql_enc(no_rax_rdx_RegI div)
4421 %{
4422 // Full implementation of Java idiv and irem; checks for
4423 // special case as described in JVM spec., p.243 & p.271.
4424 //
4425 // normal case special case
4426 //
4427 // input : rax: dividend min_int
4428 // reg: divisor -1
4429 //
4430 // output: rax: quotient (= rax idiv reg) min_int
4431 // rdx: remainder (= rax irem reg) 0
4432 //
4433     //  Code sequence:
4434 //
4435 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4436 // 5: 75 07/08 jne e <normal>
4437 // 7: 33 d2 xor %edx,%edx
4438 // [div >= 8 -> offset + 1]
4439 // [REX_B]
4440 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4441 // c: 74 03/04 je 11 <done>
4442 // 000000000000000e <normal>:
4443 // e: 99 cltd
4444 // [div >= 8 -> offset + 1]
4445 // [REX_B]
4446 // f: f7 f9 idiv $div
4447 // 0000000000000011 <done>:
4448 Label normal;
4449 Label done;
4450
4451 // cmp $0x80000000,%eax
4452 __ cmpl(as_Register(RAX_enc), 0x80000000);
4453
4454 // jne e <normal>
4455 __ jccb(Assembler::notEqual, normal);
4456
4457 // xor %edx,%edx
4458 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4459
4460     // cmp $0xffffffffffffffff,$div
4461 __ cmpl($div$$Register, -1);
4462
4463 // je 11 <done>
4464 __ jccb(Assembler::equal, done);
4465
4466 // <normal>
4467 // cltd
4468 __ bind(normal);
4469 __ cdql();
4470
4471 // idivl
4472 // <done>
4473 __ idivl($div$$Register);
4474 __ bind(done);
4475 %}
4476
4477 enc_class cdqq_enc(no_rax_rdx_RegL div)
4478 %{
4479 // Full implementation of Java ldiv and lrem; checks for
4480 // special case as described in JVM spec., p.243 & p.271.
4481 //
4482 // normal case special case
4483 //
4484 // input : rax: dividend min_long
4485 // reg: divisor -1
4486 //
4487 // output: rax: quotient (= rax idiv reg) min_long
4488 // rdx: remainder (= rax irem reg) 0
4489 //
4490     //  Code sequence:
4491 //
4492 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4493 // 7: 00 00 80
4494 // a: 48 39 d0 cmp %rdx,%rax
4495 // d: 75 08 jne 17 <normal>
4496 // f: 33 d2 xor %edx,%edx
4497 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4498 // 15: 74 05 je 1c <done>
4499 // 0000000000000017 <normal>:
4500 // 17: 48 99 cqto
4501 // 19: 48 f7 f9 idiv $div
4502 // 000000000000001c <done>:
4503 Label normal;
4504 Label done;
4505
4506 // mov $0x8000000000000000,%rdx
4507 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4508
4509 // cmp %rdx,%rax
4510 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4511
4512 // jne 17 <normal>
4513 __ jccb(Assembler::notEqual, normal);
4514
4515 // xor %edx,%edx
4516 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4517
4518 // cmp $0xffffffffffffffff,$div
4519 __ cmpq($div$$Register, -1);
4520
4521     // je 1c <done>
4522 __ jccb(Assembler::equal, done);
4523
4524 // <normal>
4525 // cqto
4526 __ bind(normal);
4527 __ cdqq();
4528
4529     // idivq
4530 // <done>
4531 __ idivq($div$$Register);
4532 __ bind(done);
4533 %}
4534
4535 enc_class clear_avx %{
4536 DEBUG_ONLY(int off0 = __ offset());
4537 if (generate_vzeroupper(Compile::current())) {
4538       // Clear upper bits of YMM registers when the current compiled code uses
4539       // wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
4541 __ vzeroupper();
4542 }
4543 DEBUG_ONLY(int off1 = __ offset());
4544 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4545 %}
4546
4547 enc_class Java_To_Runtime(method meth) %{
4548 __ lea(r10, RuntimeAddress((address)$meth$$method));
4549 __ call(r10);
4550 __ post_call_nop();
4551 %}
4552
4553 enc_class Java_Static_Call(method meth)
4554 %{
4555 // JAVA STATIC CALL
4556 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4557 // determine who we intended to call.
4558 if (!_method) {
4559 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4560 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4561 // The NOP here is purely to ensure that eliding a call to
4562 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4563 __ addr_nop_5();
4564 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4565 } else {
4566 int method_index = resolved_method_index(masm);
4567 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4568 : static_call_Relocation::spec(method_index);
4569 address mark = __ pc();
4570 int call_offset = __ offset();
4571 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4572 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4573 // Calls of the same statically bound method can share
4574 // a stub to the interpreter.
4575 __ code()->shared_stub_to_interp_for(_method, call_offset);
4576 } else {
4577 // Emit stubs for static call.
4578 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4579 __ clear_inst_mark();
4580 if (stub == nullptr) {
4581 ciEnv::current()->record_failure("CodeCache is full");
4582 return;
4583 }
4584 }
4585 }
4586 __ post_call_nop();
4587 %}
4588
4589 enc_class Java_Dynamic_Call(method meth) %{
4590 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4591 __ post_call_nop();
4592 %}
4593
4594 enc_class call_epilog %{
4595 if (VerifyStackAtCalls) {
4596 // Check that stack depth is unchanged: find majik cookie on stack
4597 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4598 Label L;
4599 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4600 __ jccb(Assembler::equal, L);
4601 // Die if stack mismatch
4602 __ int3();
4603 __ bind(L);
4604 }
4605 %}
4606
4607 %}
4608
4609 //----------FRAME--------------------------------------------------------------
4610 // Definition of frame structure and management information.
4611 //
4612 // S T A C K L A Y O U T Allocators stack-slot number
4613 // | (to get allocators register number
4614 // G Owned by | | v add OptoReg::stack0())
4615 // r CALLER | |
4616 // o | +--------+ pad to even-align allocators stack-slot
4617 // w V | pad0 | numbers; owned by CALLER
4618 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4619 // h ^ | in | 5
4620 // | | args | 4 Holes in incoming args owned by SELF
4621 // | | | | 3
4622 // | | +--------+
4623 // V | | old out| Empty on Intel, window on Sparc
4624 // | old |preserve| Must be even aligned.
4625 // | SP-+--------+----> Matcher::_old_SP, even aligned
4626 // | | in | 3 area for Intel ret address
4627 // Owned by |preserve| Empty on Sparc.
4628 // SELF +--------+
4629 // | | pad2 | 2 pad to align old SP
4630 // | +--------+ 1
4631 // | | locks | 0
4632 // | +--------+----> OptoReg::stack0(), even aligned
4633 // | | pad1 | 11 pad to align new SP
4634 // | +--------+
4635 // | | | 10
4636 // | | spills | 9 spills
4637 // V | | 8 (pad0 slot for callee)
4638 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4639 // ^ | out | 7
4640 // | | args | 6 Holes in outgoing args owned by CALLEE
4641 // Owned by +--------+
4642 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4643 // | new |preserve| Must be even-aligned.
4644 // | SP-+--------+----> Matcher::_new_SP, even aligned
4645 // | | |
4646 //
4647 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4648 // known from SELF's arguments and the Java calling convention.
4649 // Region 6-7 is determined per call site.
4650 // Note 2: If the calling convention leaves holes in the incoming argument
4651 // area, those holes are owned by SELF. Holes in the outgoing area
4652 // are owned by the CALLEE. Holes should not be necessary in the
4653 // incoming area, as the Java calling convention is completely under
4654 // the control of the AD file. Doubles can be sorted and packed to
4655 // avoid holes. Holes in the outgoing arguments may be necessary for
4656 // varargs C calling conventions.
4657 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4658 // even aligned with pad0 as needed.
4659 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4660 // region 6-11 is even aligned; it may be padded out more so that
4661 // the region from SP to FP meets the minimum stack alignment.
4662 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4663 // alignment. Region 11, pad1, may be dynamically extended so that
4664 // SP meets the minimum alignment.
4665
4666 frame
4667 %{
4668 // These three registers define part of the calling convention
4669 // between compiled code and the interpreter.
4670 inline_cache_reg(RAX); // Inline Cache Register
4671
4672 // Optional: name the operand used by cisc-spilling to access
4673 // [stack_pointer + offset]
4674 cisc_spilling_operand_name(indOffset32);
4675
4676 // Number of stack slots consumed by locking an object
4677 sync_stack_slots(2);
4678
4679 // Compiled code's Frame Pointer
4680 frame_pointer(RSP);
4681
4682 // Interpreter stores its frame pointer in a register which is
4683 // stored to the stack by I2CAdaptors.
4684 // I2CAdaptors convert from interpreted java to compiled java.
4685 interpreter_frame_pointer(RBP);
4686
4687 // Stack alignment requirement
4688 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4689
4690 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4691 // for calls to C. Supports the var-args backing area for register parms.
4692 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4693
4694 // The after-PROLOG location of the return address. Location of
4695 // return address specifies a type (REG or STACK) and a number
4696 // representing the register number (i.e. - use a register name) or
4697 // stack slot.
4698 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4699 // Otherwise, it is above the locks and verification slot and alignment word
4700 return_addr(STACK - 2 +
4701 align_up((Compile::current()->in_preserve_stack_slots() +
4702 Compile::current()->fixed_slots()),
4703 stack_alignment_in_slots()));
4704
4705 // Location of compiled Java return values. Same as C for now.
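  // For example, per the tables below an Op_RegL result is returned in
  // RAX (lo) / RAX_H (hi) and an Op_RegD result in XMM0 / XMM0b.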
4706 return_value
4707 %{
4708 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4709 "only return normal values");
4710
4711 static const int lo[Op_RegL + 1] = {
4712 0,
4713 0,
4714 RAX_num, // Op_RegN
4715 RAX_num, // Op_RegI
4716 RAX_num, // Op_RegP
4717 XMM0_num, // Op_RegF
4718 XMM0_num, // Op_RegD
4719 RAX_num // Op_RegL
4720 };
4721 static const int hi[Op_RegL + 1] = {
4722 0,
4723 0,
4724 OptoReg::Bad, // Op_RegN
4725 OptoReg::Bad, // Op_RegI
4726 RAX_H_num, // Op_RegP
4727 OptoReg::Bad, // Op_RegF
4728 XMM0b_num, // Op_RegD
4729 RAX_H_num // Op_RegL
4730 };
4731 // Excluded flags and vector registers.
4732 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4733 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4734 %}
4735 %}
4736
4737 //----------ATTRIBUTES---------------------------------------------------------
4738 //----------Operand Attributes-------------------------------------------------
4739 op_attrib op_cost(0); // Required cost attribute
4740
4741 //----------Instruction Attributes---------------------------------------------
4742 ins_attrib ins_cost(100); // Required cost attribute
4743 ins_attrib ins_size(8); // Required size attribute (in bits)
4744 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4745 // a non-matching short branch variant
4746 // of some long branch?
4747 ins_attrib ins_alignment(1); // Required alignment attribute (must
4748 // be a power of 2) specifies the
4749 // alignment that some part of the
4750 // instruction (not necessarily the
4751 // start) requires. If > 1, a
4752 // compute_padding() function must be
4753 // provided for the instruction
4754
4755 // Whether this node is expanded during code emission into a sequence of
4756 // instructions and the first instruction can perform an implicit null check.
4757 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4758
4759 //----------OPERANDS-----------------------------------------------------------
4760 // Operand definitions must precede instruction definitions for correct parsing
4761 // in the ADLC because operands constitute user defined types which are used in
4762 // instruction definitions.
4763
4764 //----------Simple Operands----------------------------------------------------
4765 // Immediate Operands
4766 // Integer Immediate
4767 operand immI()
4768 %{
4769 match(ConI);
4770
4771 op_cost(10);
4772 format %{ %}
4773 interface(CONST_INTER);
4774 %}
4775
4776 // Constant for test vs zero
4777 operand immI_0()
4778 %{
4779 predicate(n->get_int() == 0);
4780 match(ConI);
4781
4782 op_cost(0);
4783 format %{ %}
4784 interface(CONST_INTER);
4785 %}
4786
4787 // Constant for increment
4788 operand immI_1()
4789 %{
4790 predicate(n->get_int() == 1);
4791 match(ConI);
4792
4793 op_cost(0);
4794 format %{ %}
4795 interface(CONST_INTER);
4796 %}
4797
4798 // Constant for decrement
4799 operand immI_M1()
4800 %{
4801 predicate(n->get_int() == -1);
4802 match(ConI);
4803
4804 op_cost(0);
4805 format %{ %}
4806 interface(CONST_INTER);
4807 %}
4808
4809 operand immI_2()
4810 %{
4811 predicate(n->get_int() == 2);
4812 match(ConI);
4813
4814 op_cost(0);
4815 format %{ %}
4816 interface(CONST_INTER);
4817 %}
4818
4819 operand immI_4()
4820 %{
4821 predicate(n->get_int() == 4);
4822 match(ConI);
4823
4824 op_cost(0);
4825 format %{ %}
4826 interface(CONST_INTER);
4827 %}
4828
4829 operand immI_8()
4830 %{
4831 predicate(n->get_int() == 8);
4832 match(ConI);
4833
4834 op_cost(0);
4835 format %{ %}
4836 interface(CONST_INTER);
4837 %}
4838
4839 // Valid scale values for addressing modes
4840 operand immI2()
4841 %{
4842 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4843 match(ConI);
4844
4845 format %{ %}
4846 interface(CONST_INTER);
4847 %}
4848
4849 operand immU7()
4850 %{
4851 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4852 match(ConI);
4853
4854 op_cost(5);
4855 format %{ %}
4856 interface(CONST_INTER);
4857 %}
4858
4859 operand immI8()
4860 %{
4861 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4862 match(ConI);
4863
4864 op_cost(5);
4865 format %{ %}
4866 interface(CONST_INTER);
4867 %}
4868
4869 operand immU8()
4870 %{
4871 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4872 match(ConI);
4873
4874 op_cost(5);
4875 format %{ %}
4876 interface(CONST_INTER);
4877 %}
4878
4879 operand immI16()
4880 %{
4881 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4882 match(ConI);
4883
4884 op_cost(10);
4885 format %{ %}
4886 interface(CONST_INTER);
4887 %}
4888
4889 // Int Immediate non-negative
4890 operand immU31()
4891 %{
4892 predicate(n->get_int() >= 0);
4893 match(ConI);
4894
4895 op_cost(0);
4896 format %{ %}
4897 interface(CONST_INTER);
4898 %}
4899
4900 // Pointer Immediate
4901 operand immP()
4902 %{
4903 match(ConP);
4904
4905 op_cost(10);
4906 format %{ %}
4907 interface(CONST_INTER);
4908 %}
4909
4910 // Null Pointer Immediate
4911 operand immP0()
4912 %{
4913 predicate(n->get_ptr() == 0);
4914 match(ConP);
4915
4916 op_cost(5);
4917 format %{ %}
4918 interface(CONST_INTER);
4919 %}
4920
4921 // Pointer Immediate
4922 operand immN() %{
4923 match(ConN);
4924
4925 op_cost(10);
4926 format %{ %}
4927 interface(CONST_INTER);
4928 %}
4929
4930 operand immNKlass() %{
4931 match(ConNKlass);
4932
4933 op_cost(10);
4934 format %{ %}
4935 interface(CONST_INTER);
4936 %}
4937
4938 // Null Pointer Immediate
4939 operand immN0() %{
4940 predicate(n->get_narrowcon() == 0);
4941 match(ConN);
4942
4943 op_cost(5);
4944 format %{ %}
4945 interface(CONST_INTER);
4946 %}
4947
4948 operand immP31()
4949 %{
4950 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4951 && (n->get_ptr() >> 31) == 0);
4952 match(ConP);
4953
4954 op_cost(5);
4955 format %{ %}
4956 interface(CONST_INTER);
4957 %}
4958
4959
4960 // Long Immediate
4961 operand immL()
4962 %{
4963 match(ConL);
4964
4965 op_cost(20);
4966 format %{ %}
4967 interface(CONST_INTER);
4968 %}
4969
4970 // Long Immediate 8-bit
4971 operand immL8()
4972 %{
4973 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4974 match(ConL);
4975
4976 op_cost(5);
4977 format %{ %}
4978 interface(CONST_INTER);
4979 %}
4980
4981 // Long Immediate 32-bit unsigned
4982 operand immUL32()
4983 %{
4984 predicate(n->get_long() == (unsigned int) (n->get_long()));
4985 match(ConL);
4986
4987 op_cost(10);
4988 format %{ %}
4989 interface(CONST_INTER);
4990 %}
4991
4992 // Long Immediate 32-bit signed
4993 operand immL32()
4994 %{
4995 predicate(n->get_long() == (int) (n->get_long()));
4996 match(ConL);
4997
4998 op_cost(15);
4999 format %{ %}
5000 interface(CONST_INTER);
5001 %}
5002
5003 operand immL_Pow2()
5004 %{
5005 predicate(is_power_of_2((julong)n->get_long()));
5006 match(ConL);
5007
5008 op_cost(15);
5009 format %{ %}
5010 interface(CONST_INTER);
5011 %}
5012
5013 operand immL_NotPow2()
5014 %{
5015 predicate(is_power_of_2((julong)~n->get_long()));
5016 match(ConL);
5017
5018 op_cost(15);
5019 format %{ %}
5020 interface(CONST_INTER);
5021 %}
5022
5023 // Long Immediate zero
5024 operand immL0()
5025 %{
5026 predicate(n->get_long() == 0L);
5027 match(ConL);
5028
5029 op_cost(10);
5030 format %{ %}
5031 interface(CONST_INTER);
5032 %}
5033
5034 // Constant for increment
5035 operand immL1()
5036 %{
5037 predicate(n->get_long() == 1);
5038 match(ConL);
5039
5040 format %{ %}
5041 interface(CONST_INTER);
5042 %}
5043
5044 // Constant for decrement
5045 operand immL_M1()
5046 %{
5047 predicate(n->get_long() == -1);
5048 match(ConL);
5049
5050 format %{ %}
5051 interface(CONST_INTER);
5052 %}
5053
5054 // Long Immediate: low 32-bit mask
5055 operand immL_32bits()
5056 %{
5057 predicate(n->get_long() == 0xFFFFFFFFL);
5058 match(ConL);
5059 op_cost(20);
5060
5061 format %{ %}
5062 interface(CONST_INTER);
5063 %}
5064
5065 // Int Immediate: 2^n-1, positive
5066 operand immI_Pow2M1()
5067 %{
5068 predicate((n->get_int() > 0)
5069 && is_power_of_2((juint)n->get_int() + 1));
5070 match(ConI);
5071
5072 op_cost(20);
5073 format %{ %}
5074 interface(CONST_INTER);
5075 %}
5076
5077 // Float Immediate zero
5078 operand immF0()
5079 %{
5080 predicate(jint_cast(n->getf()) == 0);
5081 match(ConF);
5082
5083 op_cost(5);
5084 format %{ %}
5085 interface(CONST_INTER);
5086 %}
5087
5088 // Float Immediate
5089 operand immF()
5090 %{
5091 match(ConF);
5092
5093 op_cost(15);
5094 format %{ %}
5095 interface(CONST_INTER);
5096 %}
5097
5098 // Half Float Immediate
5099 operand immH()
5100 %{
5101 match(ConH);
5102
5103 op_cost(15);
5104 format %{ %}
5105 interface(CONST_INTER);
5106 %}
5107
5108 // Double Immediate zero
5109 operand immD0()
5110 %{
5111 predicate(jlong_cast(n->getd()) == 0);
5112 match(ConD);
5113
5114 op_cost(5);
5115 format %{ %}
5116 interface(CONST_INTER);
5117 %}
5118
5119 // Double Immediate
5120 operand immD()
5121 %{
5122 match(ConD);
5123
5124 op_cost(15);
5125 format %{ %}
5126 interface(CONST_INTER);
5127 %}
5128
5129 // Immediates for special shifts (sign extend)
5130
5131 // Constants for increment
5132 operand immI_16()
5133 %{
5134 predicate(n->get_int() == 16);
5135 match(ConI);
5136
5137 format %{ %}
5138 interface(CONST_INTER);
5139 %}
5140
5141 operand immI_24()
5142 %{
5143 predicate(n->get_int() == 24);
5144 match(ConI);
5145
5146 format %{ %}
5147 interface(CONST_INTER);
5148 %}
5149
5150 // Constant for byte-wide masking
5151 operand immI_255()
5152 %{
5153 predicate(n->get_int() == 255);
5154 match(ConI);
5155
5156 format %{ %}
5157 interface(CONST_INTER);
5158 %}
5159
5160 // Constant for short-wide masking
5161 operand immI_65535()
5162 %{
5163 predicate(n->get_int() == 65535);
5164 match(ConI);
5165
5166 format %{ %}
5167 interface(CONST_INTER);
5168 %}
5169
5170 // Constant for byte-wide masking
5171 operand immL_255()
5172 %{
5173 predicate(n->get_long() == 255);
5174 match(ConL);
5175
5176 format %{ %}
5177 interface(CONST_INTER);
5178 %}
5179
5180 // Constant for short-wide masking
5181 operand immL_65535()
5182 %{
5183 predicate(n->get_long() == 65535);
5184 match(ConL);
5185
5186 format %{ %}
5187 interface(CONST_INTER);
5188 %}
5189
5190 // AOT Runtime Constants Address
5191 operand immAOTRuntimeConstantsAddress()
5192 %{
5193 // Check if the address is in the range of AOT Runtime Constants
5194 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5195 match(ConP);
5196
5197 op_cost(0);
5198 format %{ %}
5199 interface(CONST_INTER);
5200 %}
5201
5202 operand kReg()
5203 %{
5204 constraint(ALLOC_IN_RC(vectmask_reg));
5205 match(RegVectMask);
5206 format %{%}
5207 interface(REG_INTER);
5208 %}
5209
5210 // Register Operands
5211 // Integer Register
5212 operand rRegI()
5213 %{
5214 constraint(ALLOC_IN_RC(int_reg));
5215 match(RegI);
5216
5217 match(rax_RegI);
5218 match(rbx_RegI);
5219 match(rcx_RegI);
5220 match(rdx_RegI);
5221 match(rdi_RegI);
5222
5223 format %{ %}
5224 interface(REG_INTER);
5225 %}
5226
5227 // Special Registers
5228 operand rax_RegI()
5229 %{
5230 constraint(ALLOC_IN_RC(int_rax_reg));
5231 match(RegI);
5232 match(rRegI);
5233
5234 format %{ "RAX" %}
5235 interface(REG_INTER);
5236 %}
5237
5238 // Special Registers
5239 operand rbx_RegI()
5240 %{
5241 constraint(ALLOC_IN_RC(int_rbx_reg));
5242 match(RegI);
5243 match(rRegI);
5244
5245 format %{ "RBX" %}
5246 interface(REG_INTER);
5247 %}
5248
5249 operand rcx_RegI()
5250 %{
5251 constraint(ALLOC_IN_RC(int_rcx_reg));
5252 match(RegI);
5253 match(rRegI);
5254
5255 format %{ "RCX" %}
5256 interface(REG_INTER);
5257 %}
5258
5259 operand rdx_RegI()
5260 %{
5261 constraint(ALLOC_IN_RC(int_rdx_reg));
5262 match(RegI);
5263 match(rRegI);
5264
5265 format %{ "RDX" %}
5266 interface(REG_INTER);
5267 %}
5268
5269 operand rdi_RegI()
5270 %{
5271 constraint(ALLOC_IN_RC(int_rdi_reg));
5272 match(RegI);
5273 match(rRegI);
5274
5275 format %{ "RDI" %}
5276 interface(REG_INTER);
5277 %}
5278
5279 operand no_rax_rdx_RegI()
5280 %{
5281 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5282 match(RegI);
5283 match(rbx_RegI);
5284 match(rcx_RegI);
5285 match(rdi_RegI);
5286
5287 format %{ %}
5288 interface(REG_INTER);
5289 %}
5290
5291 operand no_rbp_r13_RegI()
5292 %{
5293 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5294 match(RegI);
5295 match(rRegI);
5296 match(rax_RegI);
5297 match(rbx_RegI);
5298 match(rcx_RegI);
5299 match(rdx_RegI);
5300 match(rdi_RegI);
5301
5302 format %{ %}
5303 interface(REG_INTER);
5304 %}
5305
5306 // Pointer Register
5307 operand any_RegP()
5308 %{
5309 constraint(ALLOC_IN_RC(any_reg));
5310 match(RegP);
5311 match(rax_RegP);
5312 match(rbx_RegP);
5313 match(rdi_RegP);
5314 match(rsi_RegP);
5315 match(rbp_RegP);
5316 match(r15_RegP);
5317 match(rRegP);
5318
5319 format %{ %}
5320 interface(REG_INTER);
5321 %}
5322
5323 operand rRegP()
5324 %{
5325 constraint(ALLOC_IN_RC(ptr_reg));
5326 match(RegP);
5327 match(rax_RegP);
5328 match(rbx_RegP);
5329 match(rdi_RegP);
5330 match(rsi_RegP);
5331 match(rbp_RegP); // See Q&A below about
5332 match(r15_RegP); // r15_RegP and rbp_RegP.
5333
5334 format %{ %}
5335 interface(REG_INTER);
5336 %}
5337
5338 operand rRegN() %{
5339 constraint(ALLOC_IN_RC(int_reg));
5340 match(RegN);
5341
5342 format %{ %}
5343 interface(REG_INTER);
5344 %}
5345
5346 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5347 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5348 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5349 // The output of an instruction is controlled by the allocator, which respects
5350 // register class masks, not match rules. Unless an instruction mentions
5351 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5352 // by the allocator.
5353 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5354 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5355 // result, RBP is not included in the output of the instruction either.
5356
5357 // This operand is not allowed to use RBP even if
5358 // RBP is not used to hold the frame pointer.
5359 operand no_rbp_RegP()
5360 %{
5361 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5362 match(RegP);
5363 match(rbx_RegP);
5364 match(rsi_RegP);
5365 match(rdi_RegP);
5366
5367 format %{ %}
5368 interface(REG_INTER);
5369 %}
5370
5371 // Special Registers
5372 // Return a pointer value
5373 operand rax_RegP()
5374 %{
5375 constraint(ALLOC_IN_RC(ptr_rax_reg));
5376 match(RegP);
5377 match(rRegP);
5378
5379 format %{ %}
5380 interface(REG_INTER);
5381 %}
5382
5383 // Special Registers
5384 // Return a compressed pointer value
5385 operand rax_RegN()
5386 %{
5387 constraint(ALLOC_IN_RC(int_rax_reg));
5388 match(RegN);
5389 match(rRegN);
5390
5391 format %{ %}
5392 interface(REG_INTER);
5393 %}
5394
5395 // Used in AtomicAdd
5396 operand rbx_RegP()
5397 %{
5398 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5399 match(RegP);
5400 match(rRegP);
5401
5402 format %{ %}
5403 interface(REG_INTER);
5404 %}
5405
5406 operand rsi_RegP()
5407 %{
5408 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5409 match(RegP);
5410 match(rRegP);
5411
5412 format %{ %}
5413 interface(REG_INTER);
5414 %}
5415
5416 operand rbp_RegP()
5417 %{
5418 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5419 match(RegP);
5420 match(rRegP);
5421
5422 format %{ %}
5423 interface(REG_INTER);
5424 %}
5425
5426 // Used in rep stosq
5427 operand rdi_RegP()
5428 %{
5429 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5430 match(RegP);
5431 match(rRegP);
5432
5433 format %{ %}
5434 interface(REG_INTER);
5435 %}
5436
5437 operand r15_RegP()
5438 %{
5439 constraint(ALLOC_IN_RC(ptr_r15_reg));
5440 match(RegP);
5441 match(rRegP);
5442
5443 format %{ %}
5444 interface(REG_INTER);
5445 %}
5446
5447 operand rRegL()
5448 %{
5449 constraint(ALLOC_IN_RC(long_reg));
5450 match(RegL);
5451 match(rax_RegL);
5452 match(rdx_RegL);
5453
5454 format %{ %}
5455 interface(REG_INTER);
5456 %}
5457
5458 // Special Registers
5459 operand no_rax_rdx_RegL()
5460 %{
5461 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5462 match(RegL);
5463 match(rRegL);
5464
5465 format %{ %}
5466 interface(REG_INTER);
5467 %}
5468
5469 operand rax_RegL()
5470 %{
5471 constraint(ALLOC_IN_RC(long_rax_reg));
5472 match(RegL);
5473 match(rRegL);
5474
5475 format %{ "RAX" %}
5476 interface(REG_INTER);
5477 %}
5478
5479 operand rcx_RegL()
5480 %{
5481 constraint(ALLOC_IN_RC(long_rcx_reg));
5482 match(RegL);
5483 match(rRegL);
5484
5485 format %{ %}
5486 interface(REG_INTER);
5487 %}
5488
5489 operand rdx_RegL()
5490 %{
5491 constraint(ALLOC_IN_RC(long_rdx_reg));
5492 match(RegL);
5493 match(rRegL);
5494
5495 format %{ %}
5496 interface(REG_INTER);
5497 %}
5498
5499 operand r11_RegL()
5500 %{
5501 constraint(ALLOC_IN_RC(long_r11_reg));
5502 match(RegL);
5503 match(rRegL);
5504
5505 format %{ %}
5506 interface(REG_INTER);
5507 %}
5508
5509 operand no_rbp_r13_RegL()
5510 %{
5511 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5512 match(RegL);
5513 match(rRegL);
5514 match(rax_RegL);
5515 match(rcx_RegL);
5516 match(rdx_RegL);
5517
5518 format %{ %}
5519 interface(REG_INTER);
5520 %}
5521
5522 // Flags register, used as output of compare instructions
5523 operand rFlagsReg()
5524 %{
5525 constraint(ALLOC_IN_RC(int_flags));
5526 match(RegFlags);
5527
5528 format %{ "RFLAGS" %}
5529 interface(REG_INTER);
5530 %}
5531
5532 // Flags register, used as output of FLOATING POINT compare instructions
5533 operand rFlagsRegU()
5534 %{
5535 constraint(ALLOC_IN_RC(int_flags));
5536 match(RegFlags);
5537
5538 format %{ "RFLAGS_U" %}
5539 interface(REG_INTER);
5540 %}
5541
5542 operand rFlagsRegUCF() %{
5543 constraint(ALLOC_IN_RC(int_flags));
5544 match(RegFlags);
5545 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5546
5547 format %{ "RFLAGS_U_CF" %}
5548 interface(REG_INTER);
5549 %}
5550
5551 operand rFlagsRegUCFE() %{
5552 constraint(ALLOC_IN_RC(int_flags));
5553 match(RegFlags);
5554 predicate(UseAPX && VM_Version::supports_avx10_2());
5555
5556 format %{ "RFLAGS_U_CFE" %}
5557 interface(REG_INTER);
5558 %}
5559
5560 // Float register operands
5561 operand regF() %{
5562 constraint(ALLOC_IN_RC(float_reg));
5563 match(RegF);
5564
5565 format %{ %}
5566 interface(REG_INTER);
5567 %}
5568
5569 // Float register operands
5570 operand legRegF() %{
5571 constraint(ALLOC_IN_RC(float_reg_legacy));
5572 match(RegF);
5573
5574 format %{ %}
5575 interface(REG_INTER);
5576 %}
5577
5578 // Float register operands
5579 operand vlRegF() %{
5580 constraint(ALLOC_IN_RC(float_reg_vl));
5581 match(RegF);
5582
5583 format %{ %}
5584 interface(REG_INTER);
5585 %}
5586
5587 // Double register operands
5588 operand regD() %{
5589 constraint(ALLOC_IN_RC(double_reg));
5590 match(RegD);
5591
5592 format %{ %}
5593 interface(REG_INTER);
5594 %}
5595
5596 // Double register operands
5597 operand legRegD() %{
5598 constraint(ALLOC_IN_RC(double_reg_legacy));
5599 match(RegD);
5600
5601 format %{ %}
5602 interface(REG_INTER);
5603 %}
5604
5605 // Double register operands
5606 operand vlRegD() %{
5607 constraint(ALLOC_IN_RC(double_reg_vl));
5608 match(RegD);
5609
5610 format %{ %}
5611 interface(REG_INTER);
5612 %}
5613
5614 //----------Memory Operands----------------------------------------------------
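// In the MEMORY_INTER descriptions below, index(0x4) means "no index register"
// (encoding 4 is RSP, which cannot serve as an index in SIB addressing), while
// a literal base such as 0x4 or 0xc names RSP or R12 directly.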
5615 // Direct Memory Operand
5616 // operand direct(immP addr)
5617 // %{
5618 // match(addr);
5619
5620 // format %{ "[$addr]" %}
5621 // interface(MEMORY_INTER) %{
5622 // base(0xFFFFFFFF);
5623 // index(0x4);
5624 // scale(0x0);
5625 // disp($addr);
5626 // %}
5627 // %}
5628
5629 // Indirect Memory Operand
5630 operand indirect(any_RegP reg)
5631 %{
5632 constraint(ALLOC_IN_RC(ptr_reg));
5633 match(reg);
5634
5635 format %{ "[$reg]" %}
5636 interface(MEMORY_INTER) %{
5637 base($reg);
5638 index(0x4);
5639 scale(0x0);
5640 disp(0x0);
5641 %}
5642 %}
5643
5644 // Indirect Memory Plus Short Offset Operand
5645 operand indOffset8(any_RegP reg, immL8 off)
5646 %{
5647 constraint(ALLOC_IN_RC(ptr_reg));
5648 match(AddP reg off);
5649
5650 format %{ "[$reg + $off (8-bit)]" %}
5651 interface(MEMORY_INTER) %{
5652 base($reg);
5653 index(0x4);
5654 scale(0x0);
5655 disp($off);
5656 %}
5657 %}
5658
5659 // Indirect Memory Plus Long Offset Operand
5660 operand indOffset32(any_RegP reg, immL32 off)
5661 %{
5662 constraint(ALLOC_IN_RC(ptr_reg));
5663 match(AddP reg off);
5664
5665 format %{ "[$reg + $off (32-bit)]" %}
5666 interface(MEMORY_INTER) %{
5667 base($reg);
5668 index(0x4);
5669 scale(0x0);
5670 disp($off);
5671 %}
5672 %}
5673
5674 // Indirect Memory Plus Index Register Plus Offset Operand
5675 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5676 %{
5677 constraint(ALLOC_IN_RC(ptr_reg));
5678 match(AddP (AddP reg lreg) off);
5679
5680 op_cost(10);
5681 format %{"[$reg + $off + $lreg]" %}
5682 interface(MEMORY_INTER) %{
5683 base($reg);
5684 index($lreg);
5685 scale(0x0);
5686 disp($off);
5687 %}
5688 %}
5689
5690 // Indirect Memory Plus Index Register Plus Offset Operand
5691 operand indIndex(any_RegP reg, rRegL lreg)
5692 %{
5693 constraint(ALLOC_IN_RC(ptr_reg));
5694 match(AddP reg lreg);
5695
5696 op_cost(10);
5697 format %{"[$reg + $lreg]" %}
5698 interface(MEMORY_INTER) %{
5699 base($reg);
5700 index($lreg);
5701 scale(0x0);
5702 disp(0x0);
5703 %}
5704 %}
5705
5706 // Indirect Memory Times Scale Plus Index Register
5707 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5708 %{
5709 constraint(ALLOC_IN_RC(ptr_reg));
5710 match(AddP reg (LShiftL lreg scale));
5711
5712 op_cost(10);
5713 format %{"[$reg + $lreg << $scale]" %}
5714 interface(MEMORY_INTER) %{
5715 base($reg);
5716 index($lreg);
5717 scale($scale);
5718 disp(0x0);
5719 %}
5720 %}
5721
5722 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5723 %{
5724 constraint(ALLOC_IN_RC(ptr_reg));
5725 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5726 match(AddP reg (LShiftL (ConvI2L idx) scale));
5727
5728 op_cost(10);
5729 format %{"[$reg + pos $idx << $scale]" %}
5730 interface(MEMORY_INTER) %{
5731 base($reg);
5732 index($idx);
5733 scale($scale);
5734 disp(0x0);
5735 %}
5736 %}
5737
5738 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5739 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5740 %{
5741 constraint(ALLOC_IN_RC(ptr_reg));
5742 match(AddP (AddP reg (LShiftL lreg scale)) off);
5743
5744 op_cost(10);
5745 format %{"[$reg + $off + $lreg << $scale]" %}
5746 interface(MEMORY_INTER) %{
5747 base($reg);
5748 index($lreg);
5749 scale($scale);
5750 disp($off);
5751 %}
5752 %}
5753
5754 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5755 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5756 %{
5757 constraint(ALLOC_IN_RC(ptr_reg));
5758 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5759 match(AddP (AddP reg (ConvI2L idx)) off);
5760
5761 op_cost(10);
5762 format %{"[$reg + $off + $idx]" %}
5763 interface(MEMORY_INTER) %{
5764 base($reg);
5765 index($idx);
5766 scale(0x0);
5767 disp($off);
5768 %}
5769 %}
5770
5771 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5772 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5773 %{
5774 constraint(ALLOC_IN_RC(ptr_reg));
5775 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5776 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5777
5778 op_cost(10);
5779 format %{"[$reg + $off + $idx << $scale]" %}
5780 interface(MEMORY_INTER) %{
5781 base($reg);
5782 index($idx);
5783 scale($scale);
5784 disp($off);
5785 %}
5786 %}
5787
5788 // Indirect Narrow Oop Plus Offset Operand
5789 // Note: the x86 architecture doesn't support "scale * index + offset" without a
5790 // base register, so we can't free r12 even with CompressedOops::base() == nullptr.
5791 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5792 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5793 constraint(ALLOC_IN_RC(ptr_reg));
5794 match(AddP (DecodeN reg) off);
5795
5796 op_cost(10);
5797 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5798 interface(MEMORY_INTER) %{
5799 base(0xc); // R12
5800 index($reg);
5801 scale(0x3);
5802 disp($off);
5803 %}
5804 %}
5805
5806 // Indirect Memory Operand
5807 operand indirectNarrow(rRegN reg)
5808 %{
5809 predicate(CompressedOops::shift() == 0);
5810 constraint(ALLOC_IN_RC(ptr_reg));
5811 match(DecodeN reg);
5812
5813 format %{ "[$reg]" %}
5814 interface(MEMORY_INTER) %{
5815 base($reg);
5816 index(0x4);
5817 scale(0x0);
5818 disp(0x0);
5819 %}
5820 %}
5821
5822 // Indirect Memory Plus Short Offset Operand
5823 operand indOffset8Narrow(rRegN reg, immL8 off)
5824 %{
5825 predicate(CompressedOops::shift() == 0);
5826 constraint(ALLOC_IN_RC(ptr_reg));
5827 match(AddP (DecodeN reg) off);
5828
5829 format %{ "[$reg + $off (8-bit)]" %}
5830 interface(MEMORY_INTER) %{
5831 base($reg);
5832 index(0x4);
5833 scale(0x0);
5834 disp($off);
5835 %}
5836 %}
5837
5838 // Indirect Memory Plus Long Offset Operand
5839 operand indOffset32Narrow(rRegN reg, immL32 off)
5840 %{
5841 predicate(CompressedOops::shift() == 0);
5842 constraint(ALLOC_IN_RC(ptr_reg));
5843 match(AddP (DecodeN reg) off);
5844
5845 format %{ "[$reg + $off (32-bit)]" %}
5846 interface(MEMORY_INTER) %{
5847 base($reg);
5848 index(0x4);
5849 scale(0x0);
5850 disp($off);
5851 %}
5852 %}
5853
5854 // Indirect Memory Plus Index Register Plus Offset Operand
5855 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5856 %{
5857 predicate(CompressedOops::shift() == 0);
5858 constraint(ALLOC_IN_RC(ptr_reg));
5859 match(AddP (AddP (DecodeN reg) lreg) off);
5860
5861 op_cost(10);
5862 format %{"[$reg + $off + $lreg]" %}
5863 interface(MEMORY_INTER) %{
5864 base($reg);
5865 index($lreg);
5866 scale(0x0);
5867 disp($off);
5868 %}
5869 %}
5870
5871 // Indirect Memory Plus Index Register Plus Offset Operand
5872 operand indIndexNarrow(rRegN reg, rRegL lreg)
5873 %{
5874 predicate(CompressedOops::shift() == 0);
5875 constraint(ALLOC_IN_RC(ptr_reg));
5876 match(AddP (DecodeN reg) lreg);
5877
5878 op_cost(10);
5879 format %{"[$reg + $lreg]" %}
5880 interface(MEMORY_INTER) %{
5881 base($reg);
5882 index($lreg);
5883 scale(0x0);
5884 disp(0x0);
5885 %}
5886 %}
5887
5888 // Indirect Memory Times Scale Plus Index Register
5889 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5890 %{
5891 predicate(CompressedOops::shift() == 0);
5892 constraint(ALLOC_IN_RC(ptr_reg));
5893 match(AddP (DecodeN reg) (LShiftL lreg scale));
5894
5895 op_cost(10);
5896 format %{"[$reg + $lreg << $scale]" %}
5897 interface(MEMORY_INTER) %{
5898 base($reg);
5899 index($lreg);
5900 scale($scale);
5901 disp(0x0);
5902 %}
5903 %}
5904
5905 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5906 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5907 %{
5908 predicate(CompressedOops::shift() == 0);
5909 constraint(ALLOC_IN_RC(ptr_reg));
5910 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5911
5912 op_cost(10);
5913 format %{"[$reg + $off + $lreg << $scale]" %}
5914 interface(MEMORY_INTER) %{
5915 base($reg);
5916 index($lreg);
5917 scale($scale);
5918 disp($off);
5919 %}
5920 %}
5921
5922 // Indirect Memory Times Plus Positive Index Register Plus Offset Operand
5923 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5924 %{
5925 constraint(ALLOC_IN_RC(ptr_reg));
5926 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5927 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5928
5929 op_cost(10);
5930 format %{"[$reg + $off + $idx]" %}
5931 interface(MEMORY_INTER) %{
5932 base($reg);
5933 index($idx);
5934 scale(0x0);
5935 disp($off);
5936 %}
5937 %}
5938
5939 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5940 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5941 %{
5942 constraint(ALLOC_IN_RC(ptr_reg));
5943 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5944 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5945
5946 op_cost(10);
5947 format %{"[$reg + $off + $idx << $scale]" %}
5948 interface(MEMORY_INTER) %{
5949 base($reg);
5950 index($idx);
5951 scale($scale);
5952 disp($off);
5953 %}
5954 %}
5955
5956 //----------Special Memory Operands--------------------------------------------
5957 // Stack Slot Operand - This operand is used for loading and storing temporary
5958 // values on the stack where a match requires a value to
5959 // flow through memory.
5960 operand stackSlotP(sRegP reg)
5961 %{
5962 constraint(ALLOC_IN_RC(stack_slots));
5963 // No match rule because this operand is only generated in matching
5964
5965 format %{ "[$reg]" %}
5966 interface(MEMORY_INTER) %{
5967 base(0x4); // RSP
5968 index(0x4); // No Index
5969 scale(0x0); // No Scale
5970 disp($reg); // Stack Offset
5971 %}
5972 %}
5973
5974 operand stackSlotI(sRegI reg)
5975 %{
5976 constraint(ALLOC_IN_RC(stack_slots));
5977 // No match rule because this operand is only generated in matching
5978
5979 format %{ "[$reg]" %}
5980 interface(MEMORY_INTER) %{
5981 base(0x4); // RSP
5982 index(0x4); // No Index
5983 scale(0x0); // No Scale
5984 disp($reg); // Stack Offset
5985 %}
5986 %}
5987
5988 operand stackSlotF(sRegF reg)
5989 %{
5990 constraint(ALLOC_IN_RC(stack_slots));
5991 // No match rule because this operand is only generated in matching
5992
5993 format %{ "[$reg]" %}
5994 interface(MEMORY_INTER) %{
5995 base(0x4); // RSP
5996 index(0x4); // No Index
5997 scale(0x0); // No Scale
5998 disp($reg); // Stack Offset
5999 %}
6000 %}
6001
6002 operand stackSlotD(sRegD reg)
6003 %{
6004 constraint(ALLOC_IN_RC(stack_slots));
6005 // No match rule because this operand is only generated in matching
6006
6007 format %{ "[$reg]" %}
6008 interface(MEMORY_INTER) %{
6009 base(0x4); // RSP
6010 index(0x4); // No Index
6011 scale(0x0); // No Scale
6012 disp($reg); // Stack Offset
6013 %}
6014 %}
6015 operand stackSlotL(sRegL reg)
6016 %{
6017 constraint(ALLOC_IN_RC(stack_slots));
6018 // No match rule because this operand is only generated in matching
6019
6020 format %{ "[$reg]" %}
6021 interface(MEMORY_INTER) %{
6022 base(0x4); // RSP
6023 index(0x4); // No Index
6024 scale(0x0); // No Scale
6025 disp($reg); // Stack Offset
6026 %}
6027 %}
6028
6029 //----------Conditional Branch Operands----------------------------------------
6030 // Comparison Op - This is the operation of the comparison, and is limited to
6031 // the following set of codes:
6032 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6033 //
6034 // Other attributes of the comparison, such as unsignedness, are specified
6035 // by the comparison instruction that sets a condition code flags register.
6036 // That result is represented by a flags operand whose subtype is appropriate
6037 // to the unsignedness (etc.) of the comparison.
6038 //
6039 // Later, the instruction which matches both the Comparison Op (a Bool) and
6040 // the flags (produced by the Cmp) specifies the coding of the comparison op
6041 // by matching a specific subtype of Bool operand below, such as cmpOpU.
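// The hex values attached to each condition below are the x86 condition-code
// nibbles used when emitting Jcc/SETcc/CMOVcc instructions (e.g. 0x4 -> "e",
// 0x2 -> "b").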
6042
6043 // Comparison Code
6044 operand cmpOp()
6045 %{
6046 match(Bool);
6047
6048 format %{ "" %}
6049 interface(COND_INTER) %{
6050 equal(0x4, "e");
6051 not_equal(0x5, "ne");
6052 less(0xc, "l");
6053 greater_equal(0xd, "ge");
6054 less_equal(0xe, "le");
6055 greater(0xf, "g");
6056 overflow(0x0, "o");
6057 no_overflow(0x1, "no");
6058 %}
6059 %}
6060
6061 // Comparison Code, unsigned compare. Used by FP also, with
6062 // C2 (unordered) turned into GT or LT already. The other bits
6063 // C0 and C3 are turned into Carry & Zero flags.
6064 operand cmpOpU()
6065 %{
6066 match(Bool);
6067
6068 format %{ "" %}
6069 interface(COND_INTER) %{
6070 equal(0x4, "e");
6071 not_equal(0x5, "ne");
6072 less(0x2, "b");
6073 greater_equal(0x3, "ae");
6074 less_equal(0x6, "be");
6075 greater(0x7, "a");
6076 overflow(0x0, "o");
6077 no_overflow(0x1, "no");
6078 %}
6079 %}
6080
6081
6082 // Floating comparisons that don't require any fixup for the unordered case.
6083 // If both inputs of the comparison are the same, ZF is always set, so we
6084 // don't need to use cmpOpUCF2 for eq/ne
6085 operand cmpOpUCF() %{
6086 match(Bool);
6087 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6088 (n->as_Bool()->_test._test == BoolTest::lt ||
6089 n->as_Bool()->_test._test == BoolTest::ge ||
6090 n->as_Bool()->_test._test == BoolTest::le ||
6091 n->as_Bool()->_test._test == BoolTest::gt ||
6092 n->in(1)->in(1) == n->in(1)->in(2)));
6093 format %{ "" %}
6094 interface(COND_INTER) %{
6095 equal(0xb, "np");
6096 not_equal(0xa, "p");
6097 less(0x2, "b");
6098 greater_equal(0x3, "ae");
6099 less_equal(0x6, "be");
6100 greater(0x7, "a");
6101 overflow(0x0, "o");
6102 no_overflow(0x1, "no");
6103 %}
6104 %}
6105
6106
6107 // Floating comparisons that can be fixed up with extra conditional jumps
6108 operand cmpOpUCF2() %{
6109 match(Bool);
6110 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6111 (n->as_Bool()->_test._test == BoolTest::ne ||
6112 n->as_Bool()->_test._test == BoolTest::eq) &&
6113 n->in(1)->in(1) != n->in(1)->in(2));
6114 format %{ "" %}
6115 interface(COND_INTER) %{
6116 equal(0x4, "e");
6117 not_equal(0x5, "ne");
6118 less(0x2, "b");
6119 greater_equal(0x3, "ae");
6120 less_equal(0x6, "be");
6121 greater(0x7, "a");
6122 overflow(0x0, "o");
6123 no_overflow(0x1, "no");
6124 %}
6125 %}
6126
6127
6128 // Floating point comparisons that set condition flags to test more directly.
6129 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6130 // are used for L (<) and LE (<=) conditions. It's important to convert these
6131 // latter conditions to ones that use unsigned tests before passing into an
6132 // instruction because the preceding comparison might be based on a three way
6133 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6134 operand cmpOpUCFE()
6135 %{
6136 match(Bool);
6137 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6138 (n->as_Bool()->_test._test == BoolTest::ne ||
6139 n->as_Bool()->_test._test == BoolTest::eq ||
6140 n->as_Bool()->_test._test == BoolTest::lt ||
6141 n->as_Bool()->_test._test == BoolTest::ge ||
6142 n->as_Bool()->_test._test == BoolTest::le ||
6143 n->as_Bool()->_test._test == BoolTest::gt));
6144
6145 format %{ "" %}
6146 interface(COND_INTER) %{
6147 equal(0x4, "e");
6148 not_equal(0x5, "ne");
6149 less(0x2, "b");
6150 greater_equal(0x3, "ae");
6151 less_equal(0x6, "be");
6152 greater(0x7, "a");
6153 overflow(0x0, "o");
6154 no_overflow(0x1, "no");
6155 %}
6156 %}
6157
6158 // Operands for bound floating pointer register arguments
6159 operand rxmm0() %{
6160 constraint(ALLOC_IN_RC(xmm0_reg));
6161 match(VecX);
6162 format%{%}
6163 interface(REG_INTER);
6164 %}
6165
6166 // Vectors
6167
6168 // Dummy generic vector class. Should be used for all vector operands.
6169 // Replaced with vec[SDXYZ] during post-selection pass.
6170 operand vec() %{
6171 constraint(ALLOC_IN_RC(dynamic));
6172 match(VecX);
6173 match(VecY);
6174 match(VecZ);
6175 match(VecS);
6176 match(VecD);
6177
6178 format %{ %}
6179 interface(REG_INTER);
6180 %}
6181
6182 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6183 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6184 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6185 // runtime code generation via reg_class_dynamic.
6186 operand legVec() %{
6187 constraint(ALLOC_IN_RC(dynamic));
6188 match(VecX);
6189 match(VecY);
6190 match(VecZ);
6191 match(VecS);
6192 match(VecD);
6193
6194 format %{ %}
6195 interface(REG_INTER);
6196 %}
6197
6198 // Replaces vec during post-selection cleanup. See above.
6199 operand vecS() %{
6200 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6201 match(VecS);
6202
6203 format %{ %}
6204 interface(REG_INTER);
6205 %}
6206
6207 // Replaces legVec during post-selection cleanup. See above.
6208 operand legVecS() %{
6209 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6210 match(VecS);
6211
6212 format %{ %}
6213 interface(REG_INTER);
6214 %}
6215
6216 // Replaces vec during post-selection cleanup. See above.
6217 operand vecD() %{
6218 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6219 match(VecD);
6220
6221 format %{ %}
6222 interface(REG_INTER);
6223 %}
6224
6225 // Replaces legVec during post-selection cleanup. See above.
6226 operand legVecD() %{
6227 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6228 match(VecD);
6229
6230 format %{ %}
6231 interface(REG_INTER);
6232 %}
6233
6234 // Replaces vec during post-selection cleanup. See above.
6235 operand vecX() %{
6236 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6237 match(VecX);
6238
6239 format %{ %}
6240 interface(REG_INTER);
6241 %}
6242
6243 // Replaces legVec during post-selection cleanup. See above.
6244 operand legVecX() %{
6245 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6246 match(VecX);
6247
6248 format %{ %}
6249 interface(REG_INTER);
6250 %}
6251
6252 // Replaces vec during post-selection cleanup. See above.
6253 operand vecY() %{
6254 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6255 match(VecY);
6256
6257 format %{ %}
6258 interface(REG_INTER);
6259 %}
6260
6261 // Replaces legVec during post-selection cleanup. See above.
6262 operand legVecY() %{
6263 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6264 match(VecY);
6265
6266 format %{ %}
6267 interface(REG_INTER);
6268 %}
6269
6270 // Replaces vec during post-selection cleanup. See above.
6271 operand vecZ() %{
6272 constraint(ALLOC_IN_RC(vectorz_reg));
6273 match(VecZ);
6274
6275 format %{ %}
6276 interface(REG_INTER);
6277 %}
6278
6279 // Replaces legVec during post-selection cleanup. See above.
6280 operand legVecZ() %{
6281 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6282 match(VecZ);
6283
6284 format %{ %}
6285 interface(REG_INTER);
6286 %}
6287
6288 //----------OPERAND CLASSES----------------------------------------------------
6289 // Operand Classes are groups of operands that are used to simplify
6290 // instruction definitions by not requiring the AD writer to specify separate
6291 // instructions for every form of operand when the instruction accepts
6292 // multiple operand types with the same basic encoding and format. The classic
6293 // case of this is memory operands.
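// For example, a single instruct rule declared with a "memory" operand will
// match any of the addressing forms enumerated in the opclass below.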
6294
6295 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6296 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6297 indCompressedOopOffset,
6298 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6299 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6300 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6301
6302 //----------PIPELINE-----------------------------------------------------------
6303 // Rules which define the behavior of the target architectures pipeline.
6304 pipeline %{
6305
6306 //----------ATTRIBUTES---------------------------------------------------------
6307 attributes %{
6308     variable_size_instructions;        // Variable size instructions
6309 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6310     instruction_unit_size = 1;         // An instruction is 1 byte long
6311 instruction_fetch_unit_size = 16; // The processor fetches one line
6312 instruction_fetch_units = 1; // of 16 bytes
6313 %}
6314
6315 //----------RESOURCES----------------------------------------------------------
6316 // Resources are the functional units available to the machine
6317
6318 // Generic P2/P3 pipeline
6319 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6320 // 3 instructions decoded per cycle.
6321 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6322 // 3 ALU ops, only ALU0 handles mul instructions.
6323 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6324 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6325 BR, FPU,
6326 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6327
6328 //----------PIPELINE DESCRIPTION-----------------------------------------------
6329 // Pipeline Description specifies the stages in the machine's pipeline
6330
6331 // Generic P2/P3 pipeline
6332 pipe_desc(S0, S1, S2, S3, S4, S5);
6333
6334 //----------PIPELINE CLASSES---------------------------------------------------
6335 // Pipeline Classes describe the stages in which input and output are
6336 // referenced by the hardware pipeline.
6337
6338 // Naming convention: ialu or fpu
6339 // Then: _reg
6340 // Then: _reg if there is a 2nd register
6341 // Then: _long if it's a pair of instructions implementing a long
6342 // Then: _fat if it requires the big decoder
6343 // Or: _mem if it requires the big decoder and a memory unit.
6344
6345 // Integer ALU reg operation
6346 pipe_class ialu_reg(rRegI dst)
6347 %{
6348 single_instruction;
6349 dst : S4(write);
6350 dst : S3(read);
6351 DECODE : S0; // any decoder
6352 ALU : S3; // any alu
6353 %}
6354
6355 // Long ALU reg operation
6356 pipe_class ialu_reg_long(rRegL dst)
6357 %{
6358 instruction_count(2);
6359 dst : S4(write);
6360 dst : S3(read);
6361 DECODE : S0(2); // any 2 decoders
6362 ALU : S3(2); // both alus
6363 %}
6364
6365 // Integer ALU reg operation using big decoder
6366 pipe_class ialu_reg_fat(rRegI dst)
6367 %{
6368 single_instruction;
6369 dst : S4(write);
6370 dst : S3(read);
6371 D0 : S0; // big decoder only
6372 ALU : S3; // any alu
6373 %}
6374
6375 // Integer ALU reg-reg operation
6376 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6377 %{
6378 single_instruction;
6379 dst : S4(write);
6380 src : S3(read);
6381 DECODE : S0; // any decoder
6382 ALU : S3; // any alu
6383 %}
6384
6385 // Integer ALU reg-reg operation
6386 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6387 %{
6388 single_instruction;
6389 dst : S4(write);
6390 src : S3(read);
6391 D0 : S0; // big decoder only
6392 ALU : S3; // any alu
6393 %}
6394
6395 // Integer ALU reg-mem operation
6396 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6397 %{
6398 single_instruction;
6399 dst : S5(write);
6400 mem : S3(read);
6401 D0 : S0; // big decoder only
6402 ALU : S4; // any alu
6403 MEM : S3; // any mem
6404 %}
6405
6406 // Integer mem operation (prefetch)
6407 pipe_class ialu_mem(memory mem)
6408 %{
6409 single_instruction;
6410 mem : S3(read);
6411 D0 : S0; // big decoder only
6412 MEM : S3; // any mem
6413 %}
6414
6415 // Integer Store to Memory
6416 pipe_class ialu_mem_reg(memory mem, rRegI src)
6417 %{
6418 single_instruction;
6419 mem : S3(read);
6420 src : S5(read);
6421 D0 : S0; // big decoder only
6422 ALU : S4; // any alu
6423 MEM : S3;
6424 %}
6425
6426 // // Long Store to Memory
6427 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6428 // %{
6429 // instruction_count(2);
6430 // mem : S3(read);
6431 // src : S5(read);
6432 // D0 : S0(2); // big decoder only; twice
6433 // ALU : S4(2); // any 2 alus
6434 // MEM : S3(2); // Both mems
6435 // %}
6436
6437 // Integer Store to Memory
6438 pipe_class ialu_mem_imm(memory mem)
6439 %{
6440 single_instruction;
6441 mem : S3(read);
6442 D0 : S0; // big decoder only
6443 ALU : S4; // any alu
6444 MEM : S3;
6445 %}
6446
6447 // Integer ALU0 reg-reg operation
6448 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6449 %{
6450 single_instruction;
6451 dst : S4(write);
6452 src : S3(read);
6453 D0 : S0; // Big decoder only
6454 ALU0 : S3; // only alu0
6455 %}
6456
6457 // Integer ALU0 reg-mem operation
6458 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6459 %{
6460 single_instruction;
6461 dst : S5(write);
6462 mem : S3(read);
6463 D0 : S0; // big decoder only
6464 ALU0 : S4; // ALU0 only
6465 MEM : S3; // any mem
6466 %}
6467
6468 // Integer ALU reg-reg operation
6469 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6470 %{
6471 single_instruction;
6472 cr : S4(write);
6473 src1 : S3(read);
6474 src2 : S3(read);
6475 DECODE : S0; // any decoder
6476 ALU : S3; // any alu
6477 %}
6478
6479 // Integer ALU reg-imm operation
6480 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6481 %{
6482 single_instruction;
6483 cr : S4(write);
6484 src1 : S3(read);
6485 DECODE : S0; // any decoder
6486 ALU : S3; // any alu
6487 %}
6488
6489 // Integer ALU reg-mem operation
6490 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6491 %{
6492 single_instruction;
6493 cr : S4(write);
6494 src1 : S3(read);
6495 src2 : S3(read);
6496 D0 : S0; // big decoder only
6497 ALU : S4; // any alu
6498 MEM : S3;
6499 %}
6500
6501 // Conditional move reg-reg
6502 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6503 %{
6504 instruction_count(4);
6505 y : S4(read);
6506 q : S3(read);
6507 p : S3(read);
6508 DECODE : S0(4); // any decoder
6509 %}
6510
6511 // Conditional move reg-reg
6512 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6513 %{
6514 single_instruction;
6515 dst : S4(write);
6516 src : S3(read);
6517 cr : S3(read);
6518 DECODE : S0; // any decoder
6519 %}
6520
6521 // Conditional move reg-mem
6522 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6523 %{
6524 single_instruction;
6525 dst : S4(write);
6526 src : S3(read);
6527 cr : S3(read);
6528 DECODE : S0; // any decoder
6529 MEM : S3;
6530 %}
6531
6532 // Conditional move reg-reg long
6533 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6534 %{
6535 single_instruction;
6536 dst : S4(write);
6537 src : S3(read);
6538 cr : S3(read);
6539 DECODE : S0(2); // any 2 decoders
6540 %}
6541
6542 // Float reg-reg operation
6543 pipe_class fpu_reg(regD dst)
6544 %{
6545 instruction_count(2);
6546 dst : S3(read);
6547 DECODE : S0(2); // any 2 decoders
6548 FPU : S3;
6549 %}
6550
6551 // Float reg-reg operation
6552 pipe_class fpu_reg_reg(regD dst, regD src)
6553 %{
6554 instruction_count(2);
6555 dst : S4(write);
6556 src : S3(read);
6557 DECODE : S0(2); // any 2 decoders
6558 FPU : S3;
6559 %}
6560
6561 // Float reg-reg operation
6562 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6563 %{
6564 instruction_count(3);
6565 dst : S4(write);
6566 src1 : S3(read);
6567 src2 : S3(read);
6568 DECODE : S0(3); // any 3 decoders
6569 FPU : S3(2);
6570 %}
6571
6572 // Float reg-reg operation
6573 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6574 %{
6575 instruction_count(4);
6576 dst : S4(write);
6577 src1 : S3(read);
6578 src2 : S3(read);
6579 src3 : S3(read);
6580 DECODE : S0(4); // any 4 decoders
6581 FPU : S3(2);
6582 %}
6583
6584 // Float reg-reg operation
6585 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6586 %{
6587 instruction_count(4);
6588 dst : S4(write);
6589 src1 : S3(read);
6590 src2 : S3(read);
6591 src3 : S3(read);
6592 DECODE : S1(3); // any 3 decoders
6593 D0 : S0; // Big decoder only
6594 FPU : S3(2);
6595 MEM : S3;
6596 %}
6597
6598 // Float reg-mem operation
6599 pipe_class fpu_reg_mem(regD dst, memory mem)
6600 %{
6601 instruction_count(2);
6602 dst : S5(write);
6603 mem : S3(read);
6604 D0 : S0; // big decoder only
6605 DECODE : S1; // any decoder for FPU POP
6606 FPU : S4;
6607 MEM : S3; // any mem
6608 %}
6609
6610 // Float reg-mem operation
6611 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6612 %{
6613 instruction_count(3);
6614 dst : S5(write);
6615 src1 : S3(read);
6616 mem : S3(read);
6617 D0 : S0; // big decoder only
6618 DECODE : S1(2); // any decoder for FPU POP
6619 FPU : S4;
6620 MEM : S3; // any mem
6621 %}
6622
6623 // Float mem-reg operation
6624 pipe_class fpu_mem_reg(memory mem, regD src)
6625 %{
6626 instruction_count(2);
6627 src : S5(read);
6628 mem : S3(read);
6629 DECODE : S0; // any decoder for FPU PUSH
6630 D0 : S1; // big decoder only
6631 FPU : S4;
6632 MEM : S3; // any mem
6633 %}
6634
6635 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6636 %{
6637 instruction_count(3);
6638 src1 : S3(read);
6639 src2 : S3(read);
6640 mem : S3(read);
6641 DECODE : S0(2); // any decoder for FPU PUSH
6642 D0 : S1; // big decoder only
6643 FPU : S4;
6644 MEM : S3; // any mem
6645 %}
6646
6647 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6648 %{
6649 instruction_count(3);
6650 src1 : S3(read);
6651 src2 : S3(read);
6652 mem : S4(read);
6653 DECODE : S0; // any decoder for FPU PUSH
6654 D0 : S0(2); // big decoder only
6655 FPU : S4;
6656 MEM : S3(2); // any mem
6657 %}
6658
6659 pipe_class fpu_mem_mem(memory dst, memory src1)
6660 %{
6661 instruction_count(2);
6662 src1 : S3(read);
6663 dst : S4(read);
6664 D0 : S0(2); // big decoder only
6665 MEM : S3(2); // any mem
6666 %}
6667
6668 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6669 %{
6670 instruction_count(3);
6671 src1 : S3(read);
6672 src2 : S3(read);
6673 dst : S4(read);
6674 D0 : S0(3); // big decoder only
6675 FPU : S4;
6676 MEM : S3(3); // any mem
6677 %}
6678
6679 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6680 %{
6681 instruction_count(3);
6682 src1 : S4(read);
6683 mem : S4(read);
6684 DECODE : S0; // any decoder for FPU PUSH
6685 D0 : S0(2); // big decoder only
6686 FPU : S4;
6687 MEM : S3(2); // any mem
6688 %}
6689
6690 // Float load constant
6691 pipe_class fpu_reg_con(regD dst)
6692 %{
6693 instruction_count(2);
6694 dst : S5(write);
6695 D0 : S0; // big decoder only for the load
6696 DECODE : S1; // any decoder for FPU POP
6697 FPU : S4;
6698 MEM : S3; // any mem
6699 %}
6700
6701 // Float load constant
6702 pipe_class fpu_reg_reg_con(regD dst, regD src)
6703 %{
6704 instruction_count(3);
6705 dst : S5(write);
6706 src : S3(read);
6707 D0 : S0; // big decoder only for the load
6708 DECODE : S1(2); // any decoder for FPU POP
6709 FPU : S4;
6710 MEM : S3; // any mem
6711 %}
6712
6713 // Unconditional branch
6714 pipe_class pipe_jmp(label labl)
6715 %{
6716 single_instruction;
6717 BR : S3;
6718 %}
6719
6720 // Conditional branch
6721 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6722 %{
6723 single_instruction;
6724 cr : S1(read);
6725 BR : S3;
6726 %}
6727
6728 // Allocation idiom
6729 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6730 %{
6731 instruction_count(1); force_serialization;
6732 fixed_latency(6);
6733 heap_ptr : S3(read);
6734 DECODE : S0(3);
6735 D0 : S2;
6736 MEM : S3;
6737 ALU : S3(2);
6738 dst : S5(write);
6739 BR : S5;
6740 %}
6741
6742 // Generic big/slow expanded idiom
6743 pipe_class pipe_slow()
6744 %{
6745 instruction_count(10); multiple_bundles; force_serialization;
6746 fixed_latency(100);
6747 D0 : S0(2);
6748 MEM : S3(2);
6749 %}
6750
6751 // The real do-nothing guy
6752 pipe_class empty()
6753 %{
6754 instruction_count(0);
6755 %}
6756
6757 // Define the class for the Nop node
6758 define
6759 %{
6760 MachNop = empty;
6761 %}
6762
6763 %}
6764
6765 //----------INSTRUCTIONS-------------------------------------------------------
6766 //
6767 // match -- States which machine-independent subtree may be replaced
6768 // by this instruction.
6769 // ins_cost -- The estimated cost of this instruction is used by instruction
6770 // selection to identify a minimum cost tree of machine
6771 // instructions that matches a tree of machine-independent
6772 // instructions.
6773 // format -- A string providing the disassembly for this instruction.
6774 // The value of an instruction's operand may be inserted
6775 // by referring to it with a '$' prefix.
6776 // opcode -- Three instruction opcodes may be provided. These are referred
6777 // to within an encode class as $primary, $secondary, and $tertiary
6778 // respectively. The primary opcode is commonly used to
6779 // indicate the type of machine instruction, while secondary
6780 // and tertiary are often used for prefix options or addressing
6781 // modes.
6782 // ins_encode -- A list of encode classes with parameters. The encode class
6783 // name must have been defined in an 'enc_class' specification
6784 // in the encode section of the architecture description.
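//
// As a minimal illustrative sketch of how these pieces fit together (kept as a
// comment only, so it does not introduce an actual matching rule), a hypothetical
// reg-reg integer add could be written as below. It assumes the ialu_reg_reg
// pipeline class declared earlier in this file:
//
//   instruct exampleAddI(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));  // replace the machine-independent AddI subtree
//     effect(KILL cr);                // addl clobbers the condition codes
//     ins_cost(150);                  // estimated cost used by instruction selection
//     format %{ "addl    $dst, $src\t# example int add" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}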
6785
6786 // ============================================================================
6787
6788 instruct ShouldNotReachHere() %{
6789 match(Halt);
6790 format %{ "stop\t# ShouldNotReachHere" %}
6791 ins_encode %{
6792 if (is_reachable()) {
6793 const char* str = __ code_string(_halt_reason);
6794 __ stop(str);
6795 }
6796 %}
6797 ins_pipe(pipe_slow);
6798 %}
6799
6800 // ============================================================================
6801
6802 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6803 // Load Float
6804 instruct MoveF2VL(vlRegF dst, regF src) %{
6805 match(Set dst src);
6806 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6807 ins_encode %{
6808 ShouldNotReachHere();
6809 %}
6810 ins_pipe( fpu_reg_reg );
6811 %}
6812
6813 // Load Float
6814 instruct MoveF2LEG(legRegF dst, regF src) %{
6815 match(Set dst src);
6816 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6817 ins_encode %{
6818 ShouldNotReachHere();
6819 %}
6820 ins_pipe( fpu_reg_reg );
6821 %}
6822
6823 // Load Float
6824 instruct MoveVL2F(regF dst, vlRegF src) %{
6825 match(Set dst src);
6826 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6827 ins_encode %{
6828 ShouldNotReachHere();
6829 %}
6830 ins_pipe( fpu_reg_reg );
6831 %}
6832
6833 // Load Float
6834 instruct MoveLEG2F(regF dst, legRegF src) %{
6835 match(Set dst src);
6836 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6837 ins_encode %{
6838 ShouldNotReachHere();
6839 %}
6840 ins_pipe( fpu_reg_reg );
6841 %}
6842
6843 // Load Double
6844 instruct MoveD2VL(vlRegD dst, regD src) %{
6845 match(Set dst src);
6846 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6847 ins_encode %{
6848 ShouldNotReachHere();
6849 %}
6850 ins_pipe( fpu_reg_reg );
6851 %}
6852
6853 // Load Double
6854 instruct MoveD2LEG(legRegD dst, regD src) %{
6855 match(Set dst src);
6856 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6857 ins_encode %{
6858 ShouldNotReachHere();
6859 %}
6860 ins_pipe( fpu_reg_reg );
6861 %}
6862
6863 // Load Double
6864 instruct MoveVL2D(regD dst, vlRegD src) %{
6865 match(Set dst src);
6866 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6867 ins_encode %{
6868 ShouldNotReachHere();
6869 %}
6870 ins_pipe( fpu_reg_reg );
6871 %}
6872
6873 // Load Double
6874 instruct MoveLEG2D(regD dst, legRegD src) %{
6875 match(Set dst src);
6876 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6877 ins_encode %{
6878 ShouldNotReachHere();
6879 %}
6880 ins_pipe( fpu_reg_reg );
6881 %}
6882
6883 //----------Load/Store/Move Instructions---------------------------------------
6884 //----------Load Instructions--------------------------------------------------
6885
6886 // Load Byte (8 bit signed)
6887 instruct loadB(rRegI dst, memory mem)
6888 %{
6889 match(Set dst (LoadB mem));
6890
6891 ins_cost(125);
6892 format %{ "movsbl $dst, $mem\t# byte" %}
6893
6894 ins_encode %{
6895 __ movsbl($dst$$Register, $mem$$Address);
6896 %}
6897
6898 ins_pipe(ialu_reg_mem);
6899 %}
6900
6901 // Load Byte (8 bit signed) into Long Register
6902 instruct loadB2L(rRegL dst, memory mem)
6903 %{
6904 match(Set dst (ConvI2L (LoadB mem)));
6905
6906 ins_cost(125);
6907 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6908
6909 ins_encode %{
6910 __ movsbq($dst$$Register, $mem$$Address);
6911 %}
6912
6913 ins_pipe(ialu_reg_mem);
6914 %}
6915
6916 // Load Unsigned Byte (8 bit UNsigned)
6917 instruct loadUB(rRegI dst, memory mem)
6918 %{
6919 match(Set dst (LoadUB mem));
6920
6921 ins_cost(125);
6922 format %{ "movzbl $dst, $mem\t# ubyte" %}
6923
6924 ins_encode %{
6925 __ movzbl($dst$$Register, $mem$$Address);
6926 %}
6927
6928 ins_pipe(ialu_reg_mem);
6929 %}
6930
6931 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6932 instruct loadUB2L(rRegL dst, memory mem)
6933 %{
6934 match(Set dst (ConvI2L (LoadUB mem)));
6935
6936 ins_cost(125);
6937 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6938
6939 ins_encode %{
6940 __ movzbq($dst$$Register, $mem$$Address);
6941 %}
6942
6943 ins_pipe(ialu_reg_mem);
6944 %}
6945
6946 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6947 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6948 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6949 effect(KILL cr);
6950
6951 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6952 "andl $dst, right_n_bits($mask, 8)" %}
6953 ins_encode %{
6954 Register Rdst = $dst$$Register;
6955 __ movzbq(Rdst, $mem$$Address);
6956 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6957 %}
6958 ins_pipe(ialu_reg_mem);
6959 %}
6960
6961 // Load Short (16 bit signed)
6962 instruct loadS(rRegI dst, memory mem)
6963 %{
6964 match(Set dst (LoadS mem));
6965
6966 ins_cost(125);
6967 format %{ "movswl $dst, $mem\t# short" %}
6968
6969 ins_encode %{
6970 __ movswl($dst$$Register, $mem$$Address);
6971 %}
6972
6973 ins_pipe(ialu_reg_mem);
6974 %}
6975
6976 // Load Short (16 bit signed) to Byte (8 bit signed)
6977 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6978 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6979
6980 ins_cost(125);
6981 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6982 ins_encode %{
6983 __ movsbl($dst$$Register, $mem$$Address);
6984 %}
6985 ins_pipe(ialu_reg_mem);
6986 %}
6987
6988 // Load Short (16 bit signed) into Long Register
6989 instruct loadS2L(rRegL dst, memory mem)
6990 %{
6991 match(Set dst (ConvI2L (LoadS mem)));
6992
6993 ins_cost(125);
6994 format %{ "movswq $dst, $mem\t# short -> long" %}
6995
6996 ins_encode %{
6997 __ movswq($dst$$Register, $mem$$Address);
6998 %}
6999
7000 ins_pipe(ialu_reg_mem);
7001 %}
7002
7003 // Load Unsigned Short/Char (16 bit UNsigned)
7004 instruct loadUS(rRegI dst, memory mem)
7005 %{
7006 match(Set dst (LoadUS mem));
7007
7008 ins_cost(125);
7009 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7010
7011 ins_encode %{
7012 __ movzwl($dst$$Register, $mem$$Address);
7013 %}
7014
7015 ins_pipe(ialu_reg_mem);
7016 %}
7017
7018 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7019 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7020 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7021
7022 ins_cost(125);
7023 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7024 ins_encode %{
7025 __ movsbl($dst$$Register, $mem$$Address);
7026 %}
7027 ins_pipe(ialu_reg_mem);
7028 %}
7029
7030 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7031 instruct loadUS2L(rRegL dst, memory mem)
7032 %{
7033 match(Set dst (ConvI2L (LoadUS mem)));
7034
7035 ins_cost(125);
7036 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7037
7038 ins_encode %{
7039 __ movzwq($dst$$Register, $mem$$Address);
7040 %}
7041
7042 ins_pipe(ialu_reg_mem);
7043 %}
7044
7045 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7046 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7047 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7048
7049 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7050 ins_encode %{
7051 __ movzbq($dst$$Register, $mem$$Address);
7052 %}
7053 ins_pipe(ialu_reg_mem);
7054 %}
7055
7056 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7057 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7058 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7059 effect(KILL cr);
7060
7061 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7062 "andl $dst, right_n_bits($mask, 16)" %}
7063 ins_encode %{
7064 Register Rdst = $dst$$Register;
7065 __ movzwq(Rdst, $mem$$Address);
7066 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7067 %}
7068 ins_pipe(ialu_reg_mem);
7069 %}
7070
7071 // Load Integer
7072 instruct loadI(rRegI dst, memory mem)
7073 %{
7074 match(Set dst (LoadI mem));
7075
7076 ins_cost(125);
7077 format %{ "movl $dst, $mem\t# int" %}
7078
7079 ins_encode %{
7080 __ movl($dst$$Register, $mem$$Address);
7081 %}
7082
7083 ins_pipe(ialu_reg_mem);
7084 %}
7085
7086 // Load Integer (32 bit signed) to Byte (8 bit signed)
7087 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7088 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7089
7090 ins_cost(125);
7091 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7092 ins_encode %{
7093 __ movsbl($dst$$Register, $mem$$Address);
7094 %}
7095 ins_pipe(ialu_reg_mem);
7096 %}
7097
7098 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7099 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7100 match(Set dst (AndI (LoadI mem) mask));
7101
7102 ins_cost(125);
7103 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7104 ins_encode %{
7105 __ movzbl($dst$$Register, $mem$$Address);
7106 %}
7107 ins_pipe(ialu_reg_mem);
7108 %}
7109
7110 // Load Integer (32 bit signed) to Short (16 bit signed)
7111 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7112 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7113
7114 ins_cost(125);
7115 format %{ "movswl $dst, $mem\t# int -> short" %}
7116 ins_encode %{
7117 __ movswl($dst$$Register, $mem$$Address);
7118 %}
7119 ins_pipe(ialu_reg_mem);
7120 %}
7121
7122 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7123 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7124 match(Set dst (AndI (LoadI mem) mask));
7125
7126 ins_cost(125);
7127 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7128 ins_encode %{
7129 __ movzwl($dst$$Register, $mem$$Address);
7130 %}
7131 ins_pipe(ialu_reg_mem);
7132 %}
7133
7134 // Load Integer into Long Register
7135 instruct loadI2L(rRegL dst, memory mem)
7136 %{
7137 match(Set dst (ConvI2L (LoadI mem)));
7138
7139 ins_cost(125);
7140 format %{ "movslq $dst, $mem\t# int -> long" %}
7141
7142 ins_encode %{
7143 __ movslq($dst$$Register, $mem$$Address);
7144 %}
7145
7146 ins_pipe(ialu_reg_mem);
7147 %}
7148
7149 // Load Integer with mask 0xFF into Long Register
7150 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7151 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7152
7153 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7154 ins_encode %{
7155 __ movzbq($dst$$Register, $mem$$Address);
7156 %}
7157 ins_pipe(ialu_reg_mem);
7158 %}
7159
7160 // Load Integer with mask 0xFFFF into Long Register
7161 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7162 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7163
7164 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7165 ins_encode %{
7166 __ movzwq($dst$$Register, $mem$$Address);
7167 %}
7168 ins_pipe(ialu_reg_mem);
7169 %}
7170
7171 // Load Integer with a 31-bit mask into Long Register
7172 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7173 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7174 effect(KILL cr);
7175
7176 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7177 "andl $dst, $mask" %}
7178 ins_encode %{
7179 Register Rdst = $dst$$Register;
7180 __ movl(Rdst, $mem$$Address);
7181 __ andl(Rdst, $mask$$constant);
7182 %}
7183 ins_pipe(ialu_reg_mem);
7184 %}
7185
7186 // Load Unsigned Integer into Long Register
7187 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7188 %{
7189 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7190
7191 ins_cost(125);
7192 format %{ "movl $dst, $mem\t# uint -> long" %}
7193
7194 ins_encode %{
7195 __ movl($dst$$Register, $mem$$Address);
7196 %}
7197
7198 ins_pipe(ialu_reg_mem);
7199 %}
7200
7201 // Load Long
7202 instruct loadL(rRegL dst, memory mem)
7203 %{
7204 match(Set dst (LoadL mem));
7205
7206 ins_cost(125);
7207 format %{ "movq $dst, $mem\t# long" %}
7208
7209 ins_encode %{
7210 __ movq($dst$$Register, $mem$$Address);
7211 %}
7212
7213 ins_pipe(ialu_reg_mem); // XXX
7214 %}
7215
7216 // Load Range
7217 instruct loadRange(rRegI dst, memory mem)
7218 %{
7219 match(Set dst (LoadRange mem));
7220
7221 ins_cost(125); // XXX
7222 format %{ "movl $dst, $mem\t# range" %}
7223 ins_encode %{
7224 __ movl($dst$$Register, $mem$$Address);
7225 %}
7226 ins_pipe(ialu_reg_mem);
7227 %}
7228
7229 // Load Pointer
7230 instruct loadP(rRegP dst, memory mem)
7231 %{
7232 match(Set dst (LoadP mem));
7233 predicate(n->as_Load()->barrier_data() == 0);
7234
7235 ins_cost(125); // XXX
7236 format %{ "movq $dst, $mem\t# ptr" %}
7237 ins_encode %{
7238 __ movq($dst$$Register, $mem$$Address);
7239 %}
7240 ins_pipe(ialu_reg_mem); // XXX
7241 %}
7242
7243 // Load Compressed Pointer
7244 instruct loadN(rRegN dst, memory mem)
7245 %{
7246 predicate(n->as_Load()->barrier_data() == 0);
7247 match(Set dst (LoadN mem));
7248
7249 ins_cost(125); // XXX
7250 format %{ "movl $dst, $mem\t# compressed ptr" %}
7251 ins_encode %{
7252 __ movl($dst$$Register, $mem$$Address);
7253 %}
7254 ins_pipe(ialu_reg_mem); // XXX
7255 %}
7256
7257
7258 // Load Klass Pointer
7259 instruct loadKlass(rRegP dst, memory mem)
7260 %{
7261 match(Set dst (LoadKlass mem));
7262
7263 ins_cost(125); // XXX
7264 format %{ "movq $dst, $mem\t# class" %}
7265 ins_encode %{
7266 __ movq($dst$$Register, $mem$$Address);
7267 %}
7268 ins_pipe(ialu_reg_mem); // XXX
7269 %}
7270
7271 // Load narrow Klass Pointer
7272 instruct loadNKlass(rRegN dst, memory mem)
7273 %{
7274 predicate(!UseCompactObjectHeaders);
7275 match(Set dst (LoadNKlass mem));
7276
7277 ins_cost(125); // XXX
7278 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7279 ins_encode %{
7280 __ movl($dst$$Register, $mem$$Address);
7281 %}
7282 ins_pipe(ialu_reg_mem); // XXX
7283 %}
7284
7285 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7286 %{
7287 predicate(UseCompactObjectHeaders);
7288 match(Set dst (LoadNKlass mem));
7289 effect(KILL cr);
7290 ins_cost(125);
7291 format %{
7292 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7293 "shrl $dst, markWord::klass_shift_at_offset"
7294 %}
7295 ins_encode %{
7296 if (UseAPX) {
7297 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
7298 } else {
7300 __ movl($dst$$Register, $mem$$Address);
7301 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7302 }
7303 %}
7304 ins_pipe(ialu_reg_mem);
7305 %}
7306
7307 // Load Float
7308 instruct loadF(regF dst, memory mem)
7309 %{
7310 match(Set dst (LoadF mem));
7311
7312 ins_cost(145); // XXX
7313 format %{ "movss $dst, $mem\t# float" %}
7314 ins_encode %{
7315 __ movflt($dst$$XMMRegister, $mem$$Address);
7316 %}
7317 ins_pipe(pipe_slow); // XXX
7318 %}
7319
7320 // Load Double
7321 instruct loadD_partial(regD dst, memory mem)
7322 %{
7323 predicate(!UseXmmLoadAndClearUpper);
7324 match(Set dst (LoadD mem));
7325
7326 ins_cost(145); // XXX
7327 format %{ "movlpd $dst, $mem\t# double" %}
7328 ins_encode %{
7329 __ movdbl($dst$$XMMRegister, $mem$$Address);
7330 %}
7331 ins_pipe(pipe_slow); // XXX
7332 %}
7333
7334 instruct loadD(regD dst, memory mem)
7335 %{
7336 predicate(UseXmmLoadAndClearUpper);
7337 match(Set dst (LoadD mem));
7338
7339 ins_cost(145); // XXX
7340 format %{ "movsd $dst, $mem\t# double" %}
7341 ins_encode %{
7342 __ movdbl($dst$$XMMRegister, $mem$$Address);
7343 %}
7344 ins_pipe(pipe_slow); // XXX
7345 %}
7346
7347 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7348 %{
7349 match(Set dst con);
7350
7351 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7352
7353 ins_encode %{
7354 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7355 %}
7356
7357 ins_pipe(ialu_reg_fat);
7358 %}
7359
7360 // max = java.lang.Math.max(float a, float b)
7361 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7362 predicate(VM_Version::supports_avx10_2());
7363 match(Set dst (MaxF a b));
7364 format %{ "maxF $dst, $a, $b" %}
7365 ins_encode %{
7366 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7367 %}
7368 ins_pipe( pipe_slow );
7369 %}
7370
7371 // max = java.lang.Math.max(float a, float b)
7372 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7373 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7374 match(Set dst (MaxF a b));
7375 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7376 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7377 ins_encode %{
7378 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7379 %}
7380 ins_pipe( pipe_slow );
7381 %}
7382
7383 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7384 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7385 match(Set dst (MaxF a b));
7386 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7387
7388 format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7389 ins_encode %{
7390 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7391 false /*min*/, true /*single*/);
7392 %}
7393 ins_pipe( pipe_slow );
7394 %}
7395
7396 // max = java.lang.Math.max(double a, double b)
7397 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7398 predicate(VM_Version::supports_avx10_2());
7399 match(Set dst (MaxD a b));
7400 format %{ "maxD $dst, $a, $b" %}
7401 ins_encode %{
7402 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7403 %}
7404 ins_pipe( pipe_slow );
7405 %}
7406
7407 // max = java.lang.Math.max(double a, double b)
7408 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7409 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7410 match(Set dst (MaxD a b));
7411 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7412 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7413 ins_encode %{
7414 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7415 %}
7416 ins_pipe( pipe_slow );
7417 %}
7418
7419 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7420 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7421 match(Set dst (MaxD a b));
7422 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7423
7424 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7425 ins_encode %{
7426 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7427 false /*min*/, false /*single*/);
7428 %}
7429 ins_pipe( pipe_slow );
7430 %}
7431
7432 // min = java.lang.Math.min(float a, float b)
7433 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7434 predicate(VM_Version::supports_avx10_2());
7435 match(Set dst (MinF a b));
7436 format %{ "minF $dst, $a, $b" %}
7437 ins_encode %{
7438 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7439 %}
7440 ins_pipe( pipe_slow );
7441 %}
7442
7443 // min = java.lang.Math.min(float a, float b)
7444 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7445 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7446 match(Set dst (MinF a b));
7447 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7448 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7449 ins_encode %{
7450 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7451 %}
7452 ins_pipe( pipe_slow );
7453 %}
7454
7455 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7456 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7457 match(Set dst (MinF a b));
7458 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7459
7460 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7461 ins_encode %{
7462 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7463 true /*min*/, true /*single*/);
7464 %}
7465 ins_pipe( pipe_slow );
7466 %}
7467
7468 // min = java.lang.Math.min(double a, double b)
7469 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7470 predicate(VM_Version::supports_avx10_2());
7471 match(Set dst (MinD a b));
7472 format %{ "minD $dst, $a, $b" %}
7473 ins_encode %{
7474 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7475 %}
7476 ins_pipe( pipe_slow );
7477 %}
7478
7479 // min = java.lang.Math.min(double a, double b)
7480 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7481 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7482 match(Set dst (MinD a b));
7483 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7484 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7485 ins_encode %{
7486 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7487 %}
7488 ins_pipe( pipe_slow );
7489 %}
7490
7491 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7492 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7493 match(Set dst (MinD a b));
7494 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7495
7496 format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7497 ins_encode %{
7498 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7499 true /*min*/, false /*single*/);
7500 %}
7501 ins_pipe( pipe_slow );
7502 %}
7503
7504 // Load Effective Address
7505 instruct leaP8(rRegP dst, indOffset8 mem)
7506 %{
7507 match(Set dst mem);
7508
7509 ins_cost(110); // XXX
7510 format %{ "leaq $dst, $mem\t# ptr 8" %}
7511 ins_encode %{
7512 __ leaq($dst$$Register, $mem$$Address);
7513 %}
7514 ins_pipe(ialu_reg_reg_fat);
7515 %}
7516
7517 instruct leaP32(rRegP dst, indOffset32 mem)
7518 %{
7519 match(Set dst mem);
7520
7521 ins_cost(110);
7522 format %{ "leaq $dst, $mem\t# ptr 32" %}
7523 ins_encode %{
7524 __ leaq($dst$$Register, $mem$$Address);
7525 %}
7526 ins_pipe(ialu_reg_reg_fat);
7527 %}
7528
7529 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7530 %{
7531 match(Set dst mem);
7532
7533 ins_cost(110);
7534 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7535 ins_encode %{
7536 __ leaq($dst$$Register, $mem$$Address);
7537 %}
7538 ins_pipe(ialu_reg_reg_fat);
7539 %}
7540
7541 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7542 %{
7543 match(Set dst mem);
7544
7545 ins_cost(110);
7546 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7547 ins_encode %{
7548 __ leaq($dst$$Register, $mem$$Address);
7549 %}
7550 ins_pipe(ialu_reg_reg_fat);
7551 %}
7552
7553 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7554 %{
7555 match(Set dst mem);
7556
7557 ins_cost(110);
7558 format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
7559 ins_encode %{
7560 __ leaq($dst$$Register, $mem$$Address);
7561 %}
7562 ins_pipe(ialu_reg_reg_fat);
7563 %}
7564
7565 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7566 %{
7567 match(Set dst mem);
7568
7569 ins_cost(110);
7570 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7571 ins_encode %{
7572 __ leaq($dst$$Register, $mem$$Address);
7573 %}
7574 ins_pipe(ialu_reg_reg_fat);
7575 %}
7576
7577 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7578 %{
7579 match(Set dst mem);
7580
7581 ins_cost(110);
7582 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7583 ins_encode %{
7584 __ leaq($dst$$Register, $mem$$Address);
7585 %}
7586 ins_pipe(ialu_reg_reg_fat);
7587 %}
7588
7589 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7590 %{
7591 match(Set dst mem);
7592
7593 ins_cost(110);
7594 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7595 ins_encode %{
7596 __ leaq($dst$$Register, $mem$$Address);
7597 %}
7598 ins_pipe(ialu_reg_reg_fat);
7599 %}
7600
7601 // Load Effective Address which uses Narrow (32-bits) oop
7602 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7603 %{
7604 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7605 match(Set dst mem);
7606
7607 ins_cost(110);
7608 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7609 ins_encode %{
7610 __ leaq($dst$$Register, $mem$$Address);
7611 %}
7612 ins_pipe(ialu_reg_reg_fat);
7613 %}
7614
7615 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7616 %{
7617 predicate(CompressedOops::shift() == 0);
7618 match(Set dst mem);
7619
7620 ins_cost(110); // XXX
7621 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7622 ins_encode %{
7623 __ leaq($dst$$Register, $mem$$Address);
7624 %}
7625 ins_pipe(ialu_reg_reg_fat);
7626 %}
7627
7628 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7629 %{
7630 predicate(CompressedOops::shift() == 0);
7631 match(Set dst mem);
7632
7633 ins_cost(110);
7634 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7635 ins_encode %{
7636 __ leaq($dst$$Register, $mem$$Address);
7637 %}
7638 ins_pipe(ialu_reg_reg_fat);
7639 %}
7640
7641 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7642 %{
7643 predicate(CompressedOops::shift() == 0);
7644 match(Set dst mem);
7645
7646 ins_cost(110);
7647 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7648 ins_encode %{
7649 __ leaq($dst$$Register, $mem$$Address);
7650 %}
7651 ins_pipe(ialu_reg_reg_fat);
7652 %}
7653
7654 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7655 %{
7656 predicate(CompressedOops::shift() == 0);
7657 match(Set dst mem);
7658
7659 ins_cost(110);
7660 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7661 ins_encode %{
7662 __ leaq($dst$$Register, $mem$$Address);
7663 %}
7664 ins_pipe(ialu_reg_reg_fat);
7665 %}
7666
7667 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7668 %{
7669 predicate(CompressedOops::shift() == 0);
7670 match(Set dst mem);
7671
7672 ins_cost(110);
7673 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7674 ins_encode %{
7675 __ leaq($dst$$Register, $mem$$Address);
7676 %}
7677 ins_pipe(ialu_reg_reg_fat);
7678 %}
7679
7680 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7681 %{
7682 predicate(CompressedOops::shift() == 0);
7683 match(Set dst mem);
7684
7685 ins_cost(110);
7686 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7687 ins_encode %{
7688 __ leaq($dst$$Register, $mem$$Address);
7689 %}
7690 ins_pipe(ialu_reg_reg_fat);
7691 %}
7692
7693 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7694 %{
7695 predicate(CompressedOops::shift() == 0);
7696 match(Set dst mem);
7697
7698 ins_cost(110);
7699 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7700 ins_encode %{
7701 __ leaq($dst$$Register, $mem$$Address);
7702 %}
7703 ins_pipe(ialu_reg_reg_fat);
7704 %}
7705
7706 instruct loadConI(rRegI dst, immI src)
7707 %{
7708 match(Set dst src);
7709
7710 format %{ "movl $dst, $src\t# int" %}
7711 ins_encode %{
7712 __ movl($dst$$Register, $src$$constant);
7713 %}
7714 ins_pipe(ialu_reg_fat); // XXX
7715 %}
7716
7717 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7718 %{
7719 match(Set dst src);
7720 effect(KILL cr);
7721
7722 ins_cost(50);
7723 format %{ "xorl $dst, $dst\t# int" %}
7724 ins_encode %{
7725 __ xorl($dst$$Register, $dst$$Register);
7726 %}
7727 ins_pipe(ialu_reg);
7728 %}
7729
7730 instruct loadConL(rRegL dst, immL src)
7731 %{
7732 match(Set dst src);
7733
7734 ins_cost(150);
7735 format %{ "movq $dst, $src\t# long" %}
7736 ins_encode %{
7737 __ mov64($dst$$Register, $src$$constant);
7738 %}
7739 ins_pipe(ialu_reg);
7740 %}
7741
7742 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7743 %{
7744 match(Set dst src);
7745 effect(KILL cr);
7746
7747 ins_cost(50);
7748 format %{ "xorl $dst, $dst\t# long" %}
7749 ins_encode %{
7750 __ xorl($dst$$Register, $dst$$Register);
7751 %}
7752 ins_pipe(ialu_reg); // XXX
7753 %}
7754
7755 instruct loadConUL32(rRegL dst, immUL32 src)
7756 %{
7757 match(Set dst src);
7758
7759 ins_cost(60);
7760 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7761 ins_encode %{
7762 __ movl($dst$$Register, $src$$constant);
7763 %}
7764 ins_pipe(ialu_reg);
7765 %}
7766
7767 instruct loadConL32(rRegL dst, immL32 src)
7768 %{
7769 match(Set dst src);
7770
7771 ins_cost(70);
7772 format %{ "movq $dst, $src\t# long (32-bit)" %}
7773 ins_encode %{
7774 __ movq($dst$$Register, $src$$constant);
7775 %}
7776 ins_pipe(ialu_reg);
7777 %}
7778
7779 instruct loadConP(rRegP dst, immP con) %{
7780 match(Set dst con);
7781
7782 format %{ "movq $dst, $con\t# ptr" %}
7783 ins_encode %{
7784 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7785 %}
7786 ins_pipe(ialu_reg_fat); // XXX
7787 %}
7788
7789 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7790 %{
7791 match(Set dst src);
7792 effect(KILL cr);
7793
7794 ins_cost(50);
7795 format %{ "xorl $dst, $dst\t# ptr" %}
7796 ins_encode %{
7797 __ xorl($dst$$Register, $dst$$Register);
7798 %}
7799 ins_pipe(ialu_reg);
7800 %}
7801
7802 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7803 %{
7804 match(Set dst src);
7805 effect(KILL cr);
7806
7807 ins_cost(60);
7808 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7809 ins_encode %{
7810 __ movl($dst$$Register, $src$$constant);
7811 %}
7812 ins_pipe(ialu_reg);
7813 %}
7814
7815 instruct loadConF(regF dst, immF con) %{
7816 match(Set dst con);
7817 ins_cost(125);
7818 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7819 ins_encode %{
7820 __ movflt($dst$$XMMRegister, $constantaddress($con));
7821 %}
7822 ins_pipe(pipe_slow);
7823 %}
7824
7825 instruct loadConH(regF dst, immH con) %{
7826 match(Set dst con);
7827 ins_cost(125);
7828 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7829 ins_encode %{
7830 __ movflt($dst$$XMMRegister, $constantaddress($con));
7831 %}
7832 ins_pipe(pipe_slow);
7833 %}
7834
7835 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7836 match(Set dst src);
7837 effect(KILL cr);
7838 format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7839 ins_encode %{
7840 __ xorq($dst$$Register, $dst$$Register);
7841 %}
7842 ins_pipe(ialu_reg);
7843 %}
7844
7845 instruct loadConN(rRegN dst, immN src) %{
7846 match(Set dst src);
7847
7848 ins_cost(125);
7849 format %{ "movl $dst, $src\t# compressed ptr" %}
7850 ins_encode %{
7851 address con = (address)$src$$constant;
7852 if (con == nullptr) {
7853 ShouldNotReachHere();
7854 } else {
7855 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7856 }
7857 %}
7858 ins_pipe(ialu_reg_fat); // XXX
7859 %}
7860
7861 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7862 match(Set dst src);
7863
7864 ins_cost(125);
7865 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7866 ins_encode %{
7867 address con = (address)$src$$constant;
7868 if (con == nullptr) {
7869 ShouldNotReachHere();
7870 } else {
7871 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7872 }
7873 %}
7874 ins_pipe(ialu_reg_fat); // XXX
7875 %}
7876
7877 instruct loadConF0(regF dst, immF0 src)
7878 %{
7879 match(Set dst src);
7880 ins_cost(100);
7881
7882 format %{ "xorps $dst, $dst\t# float 0.0" %}
7883 ins_encode %{
7884 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7885 %}
7886 ins_pipe(pipe_slow);
7887 %}
7888
7889 // Use the same format since predicate() can not be used here.
7890 instruct loadConD(regD dst, immD con) %{
7891 match(Set dst con);
7892 ins_cost(125);
7893 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7894 ins_encode %{
7895 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7896 %}
7897 ins_pipe(pipe_slow);
7898 %}
7899
7900 instruct loadConD0(regD dst, immD0 src)
7901 %{
7902 match(Set dst src);
7903 ins_cost(100);
7904
7905 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7906 ins_encode %{
7907 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7908 %}
7909 ins_pipe(pipe_slow);
7910 %}
7911
7912 instruct loadSSI(rRegI dst, stackSlotI src)
7913 %{
7914 match(Set dst src);
7915
7916 ins_cost(125);
7917 format %{ "movl $dst, $src\t# int stk" %}
7918 ins_encode %{
7919 __ movl($dst$$Register, $src$$Address);
7920 %}
7921 ins_pipe(ialu_reg_mem);
7922 %}
7923
7924 instruct loadSSL(rRegL dst, stackSlotL src)
7925 %{
7926 match(Set dst src);
7927
7928 ins_cost(125);
7929 format %{ "movq $dst, $src\t# long stk" %}
7930 ins_encode %{
7931 __ movq($dst$$Register, $src$$Address);
7932 %}
7933 ins_pipe(ialu_reg_mem);
7934 %}
7935
7936 instruct loadSSP(rRegP dst, stackSlotP src)
7937 %{
7938 match(Set dst src);
7939
7940 ins_cost(125);
7941 format %{ "movq $dst, $src\t# ptr stk" %}
7942 ins_encode %{
7943 __ movq($dst$$Register, $src$$Address);
7944 %}
7945 ins_pipe(ialu_reg_mem);
7946 %}
7947
7948 instruct loadSSF(regF dst, stackSlotF src)
7949 %{
7950 match(Set dst src);
7951
7952 ins_cost(125);
7953 format %{ "movss $dst, $src\t# float stk" %}
7954 ins_encode %{
7955 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7956 %}
7957 ins_pipe(pipe_slow); // XXX
7958 %}
7959
7960 // Use the same format since predicate() can not be used here.
7961 instruct loadSSD(regD dst, stackSlotD src)
7962 %{
7963 match(Set dst src);
7964
7965 ins_cost(125);
7966 format %{ "movsd $dst, $src\t# double stk" %}
7967 ins_encode %{
7968 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7969 %}
7970 ins_pipe(pipe_slow); // XXX
7971 %}
7972
7973 // Prefetch instructions for allocation.
7974 // Must be safe to execute with invalid address (cannot fault).
7975
7976 instruct prefetchAlloc( memory mem ) %{
7977 predicate(AllocatePrefetchInstr==3);
7978 match(PrefetchAllocation mem);
7979 ins_cost(125);
7980
7981 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7982 ins_encode %{
7983 __ prefetchw($mem$$Address);
7984 %}
7985 ins_pipe(ialu_mem);
7986 %}
7987
7988 instruct prefetchAllocNTA( memory mem ) %{
7989 predicate(AllocatePrefetchInstr==0);
7990 match(PrefetchAllocation mem);
7991 ins_cost(125);
7992
7993 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7994 ins_encode %{
7995 __ prefetchnta($mem$$Address);
7996 %}
7997 ins_pipe(ialu_mem);
7998 %}
7999
8000 instruct prefetchAllocT0( memory mem ) %{
8001 predicate(AllocatePrefetchInstr==1);
8002 match(PrefetchAllocation mem);
8003 ins_cost(125);
8004
8005 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
8006 ins_encode %{
8007 __ prefetcht0($mem$$Address);
8008 %}
8009 ins_pipe(ialu_mem);
8010 %}
8011
8012 instruct prefetchAllocT2( memory mem ) %{
8013 predicate(AllocatePrefetchInstr==2);
8014 match(PrefetchAllocation mem);
8015 ins_cost(125);
8016
8017 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8018 ins_encode %{
8019 __ prefetcht2($mem$$Address);
8020 %}
8021 ins_pipe(ialu_mem);
8022 %}
8023
8024 //----------Store Instructions-------------------------------------------------
8025
8026 // Store Byte
8027 instruct storeB(memory mem, rRegI src)
8028 %{
8029 match(Set mem (StoreB mem src));
8030
8031 ins_cost(125); // XXX
8032 format %{ "movb $mem, $src\t# byte" %}
8033 ins_encode %{
8034 __ movb($mem$$Address, $src$$Register);
8035 %}
8036 ins_pipe(ialu_mem_reg);
8037 %}
8038
8039 // Store Char/Short
8040 instruct storeC(memory mem, rRegI src)
8041 %{
8042 match(Set mem (StoreC mem src));
8043
8044 ins_cost(125); // XXX
8045 format %{ "movw $mem, $src\t# char/short" %}
8046 ins_encode %{
8047 __ movw($mem$$Address, $src$$Register);
8048 %}
8049 ins_pipe(ialu_mem_reg);
8050 %}
8051
8052 // Store Integer
8053 instruct storeI(memory mem, rRegI src)
8054 %{
8055 match(Set mem (StoreI mem src));
8056
8057 ins_cost(125); // XXX
8058 format %{ "movl $mem, $src\t# int" %}
8059 ins_encode %{
8060 __ movl($mem$$Address, $src$$Register);
8061 %}
8062 ins_pipe(ialu_mem_reg);
8063 %}
8064
8065 // Store Long
8066 instruct storeL(memory mem, rRegL src)
8067 %{
8068 match(Set mem (StoreL mem src));
8069
8070 ins_cost(125); // XXX
8071 format %{ "movq $mem, $src\t# long" %}
8072 ins_encode %{
8073 __ movq($mem$$Address, $src$$Register);
8074 %}
8075 ins_pipe(ialu_mem_reg); // XXX
8076 %}
8077
8078 // Store Pointer
8079 instruct storeP(memory mem, any_RegP src)
8080 %{
8081 predicate(n->as_Store()->barrier_data() == 0);
8082 match(Set mem (StoreP mem src));
8083
8084 ins_cost(125); // XXX
8085 format %{ "movq $mem, $src\t# ptr" %}
8086 ins_encode %{
8087 __ movq($mem$$Address, $src$$Register);
8088 %}
8089 ins_pipe(ialu_mem_reg);
8090 %}
8091
8092 instruct storeImmP0(memory mem, immP0 zero)
8093 %{
8094 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8095 match(Set mem (StoreP mem zero));
8096
8097 ins_cost(125); // XXX
8098 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8099 ins_encode %{
8100 __ movq($mem$$Address, r12);
8101 %}
8102 ins_pipe(ialu_mem_reg);
8103 %}
8104
8105 // Store Null Pointer, mark word, or other simple pointer constant.
8106 instruct storeImmP(memory mem, immP31 src)
8107 %{
8108 predicate(n->as_Store()->barrier_data() == 0);
8109 match(Set mem (StoreP mem src));
8110
8111 ins_cost(150); // XXX
8112 format %{ "movq $mem, $src\t# ptr" %}
8113 ins_encode %{
8114 __ movq($mem$$Address, $src$$constant);
8115 %}
8116 ins_pipe(ialu_mem_imm);
8117 %}
8118
8119 // Store Compressed Pointer
8120 instruct storeN(memory mem, rRegN src)
8121 %{
8122 predicate(n->as_Store()->barrier_data() == 0);
8123 match(Set mem (StoreN mem src));
8124
8125 ins_cost(125); // XXX
8126 format %{ "movl $mem, $src\t# compressed ptr" %}
8127 ins_encode %{
8128 __ movl($mem$$Address, $src$$Register);
8129 %}
8130 ins_pipe(ialu_mem_reg);
8131 %}
8132
8133 instruct storeNKlass(memory mem, rRegN src)
8134 %{
8135 match(Set mem (StoreNKlass mem src));
8136
8137 ins_cost(125); // XXX
8138 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8139 ins_encode %{
8140 __ movl($mem$$Address, $src$$Register);
8141 %}
8142 ins_pipe(ialu_mem_reg);
8143 %}
8144
8145 instruct storeImmN0(memory mem, immN0 zero)
8146 %{
8147 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8148 match(Set mem (StoreN mem zero));
8149
8150 ins_cost(125); // XXX
8151 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8152 ins_encode %{
8153 __ movl($mem$$Address, r12);
8154 %}
8155 ins_pipe(ialu_mem_reg);
8156 %}
8157
8158 instruct storeImmN(memory mem, immN src)
8159 %{
8160 predicate(n->as_Store()->barrier_data() == 0);
8161 match(Set mem (StoreN mem src));
8162
8163 ins_cost(150); // XXX
8164 format %{ "movl $mem, $src\t# compressed ptr" %}
8165 ins_encode %{
8166 address con = (address)$src$$constant;
8167 if (con == nullptr) {
8168 __ movl($mem$$Address, 0);
8169 } else {
8170 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8171 }
8172 %}
8173 ins_pipe(ialu_mem_imm);
8174 %}
8175
8176 instruct storeImmNKlass(memory mem, immNKlass src)
8177 %{
8178 match(Set mem (StoreNKlass mem src));
8179
8180 ins_cost(150); // XXX
8181 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8182 ins_encode %{
8183 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8184 %}
8185 ins_pipe(ialu_mem_imm);
8186 %}
8187
8188 // Store Integer Immediate
8189 instruct storeImmI0(memory mem, immI_0 zero)
8190 %{
8191 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8192 match(Set mem (StoreI mem zero));
8193
8194 ins_cost(125); // XXX
8195 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8196 ins_encode %{
8197 __ movl($mem$$Address, r12);
8198 %}
8199 ins_pipe(ialu_mem_reg);
8200 %}
8201
8202 instruct storeImmI(memory mem, immI src)
8203 %{
8204 match(Set mem (StoreI mem src));
8205
8206 ins_cost(150);
8207 format %{ "movl $mem, $src\t# int" %}
8208 ins_encode %{
8209 __ movl($mem$$Address, $src$$constant);
8210 %}
8211 ins_pipe(ialu_mem_imm);
8212 %}
8213
8214 // Store Long Immediate
8215 instruct storeImmL0(memory mem, immL0 zero)
8216 %{
8217 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8218 match(Set mem (StoreL mem zero));
8219
8220 ins_cost(125); // XXX
8221 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8222 ins_encode %{
8223 __ movq($mem$$Address, r12);
8224 %}
8225 ins_pipe(ialu_mem_reg);
8226 %}
8227
8228 instruct storeImmL(memory mem, immL32 src)
8229 %{
8230 match(Set mem (StoreL mem src));
8231
8232 ins_cost(150);
8233 format %{ "movq $mem, $src\t# long" %}
8234 ins_encode %{
8235 __ movq($mem$$Address, $src$$constant);
8236 %}
8237 ins_pipe(ialu_mem_imm);
8238 %}
8239
8240 // Store Short/Char Immediate
8241 instruct storeImmC0(memory mem, immI_0 zero)
8242 %{
8243 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8244 match(Set mem (StoreC mem zero));
8245
8246 ins_cost(125); // XXX
8247 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8248 ins_encode %{
8249 __ movw($mem$$Address, r12);
8250 %}
8251 ins_pipe(ialu_mem_reg);
8252 %}
8253
8254 instruct storeImmI16(memory mem, immI16 src)
8255 %{
8256 predicate(UseStoreImmI16);
8257 match(Set mem (StoreC mem src));
8258
8259 ins_cost(150);
8260 format %{ "movw $mem, $src\t# short/char" %}
8261 ins_encode %{
8262 __ movw($mem$$Address, $src$$constant);
8263 %}
8264 ins_pipe(ialu_mem_imm);
8265 %}
8266
8267 // Store Byte Immediate
8268 instruct storeImmB0(memory mem, immI_0 zero)
8269 %{
8270 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8271 match(Set mem (StoreB mem zero));
8272
8273 ins_cost(125); // XXX
8274 format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8275 ins_encode %{
8276 __ movb($mem$$Address, r12);
8277 %}
8278 ins_pipe(ialu_mem_reg);
8279 %}
8280
8281 instruct storeImmB(memory mem, immI8 src)
8282 %{
8283 match(Set mem (StoreB mem src));
8284
8285 ins_cost(150); // XXX
8286 format %{ "movb $mem, $src\t# byte" %}
8287 ins_encode %{
8288 __ movb($mem$$Address, $src$$constant);
8289 %}
8290 ins_pipe(ialu_mem_imm);
8291 %}
8292
8293 // Store Float
8294 instruct storeF(memory mem, regF src)
8295 %{
8296 match(Set mem (StoreF mem src));
8297
8298 ins_cost(95); // XXX
8299 format %{ "movss $mem, $src\t# float" %}
8300 ins_encode %{
8301 __ movflt($mem$$Address, $src$$XMMRegister);
8302 %}
8303 ins_pipe(pipe_slow); // XXX
8304 %}
8305
8306 // Store immediate Float value (it is faster than store from XMM register)
8307 instruct storeF0(memory mem, immF0 zero)
8308 %{
8309 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8310 match(Set mem (StoreF mem zero));
8311
8312 ins_cost(25); // XXX
8313 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8314 ins_encode %{
8315 __ movl($mem$$Address, r12);
8316 %}
8317 ins_pipe(ialu_mem_reg);
8318 %}
8319
8320 instruct storeF_imm(memory mem, immF src)
8321 %{
8322 match(Set mem (StoreF mem src));
8323
8324 ins_cost(50);
8325 format %{ "movl $mem, $src\t# float" %}
8326 ins_encode %{
8327 __ movl($mem$$Address, jint_cast($src$$constant));
8328 %}
8329 ins_pipe(ialu_mem_imm);
8330 %}
8331
8332 // Store Double
8333 instruct storeD(memory mem, regD src)
8334 %{
8335 match(Set mem (StoreD mem src));
8336
8337 ins_cost(95); // XXX
8338 format %{ "movsd $mem, $src\t# double" %}
8339 ins_encode %{
8340 __ movdbl($mem$$Address, $src$$XMMRegister);
8341 %}
8342 ins_pipe(pipe_slow); // XXX
8343 %}
8344
8345 // Store immediate double 0.0 (it is faster than store from XMM register)
8346 instruct storeD0_imm(memory mem, immD0 src)
8347 %{
8348 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8349 match(Set mem (StoreD mem src));
8350
8351 ins_cost(50);
8352 format %{ "movq $mem, $src\t# double 0." %}
8353 ins_encode %{
8354 __ movq($mem$$Address, $src$$constant);
8355 %}
8356 ins_pipe(ialu_mem_imm);
8357 %}
8358
8359 instruct storeD0(memory mem, immD0 zero)
8360 %{
8361 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8362 match(Set mem (StoreD mem zero));
8363
8364 ins_cost(25); // XXX
8365 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8366 ins_encode %{
8367 __ movq($mem$$Address, r12);
8368 %}
8369 ins_pipe(ialu_mem_reg);
8370 %}
8371
8372 instruct storeSSI(stackSlotI dst, rRegI src)
8373 %{
8374 match(Set dst src);
8375
8376 ins_cost(100);
8377 format %{ "movl $dst, $src\t# int stk" %}
8378 ins_encode %{
8379 __ movl($dst$$Address, $src$$Register);
8380 %}
8381 ins_pipe( ialu_mem_reg );
8382 %}
8383
8384 instruct storeSSL(stackSlotL dst, rRegL src)
8385 %{
8386 match(Set dst src);
8387
8388 ins_cost(100);
8389 format %{ "movq $dst, $src\t# long stk" %}
8390 ins_encode %{
8391 __ movq($dst$$Address, $src$$Register);
8392 %}
8393 ins_pipe(ialu_mem_reg);
8394 %}
8395
8396 instruct storeSSP(stackSlotP dst, rRegP src)
8397 %{
8398 match(Set dst src);
8399
8400 ins_cost(100);
8401 format %{ "movq $dst, $src\t# ptr stk" %}
8402 ins_encode %{
8403 __ movq($dst$$Address, $src$$Register);
8404 %}
8405 ins_pipe(ialu_mem_reg);
8406 %}
8407
8408 instruct storeSSF(stackSlotF dst, regF src)
8409 %{
8410 match(Set dst src);
8411
8412 ins_cost(95); // XXX
8413 format %{ "movss $dst, $src\t# float stk" %}
8414 ins_encode %{
8415 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8416 %}
8417 ins_pipe(pipe_slow); // XXX
8418 %}
8419
8420 instruct storeSSD(stackSlotD dst, regD src)
8421 %{
8422 match(Set dst src);
8423
8424 ins_cost(95); // XXX
8425 format %{ "movsd $dst, $src\t# double stk" %}
8426 ins_encode %{
8427 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8428 %}
8429 ins_pipe(pipe_slow); // XXX
8430 %}
8431
8432 instruct cacheWB(indirect addr)
8433 %{
8434 predicate(VM_Version::supports_data_cache_line_flush());
8435 match(CacheWB addr);
8436
8437 ins_cost(100);
8438 format %{"cache wb $addr" %}
8439 ins_encode %{
8440 assert($addr->index_position() < 0, "should be");
8441 assert($addr$$disp == 0, "should be");
8442 __ cache_wb(Address($addr$$base$$Register, 0));
8443 %}
8444 ins_pipe(pipe_slow); // XXX
8445 %}
8446
8447 instruct cacheWBPreSync()
8448 %{
8449 predicate(VM_Version::supports_data_cache_line_flush());
8450 match(CacheWBPreSync);
8451
8452 ins_cost(100);
8453 format %{"cache wb presync" %}
8454 ins_encode %{
8455 __ cache_wbsync(true);
8456 %}
8457 ins_pipe(pipe_slow); // XXX
8458 %}
8459
8460 instruct cacheWBPostSync()
8461 %{
8462 predicate(VM_Version::supports_data_cache_line_flush());
8463 match(CacheWBPostSync);
8464
8465 ins_cost(100);
8466 format %{"cache wb postsync" %}
8467 ins_encode %{
8468 __ cache_wbsync(false);
8469 %}
8470 ins_pipe(pipe_slow); // XXX
8471 %}
8472
8473 //----------BSWAP Instructions-------------------------------------------------
8474 instruct bytes_reverse_int(rRegI dst) %{
8475 match(Set dst (ReverseBytesI dst));
8476
8477 format %{ "bswapl $dst" %}
8478 ins_encode %{
8479 __ bswapl($dst$$Register);
8480 %}
8481 ins_pipe( ialu_reg );
8482 %}
8483
8484 instruct bytes_reverse_long(rRegL dst) %{
8485 match(Set dst (ReverseBytesL dst));
8486
8487 format %{ "bswapq $dst" %}
8488 ins_encode %{
8489 __ bswapq($dst$$Register);
8490 %}
8491 ins_pipe( ialu_reg);
8492 %}
8493
8494 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8495 match(Set dst (ReverseBytesUS dst));
8496 effect(KILL cr);
8497
8498 format %{ "bswapl $dst\n\t"
8499 "shrl $dst,16\n\t" %}
8500 ins_encode %{
8501 __ bswapl($dst$$Register);
8502 __ shrl($dst$$Register, 16);
8503 %}
8504 ins_pipe( ialu_reg );
8505 %}
8506
8507 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8508 match(Set dst (ReverseBytesS dst));
8509 effect(KILL cr);
8510
8511 format %{ "bswapl $dst\n\t"
8512 "sarl $dst,16\n\t" %}
8513 ins_encode %{
8514 __ bswapl($dst$$Register);
8515 __ sarl($dst$$Register, 16);
8516 %}
8517 ins_pipe( ialu_reg );
8518 %}
8519
8520 //---------- Zeros Count Instructions ------------------------------------------
8521
8522 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8523 predicate(UseCountLeadingZerosInstruction);
8524 match(Set dst (CountLeadingZerosI src));
8525 effect(KILL cr);
8526
8527 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8528 ins_encode %{
8529 __ lzcntl($dst$$Register, $src$$Register);
8530 %}
8531 ins_pipe(ialu_reg);
8532 %}
8533
8534 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8535 predicate(UseCountLeadingZerosInstruction);
8536 match(Set dst (CountLeadingZerosI (LoadI src)));
8537 effect(KILL cr);
8538 ins_cost(175);
8539 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8540 ins_encode %{
8541 __ lzcntl($dst$$Register, $src$$Address);
8542 %}
8543 ins_pipe(ialu_reg_mem);
8544 %}
8545
8546 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8547 predicate(!UseCountLeadingZerosInstruction);
8548 match(Set dst (CountLeadingZerosI src));
8549 effect(KILL cr);
8550
8551 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8552 "jnz skip\n\t"
8553 "movl $dst, -1\n"
8554 "skip:\n\t"
8555 "negl $dst\n\t"
8556 "addl $dst, 31" %}
8557 ins_encode %{
8558 Register Rdst = $dst$$Register;
8559 Register Rsrc = $src$$Register;
8560 Label skip;
8561 __ bsrl(Rdst, Rsrc);
8562 __ jccb(Assembler::notZero, skip);
8563 __ movl(Rdst, -1);
8564 __ bind(skip);
8565 __ negl(Rdst);
8566 __ addl(Rdst, BitsPerInt - 1);
8567 %}
8568 ins_pipe(ialu_reg);
8569 %}
8570
8571 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8572 predicate(UseCountLeadingZerosInstruction);
8573 match(Set dst (CountLeadingZerosL src));
8574 effect(KILL cr);
8575
8576 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8577 ins_encode %{
8578 __ lzcntq($dst$$Register, $src$$Register);
8579 %}
8580 ins_pipe(ialu_reg);
8581 %}
8582
8583 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8584 predicate(UseCountLeadingZerosInstruction);
8585 match(Set dst (CountLeadingZerosL (LoadL src)));
8586 effect(KILL cr);
8587 ins_cost(175);
8588 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8589 ins_encode %{
8590 __ lzcntq($dst$$Register, $src$$Address);
8591 %}
8592 ins_pipe(ialu_reg_mem);
8593 %}
8594
8595 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8596 predicate(!UseCountLeadingZerosInstruction);
8597 match(Set dst (CountLeadingZerosL src));
8598 effect(KILL cr);
8599
8600 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8601 "jnz skip\n\t"
8602 "movl $dst, -1\n"
8603 "skip:\n\t"
8604 "negl $dst\n\t"
8605 "addl $dst, 63" %}
8606 ins_encode %{
8607 Register Rdst = $dst$$Register;
8608 Register Rsrc = $src$$Register;
8609 Label skip;
8610 __ bsrq(Rdst, Rsrc);
8611 __ jccb(Assembler::notZero, skip);
8612 __ movl(Rdst, -1);
8613 __ bind(skip);
8614 __ negl(Rdst);
8615 __ addl(Rdst, BitsPerLong - 1);
8616 %}
8617 ins_pipe(ialu_reg);
8618 %}
8619
8620 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8621 predicate(UseCountTrailingZerosInstruction);
8622 match(Set dst (CountTrailingZerosI src));
8623 effect(KILL cr);
8624
8625 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8626 ins_encode %{
8627 __ tzcntl($dst$$Register, $src$$Register);
8628 %}
8629 ins_pipe(ialu_reg);
8630 %}
8631
8632 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8633 predicate(UseCountTrailingZerosInstruction);
8634 match(Set dst (CountTrailingZerosI (LoadI src)));
8635 effect(KILL cr);
8636 ins_cost(175);
8637 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8638 ins_encode %{
8639 __ tzcntl($dst$$Register, $src$$Address);
8640 %}
8641 ins_pipe(ialu_reg_mem);
8642 %}
8643
8644 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8645 predicate(!UseCountTrailingZerosInstruction);
8646 match(Set dst (CountTrailingZerosI src));
8647 effect(KILL cr);
8648
8649 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8650 "jnz done\n\t"
8651 "movl $dst, 32\n"
8652 "done:" %}
8653 ins_encode %{
8654 Register Rdst = $dst$$Register;
8655 Label done;
8656 __ bsfl(Rdst, $src$$Register);
8657 __ jccb(Assembler::notZero, done);
8658 __ movl(Rdst, BitsPerInt);
8659 __ bind(done);
8660 %}
8661 ins_pipe(ialu_reg);
8662 %}
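// BSF already yields the trailing-zero count for a non-zero source (the index of
// the lowest set bit), so the fallback only needs to patch the zero case, where BSF
// sets ZF and leaves the destination undefined:
//
//   src != 0:  dst = bsf(src)           // == Integer.numberOfTrailingZeros(src)
//   src == 0:  dst = 32  (64 for long)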
8663
8664 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8665 predicate(UseCountTrailingZerosInstruction);
8666 match(Set dst (CountTrailingZerosL src));
8667 effect(KILL cr);
8668
8669 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8670 ins_encode %{
8671 __ tzcntq($dst$$Register, $src$$Register);
8672 %}
8673 ins_pipe(ialu_reg);
8674 %}
8675
8676 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8677 predicate(UseCountTrailingZerosInstruction);
8678 match(Set dst (CountTrailingZerosL (LoadL src)));
8679 effect(KILL cr);
8680 ins_cost(175);
8681 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8682 ins_encode %{
8683 __ tzcntq($dst$$Register, $src$$Address);
8684 %}
8685 ins_pipe(ialu_reg_mem);
8686 %}
8687
8688 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8689 predicate(!UseCountTrailingZerosInstruction);
8690 match(Set dst (CountTrailingZerosL src));
8691 effect(KILL cr);
8692
8693 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8694 "jnz done\n\t"
8695 "movl $dst, 64\n"
8696 "done:" %}
8697 ins_encode %{
8698 Register Rdst = $dst$$Register;
8699 Label done;
8700 __ bsfq(Rdst, $src$$Register);
8701 __ jccb(Assembler::notZero, done);
8702 __ movl(Rdst, BitsPerLong);
8703 __ bind(done);
8704 %}
8705 ins_pipe(ialu_reg);
8706 %}
8707
8708 //--------------- Reverse Operation Instructions ----------------
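// ReverseI/ReverseL are full bit reversals (Integer.reverse / Long.reverse) handled
// by the MacroAssembler::reverseI/reverseL helpers. Without GFNI only GPR temps are
// passed, presumably for the classic shift/mask swap sequence; with GFNI the helpers
// also get two XMM temps, where a GF2P8AFFINEQB constant can reverse the bits within
// each byte before a final byte swap, hence the extra register pressure of the
// _gfni rules.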
8709 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8710 predicate(!VM_Version::supports_gfni());
8711 match(Set dst (ReverseI src));
8712 effect(TEMP dst, TEMP rtmp, KILL cr);
8713 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8714 ins_encode %{
8715 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8716 %}
8717 ins_pipe( ialu_reg );
8718 %}
8719
8720 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8721 predicate(VM_Version::supports_gfni());
8722 match(Set dst (ReverseI src));
8723 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8724 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8725 ins_encode %{
8726 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8727 %}
8728 ins_pipe( ialu_reg );
8729 %}
8730
8731 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8732 predicate(!VM_Version::supports_gfni());
8733 match(Set dst (ReverseL src));
8734 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8735 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8736 ins_encode %{
8737 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8738 %}
8739 ins_pipe( ialu_reg );
8740 %}
8741
8742 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8743 predicate(VM_Version::supports_gfni());
8744 match(Set dst (ReverseL src));
8745 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8746 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8747 ins_encode %{
8748 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8749 %}
8750 ins_pipe( ialu_reg );
8751 %}
8752
8753 //---------- Population Count Instructions -------------------------------------
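// PopCountI/PopCountL come from Integer.bitCount / Long.bitCount. They are matched
// only when UsePopCountInstruction is set, i.e. when the CPU reports the POPCNT
// feature; otherwise the intrinsic is rejected and the pure-Java implementation of
// bitCount is used instead.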
8754
8755 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8756 predicate(UsePopCountInstruction);
8757 match(Set dst (PopCountI src));
8758 effect(KILL cr);
8759
8760 format %{ "popcnt $dst, $src" %}
8761 ins_encode %{
8762 __ popcntl($dst$$Register, $src$$Register);
8763 %}
8764 ins_pipe(ialu_reg);
8765 %}
8766
8767 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8768 predicate(UsePopCountInstruction);
8769 match(Set dst (PopCountI (LoadI mem)));
8770 effect(KILL cr);
8771
8772 format %{ "popcnt $dst, $mem" %}
8773 ins_encode %{
8774 __ popcntl($dst$$Register, $mem$$Address);
8775 %}
8776 ins_pipe(ialu_reg);
8777 %}
8778
8779 // Note: Long.bitCount(long) returns an int.
8780 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8781 predicate(UsePopCountInstruction);
8782 match(Set dst (PopCountL src));
8783 effect(KILL cr);
8784
8785 format %{ "popcnt $dst, $src" %}
8786 ins_encode %{
8787 __ popcntq($dst$$Register, $src$$Register);
8788 %}
8789 ins_pipe(ialu_reg);
8790 %}
8791
8792 // Note: Long.bitCount(long) returns an int.
8793 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8794 predicate(UsePopCountInstruction);
8795 match(Set dst (PopCountL (LoadL mem)));
8796 effect(KILL cr);
8797
8798 format %{ "popcnt $dst, $mem" %}
8799 ins_encode %{
8800 __ popcntq($dst$$Register, $mem$$Address);
8801 %}
8802 ins_pipe(ialu_reg);
8803 %}
8804
8805
8806 //----------MemBar Instructions-----------------------------------------------
8807 // Memory barrier flavors
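// On x86 the hardware memory model already orders load-load, load-store and
// store-store, so the acquire, release and storestore barriers below compile to
// nothing and exist only to constrain the compiler's own reordering. The one
// barrier that needs real code is StoreLoad (MemBarVolatile), handled further down.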
8808
8809 instruct membar_acquire()
8810 %{
8811 match(MemBarAcquire);
8812 match(LoadFence);
8813 ins_cost(0);
8814
8815 size(0);
8816 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8817 ins_encode();
8818 ins_pipe(empty);
8819 %}
8820
8821 instruct membar_acquire_lock()
8822 %{
8823 match(MemBarAcquireLock);
8824 ins_cost(0);
8825
8826 size(0);
8827 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8828 ins_encode();
8829 ins_pipe(empty);
8830 %}
8831
8832 instruct membar_release()
8833 %{
8834 match(MemBarRelease);
8835 match(StoreFence);
8836 ins_cost(0);
8837
8838 size(0);
8839 format %{ "MEMBAR-release ! (empty encoding)" %}
8840 ins_encode();
8841 ins_pipe(empty);
8842 %}
8843
8844 instruct membar_release_lock()
8845 %{
8846 match(MemBarReleaseLock);
8847 ins_cost(0);
8848
8849 size(0);
8850 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8851 ins_encode();
8852 ins_pipe(empty);
8853 %}
8854
8855 instruct membar_volatile(rFlagsReg cr) %{
8856 match(MemBarVolatile);
8857 effect(KILL cr);
8858 ins_cost(400);
8859
8860 format %{
8861 $$template
8862 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8863 %}
8864 ins_encode %{
8865 __ membar(Assembler::StoreLoad);
8866 %}
8867 ins_pipe(pipe_slow);
8868 %}
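// The StoreLoad barrier is a locked read-modify-write of the word at [rsp] rather
// than mfence: the locked instruction is just as serializing but is typically
// cheaper, and the stack slot it touches is almost certainly already in L1 and
// exclusively owned by this core.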
8869
8870 instruct unnecessary_membar_volatile()
8871 %{
8872 match(MemBarVolatile);
8873 predicate(Matcher::post_store_load_barrier(n));
8874 ins_cost(0);
8875
8876 size(0);
8877 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8878 ins_encode();
8879 ins_pipe(empty);
8880 %}
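// Matcher::post_store_load_barrier() essentially scans forward from the
// MemBarVolatile and returns true when a later instruction in the same block (for
// example a locked compare-and-swap) already orders stores before subsequent loads,
// so the explicit barrier can be emitted empty.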
8881
8882 instruct membar_storestore() %{
8883 match(MemBarStoreStore);
8884 match(StoreStoreFence);
8885 ins_cost(0);
8886
8887 size(0);
8888 format %{ "MEMBAR-storestore (empty encoding)" %}
8889 ins_encode( );
8890 ins_pipe(empty);
8891 %}
8892
8893 //----------Move Instructions--------------------------------------------------
8894
8895 instruct castX2P(rRegP dst, rRegL src)
8896 %{
8897 match(Set dst (CastX2P src));
8898
8899 format %{ "movq $dst, $src\t# long -> ptr" %}
8900 ins_encode %{
8901 if ($dst$$reg != $src$$reg) {
8902 __ movptr($dst$$Register, $src$$Register);
8903 }
8904 %}
8905 ins_pipe(ialu_reg_reg); // XXX
8906 %}
8907
8908 instruct castP2X(rRegL dst, rRegP src)
8909 %{
8910 match(Set dst (CastP2X src));
8911
8912 format %{ "movq $dst, $src\t# ptr -> long" %}
8913 ins_encode %{
8914 if ($dst$$reg != $src$$reg) {
8915 __ movptr($dst$$Register, $src$$Register);
8916 }
8917 %}
8918 ins_pipe(ialu_reg_reg); // XXX
8919 %}
8920
8921 // Convert oop into int for vector alignment masking
8922 instruct convP2I(rRegI dst, rRegP src)
8923 %{
8924 match(Set dst (ConvL2I (CastP2X src)));
8925
8926 format %{ "movl $dst, $src\t# ptr -> int" %}
8927 ins_encode %{
8928 __ movl($dst$$Register, $src$$Register);
8929 %}
8930 ins_pipe(ialu_reg_reg); // XXX
8931 %}
8932
8933 // Convert compressed oop into int for vector alignment masking
8934 // in the case of 32-bit oops (heap < 4Gb).
8935 instruct convN2I(rRegI dst, rRegN src)
8936 %{
8937 predicate(CompressedOops::shift() == 0);
8938 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8939
8940 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8941 ins_encode %{
8942 __ movl($dst$$Register, $src$$Register);
8943 %}
8944 ins_pipe(ialu_reg_reg); // XXX
8945 %}
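// Compressed oops: in the usual encoding a 64-bit oop is narrowed to 32 bits as
// (oop - heap_base) >> shift and widened again by the inverse. convN2I above is
// only legal when the shift is zero (unscaled mode), where the compressed value is
// bit-identical to the low 32 bits of the raw pointer. The plain encode/decode
// rules below must preserve null, while the _not_null variants can skip that check
// because the type system has already proven the value non-null.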
8946
8947 // Convert oop pointer into compressed form
8948 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8949 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8950 match(Set dst (EncodeP src));
8951 effect(KILL cr);
8952 format %{ "encode_heap_oop $dst,$src" %}
8953 ins_encode %{
8954 Register s = $src$$Register;
8955 Register d = $dst$$Register;
8956 if (s != d) {
8957 __ movq(d, s);
8958 }
8959 __ encode_heap_oop(d);
8960 %}
8961 ins_pipe(ialu_reg_long);
8962 %}
8963
8964 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8965 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8966 match(Set dst (EncodeP src));
8967 effect(KILL cr);
8968 format %{ "encode_heap_oop_not_null $dst,$src" %}
8969 ins_encode %{
8970 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8971 %}
8972 ins_pipe(ialu_reg_long);
8973 %}
8974
8975 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8976 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8977 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8978 match(Set dst (DecodeN src));
8979 effect(KILL cr);
8980 format %{ "decode_heap_oop $dst,$src" %}
8981 ins_encode %{
8982 Register s = $src$$Register;
8983 Register d = $dst$$Register;
8984 if (s != d) {
8985 __ movq(d, s);
8986 }
8987 __ decode_heap_oop(d);
8988 %}
8989 ins_pipe(ialu_reg_long);
8990 %}
8991
8992 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8993 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8994 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8995 match(Set dst (DecodeN src));
8996 effect(KILL cr);
8997 format %{ "decode_heap_oop_not_null $dst,$src" %}
8998 ins_encode %{
8999 Register s = $src$$Register;
9000 Register d = $dst$$Register;
9001 if (s != d) {
9002 __ decode_heap_oop_not_null(d, s);
9003 } else {
9004 __ decode_heap_oop_not_null(d);
9005 }
9006 %}
9007 ins_pipe(ialu_reg_long);
9008 %}
9009
9010 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9011 match(Set dst (EncodePKlass src));
9012 effect(TEMP dst, KILL cr);
9013 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9014 ins_encode %{
9015 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9016 %}
9017 ins_pipe(ialu_reg_long);
9018 %}
9019
9020 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9021 match(Set dst (DecodeNKlass src));
9022 effect(TEMP dst, KILL cr);
9023 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9024 ins_encode %{
9025 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9026 %}
9027 ins_pipe(ialu_reg_long);
9028 %}
9029
9030 //----------Conditional Move---------------------------------------------------
9031 // Jump
9032 // dummy instruction for generating temp registers
9033 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9034 match(Jump (LShiftL switch_val shift));
9035 ins_cost(350);
9036 predicate(false);
9037 effect(TEMP dest);
9038
9039 format %{ "leaq $dest, [$constantaddress]\n\t"
9040 "jmp [$dest + $switch_val << $shift]\n\t" %}
9041 ins_encode %{
9042 // We could use jump(ArrayAddress) here, but the macro assembler would need r10
9043 // for that, and r10 is allocatable by the compiler at this point.
9044 // So we build the dispatch address by hand.
9045 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9046 // ArrayAddress dispatch(table, index);
9047 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9048 __ lea($dest$$Register, $constantaddress);
9049 __ jmp(dispatch);
9050 %}
9051 ins_pipe(pipe_jmp);
9052 %}
9053
9054 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9055 match(Jump (AddL (LShiftL switch_val shift) offset));
9056 ins_cost(350);
9057 effect(TEMP dest);
9058
9059 format %{ "leaq $dest, [$constantaddress]\n\t"
9060 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9061 ins_encode %{
9062 // We could use jump(ArrayAddress) here, but the macro assembler would need r10
9063 // for that, and r10 is allocatable by the compiler at this point.
9064 // So we build the dispatch address by hand.
9065 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9066 // ArrayAddress dispatch(table, index);
9067 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9068 __ lea($dest$$Register, $constantaddress);
9069 __ jmp(dispatch);
9070 %}
9071 ins_pipe(pipe_jmp);
9072 %}
9073
9074 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9075 match(Jump switch_val);
9076 ins_cost(350);
9077 effect(TEMP dest);
9078
9079 format %{ "leaq $dest, [$constantaddress]\n\t"
9080 "jmp [$dest + $switch_val]\n\t" %}
9081 ins_encode %{
9082 // We could use jump(ArrayAddress) here, but the macro assembler would need r10
9083 // for that, and r10 is allocatable by the compiler at this point.
9084 // So we build the dispatch address by hand.
9085 // Address index(noreg, switch_reg, Address::times_1);
9086 // ArrayAddress dispatch(table, index);
9087 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9088 __ lea($dest$$Register, $constantaddress);
9089 __ jmp(dispatch);
9090 %}
9091 ins_pipe(pipe_jmp);
9092 %}
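// In all three jump-table forms above, $constantaddress is the address of the jump
// table emitted into the constant section: the lea loads that address and the
// indirect jmp indexes into it with the (already scaled) switch value, so the table
// itself is simply an array of code addresses.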
9093
9094 // Conditional move
9095 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9096 %{
9097 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9098 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9099
9100 ins_cost(100); // XXX
9101 format %{ "setbn$cop $dst\t# signed, int" %}
9102 ins_encode %{
9103 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9104 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9105 %}
9106 ins_pipe(ialu_reg);
9107 %}
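// When the CMove picks between the constant 1 and a value the predicate has proven
// to be the constant 0, no conditional move is needed at all: setb materializes the
// condition (negated here, per the operand order of this rule) directly as 0 or 1,
// hence the "setbn" spelling in the format string.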
9108
9109 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9110 %{
9111 predicate(!UseAPX);
9112 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9113
9114 ins_cost(200); // XXX
9115 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9116 ins_encode %{
9117 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9118 %}
9119 ins_pipe(pipe_cmov_reg);
9120 %}
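// The *_ndd rules below are selected under UseAPX and use the Intel APX
// "new data destination" encodings: the destination is a separate operand, so it no
// longer has to alias one of the inputs and the register allocator can often avoid
// an extra mov. The Flag_ndd_demotable_* hints used further down presumably mark
// operands that still allow demotion to the legacy two-operand form when dst
// happens to coincide with a source.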
9121
9122 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9123 %{
9124 predicate(UseAPX);
9125 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9126
9127 ins_cost(200);
9128 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9129 ins_encode %{
9130 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9131 %}
9132 ins_pipe(pipe_cmov_reg);
9133 %}
9134
9135 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9136 %{
9137 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9138 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9139
9140 ins_cost(100); // XXX
9141 format %{ "setbn$cop $dst\t# unsigned, int" %}
9142 ins_encode %{
9143 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9144 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9145 %}
9146 ins_pipe(ialu_reg);
9147 %}
9148
9149 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9150 predicate(!UseAPX);
9151 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9152
9153 ins_cost(200); // XXX
9154 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9155 ins_encode %{
9156 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9157 %}
9158 ins_pipe(pipe_cmov_reg);
9159 %}
9160
9161 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9162 predicate(UseAPX);
9163 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9164
9165 ins_cost(200);
9166 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9167 ins_encode %{
9168 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9169 %}
9170 ins_pipe(pipe_cmov_reg);
9171 %}
9172
9173 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9174 %{
9175 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9176 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9177
9178 ins_cost(100); // XXX
9179 format %{ "setbn$cop $dst\t# unsigned, int" %}
9180 ins_encode %{
9181 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9182 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9183 %}
9184 ins_pipe(ialu_reg);
9185 %}
9186
9187 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9188 %{
9189 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9190 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9191
9192 ins_cost(100); // XXX
9193 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9194 ins_encode %{
9195 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9196 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9197 %}
9198 ins_pipe(ialu_reg);
9199 %}
9200
9201 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9202 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9203
9204 ins_cost(200);
9205 expand %{
9206 cmovI_regU(cop, cr, dst, src);
9207 %}
9208 %}
9209
9210 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9211 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9212
9213 ins_cost(200);
9214 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9215 ins_encode %{
9216 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9217 %}
9218 ins_pipe(pipe_cmov_reg);
9219 %}
9220
9221 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9222 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9223 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9224
9225 ins_cost(200); // XXX
9226 format %{ "cmovpl $dst, $src\n\t"
9227 "cmovnel $dst, $src" %}
9228 ins_encode %{
9229 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9230 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9231 %}
9232 ins_pipe(pipe_cmov_reg);
9233 %}
9234
9235 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9236 // inputs of the CMove
9237 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9238 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9239 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9240 effect(TEMP dst);
9241
9242 ins_cost(200); // XXX
9243 format %{ "cmovpl $dst, $src\n\t"
9244 "cmovnel $dst, $src" %}
9245 ins_encode %{
9246 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9247 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9248 %}
9249 ins_pipe(pipe_cmov_reg);
9250 %}
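// After an unordered floating-point compare (ucomiss/ucomisd) the parity flag is set
// exactly when one operand is NaN. The cmovp/cmovne pair used by the two rules above
// therefore routes the unordered case the same way as "not equal", which is what
// Java's ==/!= on float and double require when a NaN is involved (the eq rule gets
// the complementary behavior by swapping the CMove inputs).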
9251
9252 // Conditional move
9253 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9254 predicate(!UseAPX);
9255 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9256
9257 ins_cost(250); // XXX
9258 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9259 ins_encode %{
9260 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9261 %}
9262 ins_pipe(pipe_cmov_mem);
9263 %}
9264
9265 // Conditional move
9266 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9267 %{
9268 predicate(UseAPX);
9269 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9270
9271 ins_cost(250);
9272 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9273 ins_encode %{
9274 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9275 %}
9276 ins_pipe(pipe_cmov_mem);
9277 %}
9278
9279 // Conditional move
9280 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9281 %{
9282 predicate(!UseAPX);
9283 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9284
9285 ins_cost(250); // XXX
9286 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9287 ins_encode %{
9288 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9289 %}
9290 ins_pipe(pipe_cmov_mem);
9291 %}
9292
9293 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9294 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9295
9296 ins_cost(250);
9297 expand %{
9298 cmovI_memU(cop, cr, dst, src);
9299 %}
9300 %}
9301
9302 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9303 %{
9304 predicate(UseAPX);
9305 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9306
9307 ins_cost(250);
9308 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9309 ins_encode %{
9310 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9311 %}
9312 ins_pipe(pipe_cmov_mem);
9313 %}
9314
9315 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9316 %{
9317 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9318
9319 ins_cost(250);
9320 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9321 ins_encode %{
9322 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9323 %}
9324 ins_pipe(pipe_cmov_mem);
9325 %}
9326
9327 // Conditional move
9328 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9329 %{
9330 predicate(!UseAPX);
9331 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9332
9333 ins_cost(200); // XXX
9334 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9335 ins_encode %{
9336 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9337 %}
9338 ins_pipe(pipe_cmov_reg);
9339 %}
9340
9341 // Conditional move ndd
9342 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9343 %{
9344 predicate(UseAPX);
9345 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9346
9347 ins_cost(200);
9348 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9349 ins_encode %{
9350 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9351 %}
9352 ins_pipe(pipe_cmov_reg);
9353 %}
9354
9355 // Conditional move
9356 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9357 %{
9358 predicate(!UseAPX);
9359 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9360
9361 ins_cost(200); // XXX
9362 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9363 ins_encode %{
9364 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9365 %}
9366 ins_pipe(pipe_cmov_reg);
9367 %}
9368
9369 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9370 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9371
9372 ins_cost(200);
9373 expand %{
9374 cmovN_regU(cop, cr, dst, src);
9375 %}
9376 %}
9377
9378 // Conditional move ndd
9379 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9380 %{
9381 predicate(UseAPX);
9382 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9383
9384 ins_cost(200);
9385 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9386 ins_encode %{
9387 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9388 %}
9389 ins_pipe(pipe_cmov_reg);
9390 %}
9391
9392 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9393 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9394
9395 ins_cost(200);
9396 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9397 ins_encode %{
9398 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9399 %}
9400 ins_pipe(pipe_cmov_reg);
9401 %}
9402
9403 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9404 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9405 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9406
9407 ins_cost(200); // XXX
9408 format %{ "cmovpl $dst, $src\n\t"
9409 "cmovnel $dst, $src" %}
9410 ins_encode %{
9411 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9412 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9413 %}
9414 ins_pipe(pipe_cmov_reg);
9415 %}
9416
9417 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9418 // inputs of the CMove
9419 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9420 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9421 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9422
9423 ins_cost(200); // XXX
9424 format %{ "cmovpl $dst, $src\n\t"
9425 "cmovnel $dst, $src" %}
9426 ins_encode %{
9427 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9428 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9429 %}
9430 ins_pipe(pipe_cmov_reg);
9431 %}
9432
9433 // Conditional move
9434 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9435 %{
9436 predicate(!UseAPX);
9437 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9438
9439 ins_cost(200); // XXX
9440 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9441 ins_encode %{
9442 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9443 %}
9444 ins_pipe(pipe_cmov_reg); // XXX
9445 %}
9446
9447 // Conditional move ndd
9448 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9449 %{
9450 predicate(UseAPX);
9451 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9452
9453 ins_cost(200);
9454 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9455 ins_encode %{
9456 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9457 %}
9458 ins_pipe(pipe_cmov_reg);
9459 %}
9460
9461 // Conditional move
9462 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9463 %{
9464 predicate(!UseAPX);
9465 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9466
9467 ins_cost(200); // XXX
9468 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9469 ins_encode %{
9470 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9471 %}
9472 ins_pipe(pipe_cmov_reg); // XXX
9473 %}
9474
9475 // Conditional move ndd
9476 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9477 %{
9478 predicate(UseAPX);
9479 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9480
9481 ins_cost(200);
9482 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9483 ins_encode %{
9484 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9485 %}
9486 ins_pipe(pipe_cmov_reg);
9487 %}
9488
9489 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9490 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9491
9492 ins_cost(200);
9493 expand %{
9494 cmovP_regU(cop, cr, dst, src);
9495 %}
9496 %}
9497
9498 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9499 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9500
9501 ins_cost(200);
9502 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9503 ins_encode %{
9504 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9505 %}
9506 ins_pipe(pipe_cmov_reg);
9507 %}
9508
9509 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9510 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9511 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9512
9513 ins_cost(200); // XXX
9514 format %{ "cmovpq $dst, $src\n\t"
9515 "cmovneq $dst, $src" %}
9516 ins_encode %{
9517 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9518 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9519 %}
9520 ins_pipe(pipe_cmov_reg);
9521 %}
9522
9523 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9524 // inputs of the CMove
9525 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9526 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9527 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9528
9529 ins_cost(200); // XXX
9530 format %{ "cmovpq $dst, $src\n\t"
9531 "cmovneq $dst, $src" %}
9532 ins_encode %{
9533 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9534 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9535 %}
9536 ins_pipe(pipe_cmov_reg);
9537 %}
9538
9539 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9540 %{
9541 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9542 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9543
9544 ins_cost(100); // XXX
9545 format %{ "setbn$cop $dst\t# signed, long" %}
9546 ins_encode %{
9547 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9548 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9549 %}
9550 ins_pipe(ialu_reg);
9551 %}
9552
9553 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9554 %{
9555 predicate(!UseAPX);
9556 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9557
9558 ins_cost(200); // XXX
9559 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9560 ins_encode %{
9561 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9562 %}
9563 ins_pipe(pipe_cmov_reg); // XXX
9564 %}
9565
9566 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9567 %{
9568 predicate(UseAPX);
9569 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9570
9571 ins_cost(200);
9572 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9573 ins_encode %{
9574 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9575 %}
9576 ins_pipe(pipe_cmov_reg);
9577 %}
9578
9579 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9580 %{
9581 predicate(!UseAPX);
9582 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9583
9584 ins_cost(200); // XXX
9585 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9586 ins_encode %{
9587 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9588 %}
9589 ins_pipe(pipe_cmov_mem); // XXX
9590 %}
9591
9592 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9593 %{
9594 predicate(UseAPX);
9595 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9596
9597 ins_cost(200);
9598 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9599 ins_encode %{
9600 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9601 %}
9602 ins_pipe(pipe_cmov_mem);
9603 %}
9604
9605 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9606 %{
9607 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9608 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9609
9610 ins_cost(100); // XXX
9611 format %{ "setbn$cop $dst\t# unsigned, long" %}
9612 ins_encode %{
9613 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9614 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9615 %}
9616 ins_pipe(ialu_reg);
9617 %}
9618
9619 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9620 %{
9621 predicate(!UseAPX);
9622 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9623
9624 ins_cost(200); // XXX
9625 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9626 ins_encode %{
9627 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9628 %}
9629 ins_pipe(pipe_cmov_reg); // XXX
9630 %}
9631
9632 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9633 %{
9634 predicate(UseAPX);
9635 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9636
9637 ins_cost(200);
9638 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9639 ins_encode %{
9640 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9641 %}
9642 ins_pipe(pipe_cmov_reg);
9643 %}
9644
9645 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9646 %{
9647 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9648 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9649
9650 ins_cost(100); // XXX
9651 format %{ "setbn$cop $dst\t# unsigned, long" %}
9652 ins_encode %{
9653 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9654 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9655 %}
9656 ins_pipe(ialu_reg);
9657 %}
9658
9659 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9660 %{
9661 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9662 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9663
9664 ins_cost(100); // XXX
9665 format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
9666 ins_encode %{
9667 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9668 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9669 %}
9670 ins_pipe(ialu_reg);
9671 %}
9672
9673 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9674 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9675
9676 ins_cost(200);
9677 expand %{
9678 cmovL_regU(cop, cr, dst, src);
9679 %}
9680 %}
9681
9682 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
9683 %{
9684 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9685
9686 ins_cost(200);
9687 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9688 ins_encode %{
9689 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9690 %}
9691 ins_pipe(pipe_cmov_reg);
9692 %}
9693
9694 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9695 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9696 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9697
9698 ins_cost(200); // XXX
9699 format %{ "cmovpq $dst, $src\n\t"
9700 "cmovneq $dst, $src" %}
9701 ins_encode %{
9702 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9703 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9704 %}
9705 ins_pipe(pipe_cmov_reg);
9706 %}
9707
9708 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9709 // inputs of the CMove
9710 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9711 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9712 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9713
9714 ins_cost(200); // XXX
9715 format %{ "cmovpq $dst, $src\n\t"
9716 "cmovneq $dst, $src" %}
9717 ins_encode %{
9718 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9719 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9720 %}
9721 ins_pipe(pipe_cmov_reg);
9722 %}
9723
9724 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9725 %{
9726 predicate(!UseAPX);
9727 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9728
9729 ins_cost(200); // XXX
9730 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9731 ins_encode %{
9732 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9733 %}
9734 ins_pipe(pipe_cmov_mem); // XXX
9735 %}
9736
9737 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9738 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9739
9740 ins_cost(200);
9741 expand %{
9742 cmovL_memU(cop, cr, dst, src);
9743 %}
9744 %}
9745
9746 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9747 %{
9748 predicate(UseAPX);
9749 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9750
9751 ins_cost(200);
9752 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9753 ins_encode %{
9754 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9755 %}
9756 ins_pipe(pipe_cmov_mem);
9757 %}
9758
9759 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
9760 %{
9761 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9762
9763 ins_cost(200);
9764 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9765 ins_encode %{
9766 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9767 %}
9768 ins_pipe(pipe_cmov_mem);
9769 %}
9770
9771 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9772 %{
9773 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9774
9775 ins_cost(200); // XXX
9776 format %{ "jn$cop skip\t# signed cmove float\n\t"
9777 "movss $dst, $src\n"
9778 "skip:" %}
9779 ins_encode %{
9780 Label Lskip;
9781 // Invert sense of branch from sense of CMOV
9782 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9783 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9784 __ bind(Lskip);
9785 %}
9786 ins_pipe(pipe_slow);
9787 %}
9788
9789 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9790 %{
9791 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9792
9793 ins_cost(200); // XXX
9794 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9795 "movss $dst, $src\n"
9796 "skip:" %}
9797 ins_encode %{
9798 Label Lskip;
9799 // Invert sense of branch from sense of CMOV
9800 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9801 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9802 __ bind(Lskip);
9803 %}
9804 ins_pipe(pipe_slow);
9805 %}
9806
9807 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9808 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9809
9810 ins_cost(200);
9811 expand %{
9812 cmovF_regU(cop, cr, dst, src);
9813 %}
9814 %}
9815
9816 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
9817 %{
9818 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9819
9820 ins_cost(200); // XXX
9821 format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
9822 "movss $dst, $src\n"
9823 "skip:" %}
9824 ins_encode %{
9825 Label Lskip;
9826 // Invert sense of branch from sense of CMOV
9827 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9828 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9829 __ bind(Lskip);
9830 %}
9831 ins_pipe(pipe_slow);
9832 %}
9833
9834 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9835 %{
9836 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9837
9838 ins_cost(200); // XXX
9839 format %{ "jn$cop skip\t# signed cmove double\n\t"
9840 "movsd $dst, $src\n"
9841 "skip:" %}
9842 ins_encode %{
9843 Label Lskip;
9844 // Invert sense of branch from sense of CMOV
9845 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9846 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9847 __ bind(Lskip);
9848 %}
9849 ins_pipe(pipe_slow);
9850 %}
9851
9852 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9853 %{
9854 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9855
9856 ins_cost(200); // XXX
9857 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9858 "movsd $dst, $src\n"
9859 "skip:" %}
9860 ins_encode %{
9861 Label Lskip;
9862 // Invert sense of branch from sense of CMOV
9863 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9864 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9865 __ bind(Lskip);
9866 %}
9867 ins_pipe(pipe_slow);
9868 %}
9869
9870 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9871 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9872
9873 ins_cost(200);
9874 expand %{
9875 cmovD_regU(cop, cr, dst, src);
9876 %}
9877 %}
9878
9879 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
9880 %{
9881 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9882
9883 ins_cost(200); // XXX
9884 format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
9885 "movsd $dst, $src\n"
9886 "skip:" %}
9887 ins_encode %{
9888 Label Lskip;
9889 // Invert sense of branch from sense of CMOV
9890 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9891 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9892 __ bind(Lskip);
9893 %}
9894 ins_pipe(pipe_slow);
9895 %}
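// There is no flags-based conditional move for XMM registers, so the CMoveF/CMoveD
// rules above are implemented as a short forward branch (with the branch condition
// inverted) around a plain register-to-register move.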
9896
9897 //----------Arithmetic Instructions--------------------------------------------
9898 //----------Addition Instructions----------------------------------------------
9899
9900 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9901 %{
9902 predicate(!UseAPX);
9903 match(Set dst (AddI dst src));
9904 effect(KILL cr);
9905 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9906 format %{ "addl $dst, $src\t# int" %}
9907 ins_encode %{
9908 __ addl($dst$$Register, $src$$Register);
9909 %}
9910 ins_pipe(ialu_reg_reg);
9911 %}
9912
9913 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9914 %{
9915 predicate(UseAPX);
9916 match(Set dst (AddI src1 src2));
9917 effect(KILL cr);
9918 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9919
9920 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9921 ins_encode %{
9922 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9923 %}
9924 ins_pipe(ialu_reg_reg);
9925 %}
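// The trailing boolean passed to the e*-form macro-assembler calls (false here) is
// taken to be the APX "no flags" (NF) selector; it is left false in these rules
// because they advertise that they set the arithmetic flags (see the flag(...)
// lines), which later optimizations may rely on.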
9926
9927 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9928 %{
9929 predicate(!UseAPX);
9930 match(Set dst (AddI dst src));
9931 effect(KILL cr);
9932 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9933
9934 format %{ "addl $dst, $src\t# int" %}
9935 ins_encode %{
9936 __ addl($dst$$Register, $src$$constant);
9937 %}
9938 ins_pipe( ialu_reg );
9939 %}
9940
9941 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9942 %{
9943 predicate(UseAPX);
9944 match(Set dst (AddI src1 src2));
9945 effect(KILL cr);
9946 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
9947
9948 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9949 ins_encode %{
9950 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9951 %}
9952 ins_pipe( ialu_reg );
9953 %}
9954
9955 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9956 %{
9957 predicate(UseAPX);
9958 match(Set dst (AddI (LoadI src1) src2));
9959 effect(KILL cr);
9960 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9961
9962 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9963 ins_encode %{
9964 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9965 %}
9966 ins_pipe( ialu_reg );
9967 %}
9968
9969 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9970 %{
9971 predicate(!UseAPX);
9972 match(Set dst (AddI dst (LoadI src)));
9973 effect(KILL cr);
9974 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9975
9976 ins_cost(150); // XXX
9977 format %{ "addl $dst, $src\t# int" %}
9978 ins_encode %{
9979 __ addl($dst$$Register, $src$$Address);
9980 %}
9981 ins_pipe(ialu_reg_mem);
9982 %}
9983
9984 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
9985 %{
9986 predicate(UseAPX);
9987 match(Set dst (AddI src1 (LoadI src2)));
9988 effect(KILL cr);
9989 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9990
9991 ins_cost(150);
9992 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9993 ins_encode %{
9994 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
9995 %}
9996 ins_pipe(ialu_reg_mem);
9997 %}
9998
9999 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10000 %{
10001 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10002 effect(KILL cr);
10003 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10004
10005 ins_cost(150); // XXX
10006 format %{ "addl $dst, $src\t# int" %}
10007 ins_encode %{
10008 __ addl($dst$$Address, $src$$Register);
10009 %}
10010 ins_pipe(ialu_mem_reg);
10011 %}
10012
10013 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10014 %{
10015 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10016 effect(KILL cr);
10017 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10018
10019
10020 ins_cost(125); // XXX
10021 format %{ "addl $dst, $src\t# int" %}
10022 ins_encode %{
10023 __ addl($dst$$Address, $src$$constant);
10024 %}
10025 ins_pipe(ialu_mem_imm);
10026 %}
10027
10028 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10029 %{
10030 predicate(!UseAPX && UseIncDec);
10031 match(Set dst (AddI dst src));
10032 effect(KILL cr);
10033
10034 format %{ "incl $dst\t# int" %}
10035 ins_encode %{
10036 __ incrementl($dst$$Register);
10037 %}
10038 ins_pipe(ialu_reg);
10039 %}
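// incl/decl are a byte shorter than addl/subl with an immediate and leave CF
// untouched, but the partial flags update can stall some older cores; that is
// presumably why they are gated behind UseIncDec. With the flag off, the generic
// addI_rReg_imm rule above still covers the +1 case (and decI below the -1 case).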
10040
10041 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10042 %{
10043 predicate(UseAPX && UseIncDec);
10044 match(Set dst (AddI src val));
10045 effect(KILL cr);
10046 flag(PD::Flag_ndd_demotable_opr1);
10047
10048 format %{ "eincl $dst, $src\t# int ndd" %}
10049 ins_encode %{
10050 __ eincl($dst$$Register, $src$$Register, false);
10051 %}
10052 ins_pipe(ialu_reg);
10053 %}
10054
10055 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10056 %{
10057 predicate(UseAPX && UseIncDec);
10058 match(Set dst (AddI (LoadI src) val));
10059 effect(KILL cr);
10060
10061 format %{ "eincl $dst, $src\t# int ndd" %}
10062 ins_encode %{
10063 __ eincl($dst$$Register, $src$$Address, false);
10064 %}
10065 ins_pipe(ialu_reg);
10066 %}
10067
10068 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10069 %{
10070 predicate(UseIncDec);
10071 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10072 effect(KILL cr);
10073
10074 ins_cost(125); // XXX
10075 format %{ "incl $dst\t# int" %}
10076 ins_encode %{
10077 __ incrementl($dst$$Address);
10078 %}
10079 ins_pipe(ialu_mem_imm);
10080 %}
10081
10082 // XXX why does that use AddI
10083 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10084 %{
10085 predicate(!UseAPX && UseIncDec);
10086 match(Set dst (AddI dst src));
10087 effect(KILL cr);
10088
10089 format %{ "decl $dst\t# int" %}
10090 ins_encode %{
10091 __ decrementl($dst$$Register);
10092 %}
10093 ins_pipe(ialu_reg);
10094 %}
10095
10096 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10097 %{
10098 predicate(UseAPX && UseIncDec);
10099 match(Set dst (AddI src val));
10100 effect(KILL cr);
10101 flag(PD::Flag_ndd_demotable_opr1);
10102
10103 format %{ "edecl $dst, $src\t# int ndd" %}
10104 ins_encode %{
10105 __ edecl($dst$$Register, $src$$Register, false);
10106 %}
10107 ins_pipe(ialu_reg);
10108 %}
10109
10110 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10111 %{
10112 predicate(UseAPX && UseIncDec);
10113 match(Set dst (AddI (LoadI src) val));
10114 effect(KILL cr);
10115
10116 format %{ "edecl $dst, $src\t# int ndd" %}
10117 ins_encode %{
10118 __ edecl($dst$$Register, $src$$Address, false);
10119 %}
10120 ins_pipe(ialu_reg);
10121 %}
10122
10123 // XXX why does that use AddI
10124 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10125 %{
10126 predicate(UseIncDec);
10127 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10128 effect(KILL cr);
10129
10130 ins_cost(125); // XXX
10131 format %{ "decl $dst\t# int" %}
10132 ins_encode %{
10133 __ decrementl($dst$$Address);
10134 %}
10135 ins_pipe(ialu_mem_imm);
10136 %}
10137
10138 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10139 %{
10140 predicate(VM_Version::supports_fast_2op_lea());
10141 match(Set dst (AddI (LShiftI index scale) disp));
10142
10143 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10144 ins_encode %{
10145 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10146 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10147 %}
10148 ins_pipe(ialu_reg_reg);
10149 %}
10150
10151 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10152 %{
10153 predicate(VM_Version::supports_fast_3op_lea());
10154 match(Set dst (AddI (AddI base index) disp));
10155
10156 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10157 ins_encode %{
10158 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10159 %}
10160 ins_pipe(ialu_reg_reg);
10161 %}
10162
10163 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10164 %{
10165 predicate(VM_Version::supports_fast_2op_lea());
10166 match(Set dst (AddI base (LShiftI index scale)));
10167
10168 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10169 ins_encode %{
10170 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10171 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10172 %}
10173 ins_pipe(ialu_reg_reg);
10174 %}
10175
10176 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10177 %{
10178 predicate(VM_Version::supports_fast_3op_lea());
10179 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10180
10181 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10182 ins_encode %{
10183 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10184 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10185 %}
10186 ins_pipe(ialu_reg_reg);
10187 %}
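// The leal rules above fold shift-and-add arithmetic into a single lea, but only
// when VM_Version reports lea as fast: a three-component lea (base + index*scale +
// disp) is noticeably slower on some microarchitectures, hence the separate
// supports_fast_2op_lea()/supports_fast_3op_lea() predicates. The two-component
// base+index form also excludes rbp/r13 as base (no_rbp_r13_RegI), since those
// bases cannot be encoded without an extra displacement byte.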
10188
10189 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10190 %{
10191 predicate(!UseAPX);
10192 match(Set dst (AddL dst src));
10193 effect(KILL cr);
10194 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10195
10196 format %{ "addq $dst, $src\t# long" %}
10197 ins_encode %{
10198 __ addq($dst$$Register, $src$$Register);
10199 %}
10200 ins_pipe(ialu_reg_reg);
10201 %}
10202
10203 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10204 %{
10205 predicate(UseAPX);
10206 match(Set dst (AddL src1 src2));
10207 effect(KILL cr);
10208 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10209
10210 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10211 ins_encode %{
10212 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10213 %}
10214 ins_pipe(ialu_reg_reg);
10215 %}
10216
10217 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10218 %{
10219 predicate(!UseAPX);
10220 match(Set dst (AddL dst src));
10221 effect(KILL cr);
10222 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10223
10224 format %{ "addq $dst, $src\t# long" %}
10225 ins_encode %{
10226 __ addq($dst$$Register, $src$$constant);
10227 %}
10228 ins_pipe( ialu_reg );
10229 %}
10230
10231 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10232 %{
10233 predicate(UseAPX);
10234 match(Set dst (AddL src1 src2));
10235 effect(KILL cr);
10236 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10237
10238 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10239 ins_encode %{
10240 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10241 %}
10242 ins_pipe( ialu_reg );
10243 %}
10244
10245 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10246 %{
10247 predicate(UseAPX);
10248 match(Set dst (AddL (LoadL src1) src2));
10249 effect(KILL cr);
10250 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10251
10252 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10253 ins_encode %{
10254 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10255 %}
10256 ins_pipe( ialu_reg );
10257 %}
10258
10259 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10260 %{
10261 predicate(!UseAPX);
10262 match(Set dst (AddL dst (LoadL src)));
10263 effect(KILL cr);
10264 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10265
10266 ins_cost(150); // XXX
10267 format %{ "addq $dst, $src\t# long" %}
10268 ins_encode %{
10269 __ addq($dst$$Register, $src$$Address);
10270 %}
10271 ins_pipe(ialu_reg_mem);
10272 %}
10273
10274 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10275 %{
10276 predicate(UseAPX);
10277 match(Set dst (AddL src1 (LoadL src2)));
10278 effect(KILL cr);
10279 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10280
10281 ins_cost(150);
10282 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10283 ins_encode %{
10284 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10285 %}
10286 ins_pipe(ialu_reg_mem);
10287 %}
10288
10289 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10290 %{
10291 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10292 effect(KILL cr);
10293 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10294
10295 ins_cost(150); // XXX
10296 format %{ "addq $dst, $src\t# long" %}
10297 ins_encode %{
10298 __ addq($dst$$Address, $src$$Register);
10299 %}
10300 ins_pipe(ialu_mem_reg);
10301 %}
10302
10303 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10304 %{
10305 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10306 effect(KILL cr);
10307 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10308
10309 ins_cost(125); // XXX
10310 format %{ "addq $dst, $src\t# long" %}
10311 ins_encode %{
10312 __ addq($dst$$Address, $src$$constant);
10313 %}
10314 ins_pipe(ialu_mem_imm);
10315 %}
10316
10317 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10318 %{
10319 predicate(!UseAPX && UseIncDec);
10320 match(Set dst (AddL dst src));
10321 effect(KILL cr);
10322
10323 format %{ "incq $dst\t# long" %}
10324 ins_encode %{
10325 __ incrementq($dst$$Register);
10326 %}
10327 ins_pipe(ialu_reg);
10328 %}
10329
10330 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10331 %{
10332 predicate(UseAPX && UseIncDec);
10333 match(Set dst (AddL src val));
10334 effect(KILL cr);
10335 flag(PD::Flag_ndd_demotable_opr1);
10336
10337 format %{ "eincq $dst, $src\t# long ndd" %}
10338 ins_encode %{
10339 __ eincq($dst$$Register, $src$$Register, false);
10340 %}
10341 ins_pipe(ialu_reg);
10342 %}
10343
10344 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10345 %{
10346 predicate(UseAPX && UseIncDec);
10347 match(Set dst (AddL (LoadL src) val));
10348 effect(KILL cr);
10349
10350 format %{ "eincq $dst, $src\t# long ndd" %}
10351 ins_encode %{
10352 __ eincq($dst$$Register, $src$$Address, false);
10353 %}
10354 ins_pipe(ialu_reg);
10355 %}
10356
10357 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10358 %{
10359 predicate(UseIncDec);
10360 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10361 effect(KILL cr);
10362
10363 ins_cost(125); // XXX
10364 format %{ "incq $dst\t# long" %}
10365 ins_encode %{
10366 __ incrementq($dst$$Address);
10367 %}
10368 ins_pipe(ialu_mem_imm);
10369 %}
10370
10371 // XXX why does that use AddL
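// (Presumably because the ideal graph has no dedicated increment/decrement node:
// x + 1 and x - 1 arrive as AddL with an immL1/immL_M1 constant, and the matcher
// only selects the inc/dec forms when UseIncDec is enabled.)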
10372 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10373 %{
10374 predicate(!UseAPX && UseIncDec);
10375 match(Set dst (AddL dst src));
10376 effect(KILL cr);
10377
10378 format %{ "decq $dst\t# long" %}
10379 ins_encode %{
10380 __ decrementq($dst$$Register);
10381 %}
10382 ins_pipe(ialu_reg);
10383 %}
10384
10385 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10386 %{
10387 predicate(UseAPX && UseIncDec);
10388 match(Set dst (AddL src val));
10389 effect(KILL cr);
10390 flag(PD::Flag_ndd_demotable_opr1);
10391
10392 format %{ "edecq $dst, $src\t# long ndd" %}
10393 ins_encode %{
10394 __ edecq($dst$$Register, $src$$Register, false);
10395 %}
10396 ins_pipe(ialu_reg);
10397 %}
10398
10399 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10400 %{
10401 predicate(UseAPX && UseIncDec);
10402 match(Set dst (AddL (LoadL src) val));
10403 effect(KILL cr);
10404
10405 format %{ "edecq $dst, $src\t# long ndd" %}
10406 ins_encode %{
10407 __ edecq($dst$$Register, $src$$Address, false);
10408 %}
10409 ins_pipe(ialu_reg);
10410 %}
10411
10412 // XXX why does that use AddL
10413 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10414 %{
10415 predicate(UseIncDec);
10416 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10417 effect(KILL cr);
10418
10419 ins_cost(125); // XXX
10420 format %{ "decq $dst\t# long" %}
10421 ins_encode %{
10422 __ decrementq($dst$$Address);
10423 %}
10424 ins_pipe(ialu_mem_imm);
10425 %}
10426
10427 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10428 %{
10429 predicate(VM_Version::supports_fast_2op_lea());
10430 match(Set dst (AddL (LShiftL index scale) disp));
10431
10432 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10433 ins_encode %{
10434 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10435 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10436 %}
10437 ins_pipe(ialu_reg_reg);
10438 %}
10439
10440 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10441 %{
10442 predicate(VM_Version::supports_fast_3op_lea());
10443 match(Set dst (AddL (AddL base index) disp));
10444
10445 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10446 ins_encode %{
10447 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10448 %}
10449 ins_pipe(ialu_reg_reg);
10450 %}
10451
10452 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10453 %{
10454 predicate(VM_Version::supports_fast_2op_lea());
10455 match(Set dst (AddL base (LShiftL index scale)));
10456
10457 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10458 ins_encode %{
10459 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10460 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10461 %}
10462 ins_pipe(ialu_reg_reg);
10463 %}
10464
10465 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10466 %{
10467 predicate(VM_Version::supports_fast_3op_lea());
10468 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10469
10470 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10471 ins_encode %{
10472 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10473 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10474 %}
10475 ins_pipe(ialu_reg_reg);
10476 %}
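// Note: the leaq patterns above fold shift/add chains into a single address
// computation. leaq does not write the flags, which is why these instructs carry
// no rFlagsReg effect; they are gated on supports_fast_2op_lea() /
// supports_fast_3op_lea() because multi-operand LEA is reportedly slower on some
// microarchitectures.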
10477
10478 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10479 %{
10480 match(Set dst (AddP dst src));
10481 effect(KILL cr);
10482 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10483
10484 format %{ "addq $dst, $src\t# ptr" %}
10485 ins_encode %{
10486 __ addq($dst$$Register, $src$$Register);
10487 %}
10488 ins_pipe(ialu_reg_reg);
10489 %}
10490
10491 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10492 %{
10493 match(Set dst (AddP dst src));
10494 effect(KILL cr);
10495 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10496
10497 format %{ "addq $dst, $src\t# ptr" %}
10498 ins_encode %{
10499 __ addq($dst$$Register, $src$$constant);
10500 %}
10501 ins_pipe( ialu_reg );
10502 %}
10503
10504 // XXX addP mem ops ????
10505
10506 instruct checkCastPP(rRegP dst)
10507 %{
10508 match(Set dst (CheckCastPP dst));
10509
10510 size(0);
10511 format %{ "# checkcastPP of $dst" %}
10512 ins_encode(/* empty encoding */);
10513 ins_pipe(empty);
10514 %}
10515
10516 instruct castPP(rRegP dst)
10517 %{
10518 match(Set dst (CastPP dst));
10519
10520 size(0);
10521 format %{ "# castPP of $dst" %}
10522 ins_encode(/* empty encoding */);
10523 ins_pipe(empty);
10524 %}
10525
10526 instruct castII(rRegI dst)
10527 %{
10528 predicate(VerifyConstraintCasts == 0);
10529 match(Set dst (CastII dst));
10530
10531 size(0);
10532 format %{ "# castII of $dst" %}
10533 ins_encode(/* empty encoding */);
10534 ins_cost(0);
10535 ins_pipe(empty);
10536 %}
10537
10538 instruct castII_checked(rRegI dst, rFlagsReg cr)
10539 %{
10540 predicate(VerifyConstraintCasts > 0);
10541 match(Set dst (CastII dst));
10542
10543 effect(KILL cr);
10544 format %{ "# cast_checked_II $dst" %}
10545 ins_encode %{
10546 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10547 %}
10548 ins_pipe(pipe_slow);
10549 %}
10550
10551 instruct castLL(rRegL dst)
10552 %{
10553 predicate(VerifyConstraintCasts == 0);
10554 match(Set dst (CastLL dst));
10555
10556 size(0);
10557 format %{ "# castLL of $dst" %}
10558 ins_encode(/* empty encoding */);
10559 ins_cost(0);
10560 ins_pipe(empty);
10561 %}
10562
10563 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10564 %{
10565 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10566 match(Set dst (CastLL dst));
10567
10568 effect(KILL cr);
10569 format %{ "# cast_checked_LL $dst" %}
10570 ins_encode %{
10571 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10572 %}
10573 ins_pipe(pipe_slow);
10574 %}
10575
10576 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10577 %{
10578 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10579 match(Set dst (CastLL dst));
10580
10581 effect(KILL cr, TEMP tmp);
10582 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10583 ins_encode %{
10584 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10585 %}
10586 ins_pipe(pipe_slow);
10587 %}
10588
10589 instruct castFF(regF dst)
10590 %{
10591 match(Set dst (CastFF dst));
10592
10593 size(0);
10594 format %{ "# castFF of $dst" %}
10595 ins_encode(/* empty encoding */);
10596 ins_cost(0);
10597 ins_pipe(empty);
10598 %}
10599
10600 instruct castHH(regF dst)
10601 %{
10602 match(Set dst (CastHH dst));
10603
10604 size(0);
10605 format %{ "# castHH of $dst" %}
10606 ins_encode(/* empty encoding */);
10607 ins_cost(0);
10608 ins_pipe(empty);
10609 %}
10610
10611 instruct castDD(regD dst)
10612 %{
10613 match(Set dst (CastDD dst));
10614
10615 size(0);
10616 format %{ "# castDD of $dst" %}
10617 ins_encode(/* empty encoding */);
10618 ins_cost(0);
10619 ins_pipe(empty);
10620 %}
10621
10622 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
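// Note: each CAS pattern below emits lock cmpxchg, which compares rax with the
// memory operand, stores $newval on a match and reports the outcome in ZF; setcc
// then materializes ZF as the boolean result. Weak and strong CompareAndSwap
// nodes share one pattern here since x86 cmpxchg does not fail spuriously.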
10623 instruct compareAndSwapP(rRegI res,
10624 memory mem_ptr,
10625 rax_RegP oldval, rRegP newval,
10626 rFlagsReg cr)
10627 %{
10628 predicate(n->as_LoadStore()->barrier_data() == 0);
10629 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10630 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10631 effect(KILL cr, KILL oldval);
10632
10633 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10634 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10635 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10636 ins_encode %{
10637 __ lock();
10638 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10639 __ setcc(Assembler::equal, $res$$Register);
10640 %}
10641 ins_pipe( pipe_cmpxchg );
10642 %}
10643
10644 instruct compareAndSwapL(rRegI res,
10645 memory mem_ptr,
10646 rax_RegL oldval, rRegL newval,
10647 rFlagsReg cr)
10648 %{
10649 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10650 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10651 effect(KILL cr, KILL oldval);
10652
10653 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10654 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10655 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10656 ins_encode %{
10657 __ lock();
10658 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10659 __ setcc(Assembler::equal, $res$$Register);
10660 %}
10661 ins_pipe( pipe_cmpxchg );
10662 %}
10663
10664 instruct compareAndSwapI(rRegI res,
10665 memory mem_ptr,
10666 rax_RegI oldval, rRegI newval,
10667 rFlagsReg cr)
10668 %{
10669 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10670 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10671 effect(KILL cr, KILL oldval);
10672
10673 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10674 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10675 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10676 ins_encode %{
10677 __ lock();
10678 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10679 __ setcc(Assembler::equal, $res$$Register);
10680 %}
10681 ins_pipe( pipe_cmpxchg );
10682 %}
10683
10684 instruct compareAndSwapB(rRegI res,
10685 memory mem_ptr,
10686 rax_RegI oldval, rRegI newval,
10687 rFlagsReg cr)
10688 %{
10689 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10690 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10691 effect(KILL cr, KILL oldval);
10692
10693 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10694 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10695 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10696 ins_encode %{
10697 __ lock();
10698 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10699 __ setcc(Assembler::equal, $res$$Register);
10700 %}
10701 ins_pipe( pipe_cmpxchg );
10702 %}
10703
10704 instruct compareAndSwapS(rRegI res,
10705 memory mem_ptr,
10706 rax_RegI oldval, rRegI newval,
10707 rFlagsReg cr)
10708 %{
10709 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10710 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10711 effect(KILL cr, KILL oldval);
10712
10713 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10714 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10715 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10716 ins_encode %{
10717 __ lock();
10718 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10719 __ setcc(Assembler::equal, $res$$Register);
10720 %}
10721 ins_pipe( pipe_cmpxchg );
10722 %}
10723
10724 instruct compareAndSwapN(rRegI res,
10725 memory mem_ptr,
10726 rax_RegN oldval, rRegN newval,
10727 rFlagsReg cr) %{
10728 predicate(n->as_LoadStore()->barrier_data() == 0);
10729 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10730 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10731 effect(KILL cr, KILL oldval);
10732
10733 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10734 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10735 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10736 ins_encode %{
10737 __ lock();
10738 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10739 __ setcc(Assembler::equal, $res$$Register);
10740 %}
10741 ins_pipe( pipe_cmpxchg );
10742 %}
10743
10744 instruct compareAndExchangeB(
10745 memory mem_ptr,
10746 rax_RegI oldval, rRegI newval,
10747 rFlagsReg cr)
10748 %{
10749 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10750 effect(KILL cr);
10751
10752 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10753 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10754 ins_encode %{
10755 __ lock();
10756 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10757 %}
10758 ins_pipe( pipe_cmpxchg );
10759 %}
10760
10761 instruct compareAndExchangeS(
10762 memory mem_ptr,
10763 rax_RegI oldval, rRegI newval,
10764 rFlagsReg cr)
10765 %{
10766 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10767 effect(KILL cr);
10768
10769 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10770 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10771 ins_encode %{
10772 __ lock();
10773 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10774 %}
10775 ins_pipe( pipe_cmpxchg );
10776 %}
10777
10778 instruct compareAndExchangeI(
10779 memory mem_ptr,
10780 rax_RegI oldval, rRegI newval,
10781 rFlagsReg cr)
10782 %{
10783 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10784 effect(KILL cr);
10785
10786 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10787 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10788 ins_encode %{
10789 __ lock();
10790 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10791 %}
10792 ins_pipe( pipe_cmpxchg );
10793 %}
10794
10795 instruct compareAndExchangeL(
10796 memory mem_ptr,
10797 rax_RegL oldval, rRegL newval,
10798 rFlagsReg cr)
10799 %{
10800 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10801 effect(KILL cr);
10802
10803 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10804 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10805 ins_encode %{
10806 __ lock();
10807 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10808 %}
10809 ins_pipe( pipe_cmpxchg );
10810 %}
10811
10812 instruct compareAndExchangeN(
10813 memory mem_ptr,
10814 rax_RegN oldval, rRegN newval,
10815 rFlagsReg cr) %{
10816 predicate(n->as_LoadStore()->barrier_data() == 0);
10817 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10818 effect(KILL cr);
10819
10820 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10821 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10822 ins_encode %{
10823 __ lock();
10824 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10825 %}
10826 ins_pipe( pipe_cmpxchg );
10827 %}
10828
10829 instruct compareAndExchangeP(
10830 memory mem_ptr,
10831 rax_RegP oldval, rRegP newval,
10832 rFlagsReg cr)
10833 %{
10834 predicate(n->as_LoadStore()->barrier_data() == 0);
10835 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10836 effect(KILL cr);
10837
10838 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10839 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10840 ins_encode %{
10841 __ lock();
10842 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10843 %}
10844 ins_pipe( pipe_cmpxchg );
10845 %}
10846
10847 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10848 predicate(n->as_LoadStore()->result_not_used());
10849 match(Set dummy (GetAndAddB mem add));
10850 effect(KILL cr);
10851 format %{ "addb_lock $mem, $add" %}
10852 ins_encode %{
10853 __ lock();
10854 __ addb($mem$$Address, $add$$Register);
10855 %}
10856 ins_pipe(pipe_cmpxchg);
10857 %}
10858
10859 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10860 predicate(n->as_LoadStore()->result_not_used());
10861 match(Set dummy (GetAndAddB mem add));
10862 effect(KILL cr);
10863 format %{ "addb_lock $mem, $add" %}
10864 ins_encode %{
10865 __ lock();
10866 __ addb($mem$$Address, $add$$constant);
10867 %}
10868 ins_pipe(pipe_cmpxchg);
10869 %}
10870
10871 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10872 predicate(!n->as_LoadStore()->result_not_used());
10873 match(Set newval (GetAndAddB mem newval));
10874 effect(KILL cr);
10875 format %{ "xaddb_lock $mem, $newval" %}
10876 ins_encode %{
10877 __ lock();
10878 __ xaddb($mem$$Address, $newval$$Register);
10879 %}
10880 ins_pipe(pipe_cmpxchg);
10881 %}
10882
10883 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10884 predicate(n->as_LoadStore()->result_not_used());
10885 match(Set dummy (GetAndAddS mem add));
10886 effect(KILL cr);
10887 format %{ "addw_lock $mem, $add" %}
10888 ins_encode %{
10889 __ lock();
10890 __ addw($mem$$Address, $add$$Register);
10891 %}
10892 ins_pipe(pipe_cmpxchg);
10893 %}
10894
10895 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10896 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10897 match(Set dummy (GetAndAddS mem add));
10898 effect(KILL cr);
10899 format %{ "addw_lock $mem, $add" %}
10900 ins_encode %{
10901 __ lock();
10902 __ addw($mem$$Address, $add$$constant);
10903 %}
10904 ins_pipe(pipe_cmpxchg);
10905 %}
10906
10907 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10908 predicate(!n->as_LoadStore()->result_not_used());
10909 match(Set newval (GetAndAddS mem newval));
10910 effect(KILL cr);
10911 format %{ "xaddw_lock $mem, $newval" %}
10912 ins_encode %{
10913 __ lock();
10914 __ xaddw($mem$$Address, $newval$$Register);
10915 %}
10916 ins_pipe(pipe_cmpxchg);
10917 %}
10918
10919 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10920 predicate(n->as_LoadStore()->result_not_used());
10921 match(Set dummy (GetAndAddI mem add));
10922 effect(KILL cr);
10923 format %{ "addl_lock $mem, $add" %}
10924 ins_encode %{
10925 __ lock();
10926 __ addl($mem$$Address, $add$$Register);
10927 %}
10928 ins_pipe(pipe_cmpxchg);
10929 %}
10930
10931 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10932 predicate(n->as_LoadStore()->result_not_used());
10933 match(Set dummy (GetAndAddI mem add));
10934 effect(KILL cr);
10935 format %{ "addl_lock $mem, $add" %}
10936 ins_encode %{
10937 __ lock();
10938 __ addl($mem$$Address, $add$$constant);
10939 %}
10940 ins_pipe(pipe_cmpxchg);
10941 %}
10942
10943 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10944 predicate(!n->as_LoadStore()->result_not_used());
10945 match(Set newval (GetAndAddI mem newval));
10946 effect(KILL cr);
10947 format %{ "xaddl_lock $mem, $newval" %}
10948 ins_encode %{
10949 __ lock();
10950 __ xaddl($mem$$Address, $newval$$Register);
10951 %}
10952 ins_pipe(pipe_cmpxchg);
10953 %}
10954
10955 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10956 predicate(n->as_LoadStore()->result_not_used());
10957 match(Set dummy (GetAndAddL mem add));
10958 effect(KILL cr);
10959 format %{ "addq_lock $mem, $add" %}
10960 ins_encode %{
10961 __ lock();
10962 __ addq($mem$$Address, $add$$Register);
10963 %}
10964 ins_pipe(pipe_cmpxchg);
10965 %}
10966
10967 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10968 predicate(n->as_LoadStore()->result_not_used());
10969 match(Set dummy (GetAndAddL mem add));
10970 effect(KILL cr);
10971 format %{ "addq_lock $mem, $add" %}
10972 ins_encode %{
10973 __ lock();
10974 __ addq($mem$$Address, $add$$constant);
10975 %}
10976 ins_pipe(pipe_cmpxchg);
10977 %}
10978
10979 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10980 predicate(!n->as_LoadStore()->result_not_used());
10981 match(Set newval (GetAndAddL mem newval));
10982 effect(KILL cr);
10983 format %{ "xaddq_lock $mem, $newval" %}
10984 ins_encode %{
10985 __ lock();
10986 __ xaddq($mem$$Address, $newval$$Register);
10987 %}
10988 ins_pipe(pipe_cmpxchg);
10989 %}
10990
10991 instruct xchgB( memory mem, rRegI newval) %{
10992 match(Set newval (GetAndSetB mem newval));
10993 format %{ "XCHGB $newval,[$mem]" %}
10994 ins_encode %{
10995 __ xchgb($newval$$Register, $mem$$Address);
10996 %}
10997 ins_pipe( pipe_cmpxchg );
10998 %}
10999
11000 instruct xchgS( memory mem, rRegI newval) %{
11001 match(Set newval (GetAndSetS mem newval));
11002 format %{ "XCHGW $newval,[$mem]" %}
11003 ins_encode %{
11004 __ xchgw($newval$$Register, $mem$$Address);
11005 %}
11006 ins_pipe( pipe_cmpxchg );
11007 %}
11008
11009 instruct xchgI( memory mem, rRegI newval) %{
11010 match(Set newval (GetAndSetI mem newval));
11011 format %{ "XCHGL $newval,[$mem]" %}
11012 ins_encode %{
11013 __ xchgl($newval$$Register, $mem$$Address);
11014 %}
11015 ins_pipe( pipe_cmpxchg );
11016 %}
11017
11018 instruct xchgL( memory mem, rRegL newval) %{
11019 match(Set newval (GetAndSetL mem newval));
11020 format %{ "XCHGL $newval,[$mem]" %}
11021 ins_encode %{
11022 __ xchgq($newval$$Register, $mem$$Address);
11023 %}
11024 ins_pipe( pipe_cmpxchg );
11025 %}
11026
11027 instruct xchgP( memory mem, rRegP newval) %{
11028 match(Set newval (GetAndSetP mem newval));
11029 predicate(n->as_LoadStore()->barrier_data() == 0);
11030 format %{ "XCHGQ $newval,[$mem]" %}
11031 ins_encode %{
11032 __ xchgq($newval$$Register, $mem$$Address);
11033 %}
11034 ins_pipe( pipe_cmpxchg );
11035 %}
11036
11037 instruct xchgN( memory mem, rRegN newval) %{
11038 predicate(n->as_LoadStore()->barrier_data() == 0);
11039 match(Set newval (GetAndSetN mem newval));
11040 format %{ "XCHGL $newval,$mem]" %}
11041 ins_encode %{
11042 __ xchgl($newval$$Register, $mem$$Address);
11043 %}
11044 ins_pipe( pipe_cmpxchg );
11045 %}
11046
11047 //----------Abs Instructions-------------------------------------------
11048
11049 // Integer Absolute Instructions
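// The patterns below compute a branchless absolute value (illustrative sketch
// for the int case):
//   xorl   dst, dst     // dst = 0
//   subl   dst, src     // dst = -src, sets SF/OF
//   cmovll dst, src     // if -src < 0 (src was positive) take src, else keep -src
// For Integer.MIN_VALUE the negation overflows back to MIN_VALUE, matching Java's
// Math.abs semantics.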
11050 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11051 %{
11052 match(Set dst (AbsI src));
11053 effect(TEMP dst, KILL cr);
11054 format %{ "xorl $dst, $dst\t# abs int\n\t"
11055 "subl $dst, $src\n\t"
11056 "cmovll $dst, $src" %}
11057 ins_encode %{
11058 __ xorl($dst$$Register, $dst$$Register);
11059 __ subl($dst$$Register, $src$$Register);
11060 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11061 %}
11062
11063 ins_pipe(ialu_reg_reg);
11064 %}
11065
11066 // Long Absolute Instructions
11067 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11068 %{
11069 match(Set dst (AbsL src));
11070 effect(TEMP dst, KILL cr);
11071 format %{ "xorl $dst, $dst\t# abs long\n\t"
11072 "subq $dst, $src\n\t"
11073 "cmovlq $dst, $src" %}
11074 ins_encode %{
11075 __ xorl($dst$$Register, $dst$$Register);
11076 __ subq($dst$$Register, $src$$Register);
11077 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11078 %}
11079
11080 ins_pipe(ialu_reg_reg);
11081 %}
11082
11083 //----------Subtraction Instructions-------------------------------------------
11084
11085 // Integer Subtraction Instructions
11086 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11087 %{
11088 predicate(!UseAPX);
11089 match(Set dst (SubI dst src));
11090 effect(KILL cr);
11091 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11092
11093 format %{ "subl $dst, $src\t# int" %}
11094 ins_encode %{
11095 __ subl($dst$$Register, $src$$Register);
11096 %}
11097 ins_pipe(ialu_reg_reg);
11098 %}
11099
11100 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11101 %{
11102 predicate(UseAPX);
11103 match(Set dst (SubI src1 src2));
11104 effect(KILL cr);
11105 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11106
11107 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11108 ins_encode %{
11109 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11110 %}
11111 ins_pipe(ialu_reg_reg);
11112 %}
11113
11114 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11115 %{
11116 predicate(UseAPX);
11117 match(Set dst (SubI src1 src2));
11118 effect(KILL cr);
11119 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11120
11121 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11122 ins_encode %{
11123 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11124 %}
11125 ins_pipe(ialu_reg_reg);
11126 %}
11127
11128 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11129 %{
11130 predicate(UseAPX);
11131 match(Set dst (SubI (LoadI src1) src2));
11132 effect(KILL cr);
11133 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11134
11135 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11136 ins_encode %{
11137 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11138 %}
11139 ins_pipe(ialu_reg_reg);
11140 %}
11141
11142 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11143 %{
11144 predicate(!UseAPX);
11145 match(Set dst (SubI dst (LoadI src)));
11146 effect(KILL cr);
11147 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11148
11149 ins_cost(150);
11150 format %{ "subl $dst, $src\t# int" %}
11151 ins_encode %{
11152 __ subl($dst$$Register, $src$$Address);
11153 %}
11154 ins_pipe(ialu_reg_mem);
11155 %}
11156
11157 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11158 %{
11159 predicate(UseAPX);
11160 match(Set dst (SubI src1 (LoadI src2)));
11161 effect(KILL cr);
11162 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11163
11164 ins_cost(150);
11165 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11166 ins_encode %{
11167 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11168 %}
11169 ins_pipe(ialu_reg_mem);
11170 %}
11171
11172 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11173 %{
11174 predicate(UseAPX);
11175 match(Set dst (SubI (LoadI src1) src2));
11176 effect(KILL cr);
11177 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11178
11179 ins_cost(150);
11180 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11181 ins_encode %{
11182 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11183 %}
11184 ins_pipe(ialu_reg_mem);
11185 %}
11186
11187 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11188 %{
11189 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11190 effect(KILL cr);
11191 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11192
11193 ins_cost(150);
11194 format %{ "subl $dst, $src\t# int" %}
11195 ins_encode %{
11196 __ subl($dst$$Address, $src$$Register);
11197 %}
11198 ins_pipe(ialu_mem_reg);
11199 %}
11200
11201 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11202 %{
11203 predicate(!UseAPX);
11204 match(Set dst (SubL dst src));
11205 effect(KILL cr);
11206 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11207
11208 format %{ "subq $dst, $src\t# long" %}
11209 ins_encode %{
11210 __ subq($dst$$Register, $src$$Register);
11211 %}
11212 ins_pipe(ialu_reg_reg);
11213 %}
11214
11215 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11216 %{
11217 predicate(UseAPX);
11218 match(Set dst (SubL src1 src2));
11219 effect(KILL cr);
11220 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11221
11222 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11223 ins_encode %{
11224 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11225 %}
11226 ins_pipe(ialu_reg_reg);
11227 %}
11228
11229 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11230 %{
11231 predicate(UseAPX);
11232 match(Set dst (SubL src1 src2));
11233 effect(KILL cr);
11234 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11235
11236 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11237 ins_encode %{
11238 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11239 %}
11240 ins_pipe(ialu_reg_reg);
11241 %}
11242
11243 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11244 %{
11245 predicate(UseAPX);
11246 match(Set dst (SubL (LoadL src1) src2));
11247 effect(KILL cr);
11248 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11249
11250 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11251 ins_encode %{
11252 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11253 %}
11254 ins_pipe(ialu_reg_reg);
11255 %}
11256
11257 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11258 %{
11259 predicate(!UseAPX);
11260 match(Set dst (SubL dst (LoadL src)));
11261 effect(KILL cr);
11262 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11263
11264 ins_cost(150);
11265 format %{ "subq $dst, $src\t# long" %}
11266 ins_encode %{
11267 __ subq($dst$$Register, $src$$Address);
11268 %}
11269 ins_pipe(ialu_reg_mem);
11270 %}
11271
11272 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11273 %{
11274 predicate(UseAPX);
11275 match(Set dst (SubL src1 (LoadL src2)));
11276 effect(KILL cr);
11277 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11278
11279 ins_cost(150);
11280 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11281 ins_encode %{
11282 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11283 %}
11284 ins_pipe(ialu_reg_mem);
11285 %}
11286
11287 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11288 %{
11289 predicate(UseAPX);
11290 match(Set dst (SubL (LoadL src1) src2));
11291 effect(KILL cr);
11292 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11293
11294 ins_cost(150);
11295 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11296 ins_encode %{
11297 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11298 %}
11299 ins_pipe(ialu_reg_mem);
11300 %}
11301
11302 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11303 %{
11304 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11305 effect(KILL cr);
11306 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11307
11308 ins_cost(150);
11309 format %{ "subq $dst, $src\t# long" %}
11310 ins_encode %{
11311 __ subq($dst$$Address, $src$$Register);
11312 %}
11313 ins_pipe(ialu_mem_reg);
11314 %}
11315
11316 // Subtract from a pointer
11317 // XXX hmpf???
11318 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11319 %{
11320 match(Set dst (AddP dst (SubI zero src)));
11321 effect(KILL cr);
11322
11323 format %{ "subq $dst, $src\t# ptr - int" %}
11324 ins_encode %{
11325 __ subq($dst$$Register, $src$$Register);
11326 %}
11327 ins_pipe(ialu_reg_reg);
11328 %}
11329
11330 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11331 %{
11332 predicate(!UseAPX);
11333 match(Set dst (SubI zero dst));
11334 effect(KILL cr);
11335 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11336
11337 format %{ "negl $dst\t# int" %}
11338 ins_encode %{
11339 __ negl($dst$$Register);
11340 %}
11341 ins_pipe(ialu_reg);
11342 %}
11343
11344 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11345 %{
11346 predicate(UseAPX);
11347 match(Set dst (SubI zero src));
11348 effect(KILL cr);
11349 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11350
11351 format %{ "enegl $dst, $src\t# int ndd" %}
11352 ins_encode %{
11353 __ enegl($dst$$Register, $src$$Register, false);
11354 %}
11355 ins_pipe(ialu_reg);
11356 %}
11357
11358 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11359 %{
11360 predicate(!UseAPX);
11361 match(Set dst (NegI dst));
11362 effect(KILL cr);
11363 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11364
11365 format %{ "negl $dst\t# int" %}
11366 ins_encode %{
11367 __ negl($dst$$Register);
11368 %}
11369 ins_pipe(ialu_reg);
11370 %}
11371
11372 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11373 %{
11374 predicate(UseAPX);
11375 match(Set dst (NegI src));
11376 effect(KILL cr);
11377 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11378
11379 format %{ "enegl $dst, $src\t# int ndd" %}
11380 ins_encode %{
11381 __ enegl($dst$$Register, $src$$Register, false);
11382 %}
11383 ins_pipe(ialu_reg);
11384 %}
11385
11386 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11387 %{
11388 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11389 effect(KILL cr);
11390 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11391
11392 format %{ "negl $dst\t# int" %}
11393 ins_encode %{
11394 __ negl($dst$$Address);
11395 %}
11396 ins_pipe(ialu_reg);
11397 %}
11398
11399 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11400 %{
11401 predicate(!UseAPX);
11402 match(Set dst (SubL zero dst));
11403 effect(KILL cr);
11404 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11405
11406 format %{ "negq $dst\t# long" %}
11407 ins_encode %{
11408 __ negq($dst$$Register);
11409 %}
11410 ins_pipe(ialu_reg);
11411 %}
11412
11413 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11414 %{
11415 predicate(UseAPX);
11416 match(Set dst (SubL zero src));
11417 effect(KILL cr);
11418 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11419
11420 format %{ "enegq $dst, $src\t# long ndd" %}
11421 ins_encode %{
11422 __ enegq($dst$$Register, $src$$Register, false);
11423 %}
11424 ins_pipe(ialu_reg);
11425 %}
11426
11427 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11428 %{
11429 predicate(!UseAPX);
11430 match(Set dst (NegL dst));
11431 effect(KILL cr);
11432 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11433
11434 format %{ "negq $dst\t# int" %}
11435 ins_encode %{
11436 __ negq($dst$$Register);
11437 %}
11438 ins_pipe(ialu_reg);
11439 %}
11440
11441 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11442 %{
11443 predicate(UseAPX);
11444 match(Set dst (NegL src));
11445 effect(KILL cr);
11446 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11447
11448 format %{ "enegq $dst, $src\t# long ndd" %}
11449 ins_encode %{
11450 __ enegq($dst$$Register, $src$$Register, false);
11451 %}
11452 ins_pipe(ialu_reg);
11453 %}
11454
11455 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11456 %{
11457 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11458 effect(KILL cr);
11459 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11460
11461 format %{ "negq $dst\t# long" %}
11462 ins_encode %{
11463 __ negq($dst$$Address);
11464 %}
11465 ins_pipe(ialu_reg);
11466 %}
11467
11468 //----------Multiplication/Division Instructions-------------------------------
11469 // Integer Multiplication Instructions
11470 // Multiply Register
11471
11472 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11473 %{
11474 predicate(!UseAPX);
11475 match(Set dst (MulI dst src));
11476 effect(KILL cr);
11477
11478 ins_cost(300);
11479 format %{ "imull $dst, $src\t# int" %}
11480 ins_encode %{
11481 __ imull($dst$$Register, $src$$Register);
11482 %}
11483 ins_pipe(ialu_reg_reg_alu0);
11484 %}
11485
11486 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11487 %{
11488 predicate(UseAPX);
11489 match(Set dst (MulI src1 src2));
11490 effect(KILL cr);
11491 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11492
11493 ins_cost(300);
11494 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11495 ins_encode %{
11496 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11497 %}
11498 ins_pipe(ialu_reg_reg_alu0);
11499 %}
11500
11501 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11502 %{
11503 match(Set dst (MulI src imm));
11504 effect(KILL cr);
11505
11506 ins_cost(300);
11507 format %{ "imull $dst, $src, $imm\t# int" %}
11508 ins_encode %{
11509 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11510 %}
11511 ins_pipe(ialu_reg_reg_alu0);
11512 %}
11513
11514 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11515 %{
11516 predicate(!UseAPX);
11517 match(Set dst (MulI dst (LoadI src)));
11518 effect(KILL cr);
11519
11520 ins_cost(350);
11521 format %{ "imull $dst, $src\t# int" %}
11522 ins_encode %{
11523 __ imull($dst$$Register, $src$$Address);
11524 %}
11525 ins_pipe(ialu_reg_mem_alu0);
11526 %}
11527
11528 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11529 %{
11530 predicate(UseAPX);
11531 match(Set dst (MulI src1 (LoadI src2)));
11532 effect(KILL cr);
11533 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11534
11535 ins_cost(350);
11536 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11537 ins_encode %{
11538 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11539 %}
11540 ins_pipe(ialu_reg_mem_alu0);
11541 %}
11542
11543 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11544 %{
11545 match(Set dst (MulI (LoadI src) imm));
11546 effect(KILL cr);
11547
11548 ins_cost(300);
11549 format %{ "imull $dst, $src, $imm\t# int" %}
11550 ins_encode %{
11551 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11552 %}
11553 ins_pipe(ialu_reg_mem_alu0);
11554 %}
11555
11556 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11557 %{
11558 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11559 effect(KILL cr, KILL src2);
11560
11561 expand %{ mulI_rReg(dst, src1, cr);
11562 mulI_rReg(src2, src3, cr);
11563 addI_rReg(dst, src2, cr); %}
11564 %}
11565
11566 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11567 %{
11568 predicate(!UseAPX);
11569 match(Set dst (MulL dst src));
11570 effect(KILL cr);
11571
11572 ins_cost(300);
11573 format %{ "imulq $dst, $src\t# long" %}
11574 ins_encode %{
11575 __ imulq($dst$$Register, $src$$Register);
11576 %}
11577 ins_pipe(ialu_reg_reg_alu0);
11578 %}
11579
11580 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11581 %{
11582 predicate(UseAPX);
11583 match(Set dst (MulL src1 src2));
11584 effect(KILL cr);
11585 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11586
11587 ins_cost(300);
11588 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11589 ins_encode %{
11590 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11591 %}
11592 ins_pipe(ialu_reg_reg_alu0);
11593 %}
11594
11595 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11596 %{
11597 match(Set dst (MulL src imm));
11598 effect(KILL cr);
11599
11600 ins_cost(300);
11601 format %{ "imulq $dst, $src, $imm\t# long" %}
11602 ins_encode %{
11603 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11604 %}
11605 ins_pipe(ialu_reg_reg_alu0);
11606 %}
11607
11608 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11609 %{
11610 predicate(!UseAPX);
11611 match(Set dst (MulL dst (LoadL src)));
11612 effect(KILL cr);
11613
11614 ins_cost(350);
11615 format %{ "imulq $dst, $src\t# long" %}
11616 ins_encode %{
11617 __ imulq($dst$$Register, $src$$Address);
11618 %}
11619 ins_pipe(ialu_reg_mem_alu0);
11620 %}
11621
11622 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11623 %{
11624 predicate(UseAPX);
11625 match(Set dst (MulL src1 (LoadL src2)));
11626 effect(KILL cr);
11627 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11628
11629 ins_cost(350);
11630 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11631 ins_encode %{
11632 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11633 %}
11634 ins_pipe(ialu_reg_mem_alu0);
11635 %}
11636
11637 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11638 %{
11639 match(Set dst (MulL (LoadL src) imm));
11640 effect(KILL cr);
11641
11642 ins_cost(300);
11643 format %{ "imulq $dst, $src, $imm\t# long" %}
11644 ins_encode %{
11645 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11646 %}
11647 ins_pipe(ialu_reg_mem_alu0);
11648 %}
11649
11650 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11651 %{
11652 match(Set dst (MulHiL src rax));
11653 effect(USE_KILL rax, KILL cr);
11654
11655 ins_cost(300);
11656 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11657 ins_encode %{
11658 __ imulq($src$$Register);
11659 %}
11660 ins_pipe(ialu_reg_reg_alu0);
11661 %}
11662
11663 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11664 %{
11665 match(Set dst (UMulHiL src rax));
11666 effect(USE_KILL rax, KILL cr);
11667
11668 ins_cost(300);
11669 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11670 ins_encode %{
11671 __ mulq($src$$Register);
11672 %}
11673 ins_pipe(ialu_reg_reg_alu0);
11674 %}
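// Note: the one-operand imulq/mulq forms used above multiply RAX by the source
// and leave the 128-bit product in RDX:RAX; dst is pinned to RDX so only the
// high 64 bits are kept, which is what MulHiL/UMulHiL ask for.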
11675
11676 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11677 rFlagsReg cr)
11678 %{
11679 match(Set rax (DivI rax div));
11680 effect(KILL rdx, KILL cr);
11681
11682 ins_cost(30*100+10*100); // XXX
11683 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11684 "jne,s normal\n\t"
11685 "xorl rdx, rdx\n\t"
11686 "cmpl $div, -1\n\t"
11687 "je,s done\n"
11688 "normal: cdql\n\t"
11689 "idivl $div\n"
11690 "done:" %}
11691 ins_encode(cdql_enc(div));
11692 ins_pipe(ialu_reg_reg_alu0);
11693 %}
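// Note: idivl raises #DE for Integer.MIN_VALUE / -1 (the quotient 2^31 does not
// fit in 32 bits), so the cdql_enc expansion first tests for that operand pair
// and, when hit, skips the divide with rax left at MIN_VALUE and rdx zeroed,
// matching Java's MIN_VALUE / -1 == MIN_VALUE and MIN_VALUE % -1 == 0. The
// 64-bit and DIVMOD variants below follow the same shape.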
11694
11695 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11696 rFlagsReg cr)
11697 %{
11698 match(Set rax (DivL rax div));
11699 effect(KILL rdx, KILL cr);
11700
11701 ins_cost(30*100+10*100); // XXX
11702 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11703 "cmpq rax, rdx\n\t"
11704 "jne,s normal\n\t"
11705 "xorl rdx, rdx\n\t"
11706 "cmpq $div, -1\n\t"
11707 "je,s done\n"
11708 "normal: cdqq\n\t"
11709 "idivq $div\n"
11710 "done:" %}
11711 ins_encode(cdqq_enc(div));
11712 ins_pipe(ialu_reg_reg_alu0);
11713 %}
11714
11715 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11716 %{
11717 match(Set rax (UDivI rax div));
11718 effect(KILL rdx, KILL cr);
11719
11720 ins_cost(300);
11721 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11722 ins_encode %{
11723 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11724 %}
11725 ins_pipe(ialu_reg_reg_alu0);
11726 %}
11727
11728 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11729 %{
11730 match(Set rax (UDivL rax div));
11731 effect(KILL rdx, KILL cr);
11732
11733 ins_cost(300);
11734 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11735 ins_encode %{
11736 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11737 %}
11738 ins_pipe(ialu_reg_reg_alu0);
11739 %}
11740
11741 // Integer DIVMOD with Register, both quotient and mod results
11742 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11743 rFlagsReg cr)
11744 %{
11745 match(DivModI rax div);
11746 effect(KILL cr);
11747
11748 ins_cost(30*100+10*100); // XXX
11749 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11750 "jne,s normal\n\t"
11751 "xorl rdx, rdx\n\t"
11752 "cmpl $div, -1\n\t"
11753 "je,s done\n"
11754 "normal: cdql\n\t"
11755 "idivl $div\n"
11756 "done:" %}
11757 ins_encode(cdql_enc(div));
11758 ins_pipe(pipe_slow);
11759 %}
11760
11761 // Long DIVMOD with Register, both quotient and mod results
11762 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11763 rFlagsReg cr)
11764 %{
11765 match(DivModL rax div);
11766 effect(KILL cr);
11767
11768 ins_cost(30*100+10*100); // XXX
11769 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11770 "cmpq rax, rdx\n\t"
11771 "jne,s normal\n\t"
11772 "xorl rdx, rdx\n\t"
11773 "cmpq $div, -1\n\t"
11774 "je,s done\n"
11775 "normal: cdqq\n\t"
11776 "idivq $div\n"
11777 "done:" %}
11778 ins_encode(cdqq_enc(div));
11779 ins_pipe(pipe_slow);
11780 %}
11781
11782 // Unsigned integer DIVMOD with Register, both quotient and mod results
11783 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11784 no_rax_rdx_RegI div, rFlagsReg cr)
11785 %{
11786 match(UDivModI rax div);
11787 effect(TEMP tmp, KILL cr);
11788
11789 ins_cost(300);
11790 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11791 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11792 %}
11793 ins_encode %{
11794 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11795 %}
11796 ins_pipe(pipe_slow);
11797 %}
11798
11799 // Unsigned long DIVMOD with Register, both quotient and mod results
11800 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11801 no_rax_rdx_RegL div, rFlagsReg cr)
11802 %{
11803 match(UDivModL rax div);
11804 effect(TEMP tmp, KILL cr);
11805
11806 ins_cost(300);
11807 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11808 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11809 %}
11810 ins_encode %{
11811 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11812 %}
11813 ins_pipe(pipe_slow);
11814 %}
11815
11816 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11817 rFlagsReg cr)
11818 %{
11819 match(Set rdx (ModI rax div));
11820 effect(KILL rax, KILL cr);
11821
11822 ins_cost(300); // XXX
11823 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11824 "jne,s normal\n\t"
11825 "xorl rdx, rdx\n\t"
11826 "cmpl $div, -1\n\t"
11827 "je,s done\n"
11828 "normal: cdql\n\t"
11829 "idivl $div\n"
11830 "done:" %}
11831 ins_encode(cdql_enc(div));
11832 ins_pipe(ialu_reg_reg_alu0);
11833 %}
11834
11835 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11836 rFlagsReg cr)
11837 %{
11838 match(Set rdx (ModL rax div));
11839 effect(KILL rax, KILL cr);
11840
11841 ins_cost(300); // XXX
11842 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11843 "cmpq rax, rdx\n\t"
11844 "jne,s normal\n\t"
11845 "xorl rdx, rdx\n\t"
11846 "cmpq $div, -1\n\t"
11847 "je,s done\n"
11848 "normal: cdqq\n\t"
11849 "idivq $div\n"
11850 "done:" %}
11851 ins_encode(cdqq_enc(div));
11852 ins_pipe(ialu_reg_reg_alu0);
11853 %}
11854
11855 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11856 %{
11857 match(Set rdx (UModI rax div));
11858 effect(KILL rax, KILL cr);
11859
11860 ins_cost(300);
11861 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11862 ins_encode %{
11863 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11864 %}
11865 ins_pipe(ialu_reg_reg_alu0);
11866 %}
11867
11868 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11869 %{
11870 match(Set rdx (UModL rax div));
11871 effect(KILL rax, KILL cr);
11872
11873 ins_cost(300);
11874 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11875 ins_encode %{
11876 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11877 %}
11878 ins_pipe(ialu_reg_reg_alu0);
11879 %}
11880
11881 // Integer Shift Instructions
11882 // Shift Left by one, two, three
11883 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11884 %{
11885 predicate(!UseAPX);
11886 match(Set dst (LShiftI dst shift));
11887 effect(KILL cr);
11888
11889 format %{ "sall $dst, $shift" %}
11890 ins_encode %{
11891 __ sall($dst$$Register, $shift$$constant);
11892 %}
11893 ins_pipe(ialu_reg);
11894 %}
11895
11896 // Shift Left by one, two, three
11897 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11898 %{
11899 predicate(UseAPX);
11900 match(Set dst (LShiftI src shift));
11901 effect(KILL cr);
11902 flag(PD::Flag_ndd_demotable_opr1);
11903
11904 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11905 ins_encode %{
11906 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11907 %}
11908 ins_pipe(ialu_reg);
11909 %}
11910
11911 // Shift Left by 8-bit immediate
11912 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11913 %{
11914 predicate(!UseAPX);
11915 match(Set dst (LShiftI dst shift));
11916 effect(KILL cr);
11917
11918 format %{ "sall $dst, $shift" %}
11919 ins_encode %{
11920 __ sall($dst$$Register, $shift$$constant);
11921 %}
11922 ins_pipe(ialu_reg);
11923 %}
11924
11925 // Shift Left by 8-bit immediate
11926 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11927 %{
11928 predicate(UseAPX);
11929 match(Set dst (LShiftI src shift));
11930 effect(KILL cr);
11931 flag(PD::Flag_ndd_demotable_opr1);
11932
11933 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11934 ins_encode %{
11935 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11936 %}
11937 ins_pipe(ialu_reg);
11938 %}
11939
11940 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11941 %{
11942 predicate(UseAPX);
11943 match(Set dst (LShiftI (LoadI src) shift));
11944 effect(KILL cr);
11945
11946 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11947 ins_encode %{
11948 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11949 %}
11950 ins_pipe(ialu_reg);
11951 %}
11952
11953 // Shift Left by 8-bit immediate
11954 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11955 %{
11956 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11957 effect(KILL cr);
11958
11959 format %{ "sall $dst, $shift" %}
11960 ins_encode %{
11961 __ sall($dst$$Address, $shift$$constant);
11962 %}
11963 ins_pipe(ialu_mem_imm);
11964 %}
11965
11966 // Shift Left by variable
11967 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11968 %{
11969 predicate(!VM_Version::supports_bmi2());
11970 match(Set dst (LShiftI dst shift));
11971 effect(KILL cr);
11972
11973 format %{ "sall $dst, $shift" %}
11974 ins_encode %{
11975 __ sall($dst$$Register);
11976 %}
11977 ins_pipe(ialu_reg_reg);
11978 %}
11979
11980 // Shift Left by variable
11981 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11982 %{
11983 predicate(!VM_Version::supports_bmi2());
11984 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11985 effect(KILL cr);
11986
11987 format %{ "sall $dst, $shift" %}
11988 ins_encode %{
11989 __ sall($dst$$Address);
11990 %}
11991 ins_pipe(ialu_mem_reg);
11992 %}
11993
11994 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11995 %{
11996 predicate(VM_Version::supports_bmi2());
11997 match(Set dst (LShiftI src shift));
11998
11999 format %{ "shlxl $dst, $src, $shift" %}
12000 ins_encode %{
12001 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12002 %}
12003 ins_pipe(ialu_reg_reg);
12004 %}
12005
12006 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12007 %{
12008 predicate(VM_Version::supports_bmi2());
12009 match(Set dst (LShiftI (LoadI src) shift));
12010 ins_cost(175);
12011 format %{ "shlxl $dst, $src, $shift" %}
12012 ins_encode %{
12013 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12014 %}
12015 ins_pipe(ialu_reg_mem);
12016 %}
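// Note: the BMI2 shlxl form above takes its count in an arbitrary register and
// leaves the flags untouched, so those patterns need no rFlagsReg effect; the
// legacy sall forms shift by the implicit count in CL (hence the rcx_RegI
// operand) and must KILL cr. The sarx/shrx patterns below follow the same split.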
12017
12018 // Arithmetic Shift Right by 8-bit immediate
12019 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12020 %{
12021 predicate(!UseAPX);
12022 match(Set dst (RShiftI dst shift));
12023 effect(KILL cr);
12024
12025 format %{ "sarl $dst, $shift" %}
12026 ins_encode %{
12027 __ sarl($dst$$Register, $shift$$constant);
12028 %}
12029 ins_pipe(ialu_mem_imm);
12030 %}
12031
12032 // Arithmetic Shift Right by 8-bit immediate
12033 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12034 %{
12035 predicate(UseAPX);
12036 match(Set dst (RShiftI src shift));
12037 effect(KILL cr);
12038 flag(PD::Flag_ndd_demotable_opr1);
12039
12040 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12041 ins_encode %{
12042 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12043 %}
12044 ins_pipe(ialu_mem_imm);
12045 %}
12046
12047 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12048 %{
12049 predicate(UseAPX);
12050 match(Set dst (RShiftI (LoadI src) shift));
12051 effect(KILL cr);
12052
12053 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12054 ins_encode %{
12055 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12056 %}
12057 ins_pipe(ialu_mem_imm);
12058 %}
12059
12060 // Arithmetic Shift Right by 8-bit immediate
12061 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12062 %{
12063 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12064 effect(KILL cr);
12065
12066 format %{ "sarl $dst, $shift" %}
12067 ins_encode %{
12068 __ sarl($dst$$Address, $shift$$constant);
12069 %}
12070 ins_pipe(ialu_mem_imm);
12071 %}
12072
12073 // Arithmetic Shift Right by variable
12074 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12075 %{
12076 predicate(!VM_Version::supports_bmi2());
12077 match(Set dst (RShiftI dst shift));
12078 effect(KILL cr);
12079
12080 format %{ "sarl $dst, $shift" %}
12081 ins_encode %{
12082 __ sarl($dst$$Register);
12083 %}
12084 ins_pipe(ialu_reg_reg);
12085 %}
12086
12087 // Arithmetic Shift Right by variable
12088 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12089 %{
12090 predicate(!VM_Version::supports_bmi2());
12091 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12092 effect(KILL cr);
12093
12094 format %{ "sarl $dst, $shift" %}
12095 ins_encode %{
12096 __ sarl($dst$$Address);
12097 %}
12098 ins_pipe(ialu_mem_reg);
12099 %}
12100
12101 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12102 %{
12103 predicate(VM_Version::supports_bmi2());
12104 match(Set dst (RShiftI src shift));
12105
12106 format %{ "sarxl $dst, $src, $shift" %}
12107 ins_encode %{
12108 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12109 %}
12110 ins_pipe(ialu_reg_reg);
12111 %}
12112
12113 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12114 %{
12115 predicate(VM_Version::supports_bmi2());
12116 match(Set dst (RShiftI (LoadI src) shift));
12117 ins_cost(175);
12118 format %{ "sarxl $dst, $src, $shift" %}
12119 ins_encode %{
12120 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12121 %}
12122 ins_pipe(ialu_reg_mem);
12123 %}
12124
12125 // Logical Shift Right by 8-bit immediate
12126 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12127 %{
12128 predicate(!UseAPX);
12129 match(Set dst (URShiftI dst shift));
12130 effect(KILL cr);
12131
12132 format %{ "shrl $dst, $shift" %}
12133 ins_encode %{
12134 __ shrl($dst$$Register, $shift$$constant);
12135 %}
12136 ins_pipe(ialu_reg);
12137 %}
12138
12139 // Logical Shift Right by 8-bit immediate
12140 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12141 %{
12142 predicate(UseAPX);
12143 match(Set dst (URShiftI src shift));
12144 effect(KILL cr);
12145 flag(PD::Flag_ndd_demotable_opr1);
12146
12147   format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12148 ins_encode %{
12149 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12150 %}
12151 ins_pipe(ialu_reg);
12152 %}
12153
12154 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12155 %{
12156 predicate(UseAPX);
12157 match(Set dst (URShiftI (LoadI src) shift));
12158 effect(KILL cr);
12159
12160   format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12161 ins_encode %{
12162 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12163 %}
12164 ins_pipe(ialu_reg);
12165 %}
12166
12167 // Logical Shift Right by 8-bit immediate
12168 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12169 %{
12170 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12171 effect(KILL cr);
12172
12173 format %{ "shrl $dst, $shift" %}
12174 ins_encode %{
12175 __ shrl($dst$$Address, $shift$$constant);
12176 %}
12177 ins_pipe(ialu_mem_imm);
12178 %}
12179
12180 // Logical Shift Right by variable
12181 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12182 %{
12183 predicate(!VM_Version::supports_bmi2());
12184 match(Set dst (URShiftI dst shift));
12185 effect(KILL cr);
12186
12187 format %{ "shrl $dst, $shift" %}
12188 ins_encode %{
12189 __ shrl($dst$$Register);
12190 %}
12191 ins_pipe(ialu_reg_reg);
12192 %}
12193
12194 // Logical Shift Right by variable
12195 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12196 %{
12197 predicate(!VM_Version::supports_bmi2());
12198 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12199 effect(KILL cr);
12200
12201 format %{ "shrl $dst, $shift" %}
12202 ins_encode %{
12203 __ shrl($dst$$Address);
12204 %}
12205 ins_pipe(ialu_mem_reg);
12206 %}
12207
12208 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12209 %{
12210 predicate(VM_Version::supports_bmi2());
12211 match(Set dst (URShiftI src shift));
12212
12213 format %{ "shrxl $dst, $src, $shift" %}
12214 ins_encode %{
12215 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12216 %}
12217 ins_pipe(ialu_reg_reg);
12218 %}
12219
12220 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12221 %{
12222 predicate(VM_Version::supports_bmi2());
12223 match(Set dst (URShiftI (LoadI src) shift));
12224 ins_cost(175);
12225 format %{ "shrxl $dst, $src, $shift" %}
12226 ins_encode %{
12227 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12228 %}
12229 ins_pipe(ialu_reg_mem);
12230 %}
12231
12232 // Long Shift Instructions
12233 // Shift Left by one, two, three
12234 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12235 %{
12236 predicate(!UseAPX);
12237 match(Set dst (LShiftL dst shift));
12238 effect(KILL cr);
12239
12240 format %{ "salq $dst, $shift" %}
12241 ins_encode %{
12242 __ salq($dst$$Register, $shift$$constant);
12243 %}
12244 ins_pipe(ialu_reg);
12245 %}
12246
12247 // Shift Left by one, two, three
12248 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12249 %{
12250 predicate(UseAPX);
12251 match(Set dst (LShiftL src shift));
12252 effect(KILL cr);
12253 flag(PD::Flag_ndd_demotable_opr1);
12254
12255 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12256 ins_encode %{
12257 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12258 %}
12259 ins_pipe(ialu_reg);
12260 %}
12261
12262 // Shift Left by 8-bit immediate
12263 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12264 %{
12265 predicate(!UseAPX);
12266 match(Set dst (LShiftL dst shift));
12267 effect(KILL cr);
12268
12269 format %{ "salq $dst, $shift" %}
12270 ins_encode %{
12271 __ salq($dst$$Register, $shift$$constant);
12272 %}
12273 ins_pipe(ialu_reg);
12274 %}
12275
12276 // Shift Left by 8-bit immediate
12277 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12278 %{
12279 predicate(UseAPX);
12280 match(Set dst (LShiftL src shift));
12281 effect(KILL cr);
12282 flag(PD::Flag_ndd_demotable_opr1);
12283
12284 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12285 ins_encode %{
12286 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12287 %}
12288 ins_pipe(ialu_reg);
12289 %}
12290
12291 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12292 %{
12293 predicate(UseAPX);
12294 match(Set dst (LShiftL (LoadL src) shift));
12295 effect(KILL cr);
12296
12297 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12298 ins_encode %{
12299 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12300 %}
12301 ins_pipe(ialu_reg);
12302 %}
12303
12304 // Shift Left by 8-bit immediate
12305 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12306 %{
12307 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12308 effect(KILL cr);
12309
12310 format %{ "salq $dst, $shift" %}
12311 ins_encode %{
12312 __ salq($dst$$Address, $shift$$constant);
12313 %}
12314 ins_pipe(ialu_mem_imm);
12315 %}
12316
12317 // Shift Left by variable
12318 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12319 %{
12320 predicate(!VM_Version::supports_bmi2());
12321 match(Set dst (LShiftL dst shift));
12322 effect(KILL cr);
12323
12324 format %{ "salq $dst, $shift" %}
12325 ins_encode %{
12326 __ salq($dst$$Register);
12327 %}
12328 ins_pipe(ialu_reg_reg);
12329 %}
12330
12331 // Shift Left by variable
12332 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12333 %{
12334 predicate(!VM_Version::supports_bmi2());
12335 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12336 effect(KILL cr);
12337
12338 format %{ "salq $dst, $shift" %}
12339 ins_encode %{
12340 __ salq($dst$$Address);
12341 %}
12342 ins_pipe(ialu_mem_reg);
12343 %}
12344
12345 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12346 %{
12347 predicate(VM_Version::supports_bmi2());
12348 match(Set dst (LShiftL src shift));
12349
12350 format %{ "shlxq $dst, $src, $shift" %}
12351 ins_encode %{
12352 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12353 %}
12354 ins_pipe(ialu_reg_reg);
12355 %}
12356
12357 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12358 %{
12359 predicate(VM_Version::supports_bmi2());
12360 match(Set dst (LShiftL (LoadL src) shift));
12361 ins_cost(175);
12362 format %{ "shlxq $dst, $src, $shift" %}
12363 ins_encode %{
12364 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12365 %}
12366 ins_pipe(ialu_reg_mem);
12367 %}
12368
12369 // Arithmetic Shift Right by 8-bit immediate
12370 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12371 %{
12372 predicate(!UseAPX);
12373 match(Set dst (RShiftL dst shift));
12374 effect(KILL cr);
12375
12376 format %{ "sarq $dst, $shift" %}
12377 ins_encode %{
12378 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12379 %}
12380 ins_pipe(ialu_mem_imm);
12381 %}
12382
12383 // Arithmetic Shift Right by 8-bit immediate
12384 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12385 %{
12386 predicate(UseAPX);
12387 match(Set dst (RShiftL src shift));
12388 effect(KILL cr);
12389 flag(PD::Flag_ndd_demotable_opr1);
12390
12391 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12392 ins_encode %{
12393 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12394 %}
12395 ins_pipe(ialu_mem_imm);
12396 %}
12397
12398 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12399 %{
12400 predicate(UseAPX);
12401 match(Set dst (RShiftL (LoadL src) shift));
12402 effect(KILL cr);
12403
12404 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12405 ins_encode %{
12406 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12407 %}
12408 ins_pipe(ialu_mem_imm);
12409 %}
12410
12411 // Arithmetic Shift Right by 8-bit immediate
12412 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12413 %{
12414 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12415 effect(KILL cr);
12416
12417 format %{ "sarq $dst, $shift" %}
12418 ins_encode %{
12419 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12420 %}
12421 ins_pipe(ialu_mem_imm);
12422 %}
12423
12424 // Arithmetic Shift Right by variable
12425 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12426 %{
12427 predicate(!VM_Version::supports_bmi2());
12428 match(Set dst (RShiftL dst shift));
12429 effect(KILL cr);
12430
12431 format %{ "sarq $dst, $shift" %}
12432 ins_encode %{
12433 __ sarq($dst$$Register);
12434 %}
12435 ins_pipe(ialu_reg_reg);
12436 %}
12437
12438 // Arithmetic Shift Right by variable
12439 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12440 %{
12441 predicate(!VM_Version::supports_bmi2());
12442 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12443 effect(KILL cr);
12444
12445 format %{ "sarq $dst, $shift" %}
12446 ins_encode %{
12447 __ sarq($dst$$Address);
12448 %}
12449 ins_pipe(ialu_mem_reg);
12450 %}
12451
12452 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12453 %{
12454 predicate(VM_Version::supports_bmi2());
12455 match(Set dst (RShiftL src shift));
12456
12457 format %{ "sarxq $dst, $src, $shift" %}
12458 ins_encode %{
12459 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12460 %}
12461 ins_pipe(ialu_reg_reg);
12462 %}
12463
12464 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12465 %{
12466 predicate(VM_Version::supports_bmi2());
12467 match(Set dst (RShiftL (LoadL src) shift));
12468 ins_cost(175);
12469 format %{ "sarxq $dst, $src, $shift" %}
12470 ins_encode %{
12471 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12472 %}
12473 ins_pipe(ialu_reg_mem);
12474 %}
12475
12476 // Logical Shift Right by 8-bit immediate
12477 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12478 %{
12479 predicate(!UseAPX);
12480 match(Set dst (URShiftL dst shift));
12481 effect(KILL cr);
12482
12483 format %{ "shrq $dst, $shift" %}
12484 ins_encode %{
12485 __ shrq($dst$$Register, $shift$$constant);
12486 %}
12487 ins_pipe(ialu_reg);
12488 %}
12489
12490 // Logical Shift Right by 8-bit immediate
12491 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12492 %{
12493 predicate(UseAPX);
12494 match(Set dst (URShiftL src shift));
12495 effect(KILL cr);
12496 flag(PD::Flag_ndd_demotable_opr1);
12497
12498 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12499 ins_encode %{
12500 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12501 %}
12502 ins_pipe(ialu_reg);
12503 %}
12504
12505 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12506 %{
12507 predicate(UseAPX);
12508 match(Set dst (URShiftL (LoadL src) shift));
12509 effect(KILL cr);
12510
12511 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12512 ins_encode %{
12513 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12514 %}
12515 ins_pipe(ialu_reg);
12516 %}
12517
12518 // Logical Shift Right by 8-bit immediate
12519 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12520 %{
12521 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12522 effect(KILL cr);
12523
12524 format %{ "shrq $dst, $shift" %}
12525 ins_encode %{
12526 __ shrq($dst$$Address, $shift$$constant);
12527 %}
12528 ins_pipe(ialu_mem_imm);
12529 %}
12530
12531 // Logical Shift Right by variable
12532 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12533 %{
12534 predicate(!VM_Version::supports_bmi2());
12535 match(Set dst (URShiftL dst shift));
12536 effect(KILL cr);
12537
12538 format %{ "shrq $dst, $shift" %}
12539 ins_encode %{
12540 __ shrq($dst$$Register);
12541 %}
12542 ins_pipe(ialu_reg_reg);
12543 %}
12544
12545 // Logical Shift Right by variable
12546 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12547 %{
12548 predicate(!VM_Version::supports_bmi2());
12549 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12550 effect(KILL cr);
12551
12552 format %{ "shrq $dst, $shift" %}
12553 ins_encode %{
12554 __ shrq($dst$$Address);
12555 %}
12556 ins_pipe(ialu_mem_reg);
12557 %}
12558
12559 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12560 %{
12561 predicate(VM_Version::supports_bmi2());
12562 match(Set dst (URShiftL src shift));
12563
12564 format %{ "shrxq $dst, $src, $shift" %}
12565 ins_encode %{
12566 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12567 %}
12568 ins_pipe(ialu_reg_reg);
12569 %}
12570
12571 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12572 %{
12573 predicate(VM_Version::supports_bmi2());
12574 match(Set dst (URShiftL (LoadL src) shift));
12575 ins_cost(175);
12576 format %{ "shrxq $dst, $src, $shift" %}
12577 ins_encode %{
12578 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12579 %}
12580 ins_pipe(ialu_reg_mem);
12581 %}
12582
12583 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12584 // This idiom is used by the compiler for the i2b bytecode.
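// A worked example: for src = 0x000000C4, (src << 24) is 0xC4000000 and the
// arithmetic shift right by 24 yields 0xFFFFFFC4, i.e. the byte sign-extended
// to -60, which is exactly what a single movsbl produces.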
12585 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12586 %{
12587 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12588
12589 format %{ "movsbl $dst, $src\t# i2b" %}
12590 ins_encode %{
12591 __ movsbl($dst$$Register, $src$$Register);
12592 %}
12593 ins_pipe(ialu_reg_reg);
12594 %}
12595
12596 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12597 // This idiom is used by the compiler for the i2s bytecode.
12598 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12599 %{
12600 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12601
12602 format %{ "movswl $dst, $src\t# i2s" %}
12603 ins_encode %{
12604 __ movswl($dst$$Register, $src$$Register);
12605 %}
12606 ins_pipe(ialu_reg_reg);
12607 %}
12608
12609 // ROL/ROR instructions
12610
12611 // Rotate left by constant.
12612 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12613 %{
12614 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12615 match(Set dst (RotateLeft dst shift));
12616 effect(KILL cr);
12617 format %{ "roll $dst, $shift" %}
12618 ins_encode %{
12619 __ roll($dst$$Register, $shift$$constant);
12620 %}
12621 ins_pipe(ialu_reg);
12622 %}
12623
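// rorx only encodes a rotate-right count, so the constant rotate-left rules
// below emit a rotate-right by the complementary count (32 - n for n in 1..31);
// e.g. rol by 5 becomes rorxl dst, src, 27.  Like the other BMI2 forms, rorx
// does not touch EFLAGS, so no KILL cr is required.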
12624 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12625 %{
12626 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12627 match(Set dst (RotateLeft src shift));
12628 format %{ "rolxl $dst, $src, $shift" %}
12629 ins_encode %{
12630 int shift = 32 - ($shift$$constant & 31);
12631 __ rorxl($dst$$Register, $src$$Register, shift);
12632 %}
12633 ins_pipe(ialu_reg_reg);
12634 %}
12635
12636 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12637 %{
12638 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12639 match(Set dst (RotateLeft (LoadI src) shift));
12640 ins_cost(175);
12641 format %{ "rolxl $dst, $src, $shift" %}
12642 ins_encode %{
12643 int shift = 32 - ($shift$$constant & 31);
12644 __ rorxl($dst$$Register, $src$$Address, shift);
12645 %}
12646 ins_pipe(ialu_reg_mem);
12647 %}
12648
12649 // Rotate Left by variable
12650 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12651 %{
12652 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12653 match(Set dst (RotateLeft dst shift));
12654 effect(KILL cr);
12655 format %{ "roll $dst, $shift" %}
12656 ins_encode %{
12657 __ roll($dst$$Register);
12658 %}
12659 ins_pipe(ialu_reg_reg);
12660 %}
12661
12662 // Rotate Left by variable
12663 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12664 %{
12665 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12666 match(Set dst (RotateLeft src shift));
12667 effect(KILL cr);
12668 flag(PD::Flag_ndd_demotable_opr1);
12669
12670 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12671 ins_encode %{
12672 __ eroll($dst$$Register, $src$$Register, false);
12673 %}
12674 ins_pipe(ialu_reg_reg);
12675 %}
12676
12677 // Rotate Right by constant.
12678 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12679 %{
12680 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12681 match(Set dst (RotateRight dst shift));
12682 effect(KILL cr);
12683 format %{ "rorl $dst, $shift" %}
12684 ins_encode %{
12685 __ rorl($dst$$Register, $shift$$constant);
12686 %}
12687 ins_pipe(ialu_reg);
12688 %}
12689
12690 // Rotate Right by constant.
12691 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12692 %{
12693 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12694 match(Set dst (RotateRight src shift));
12695 format %{ "rorxl $dst, $src, $shift" %}
12696 ins_encode %{
12697 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12698 %}
12699 ins_pipe(ialu_reg_reg);
12700 %}
12701
12702 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12703 %{
12704 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12705 match(Set dst (RotateRight (LoadI src) shift));
12706 ins_cost(175);
12707 format %{ "rorxl $dst, $src, $shift" %}
12708 ins_encode %{
12709 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12710 %}
12711 ins_pipe(ialu_reg_mem);
12712 %}
12713
12714 // Rotate Right by variable
12715 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12716 %{
12717 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12718 match(Set dst (RotateRight dst shift));
12719 effect(KILL cr);
12720 format %{ "rorl $dst, $shift" %}
12721 ins_encode %{
12722 __ rorl($dst$$Register);
12723 %}
12724 ins_pipe(ialu_reg_reg);
12725 %}
12726
12727 // Rotate Right by variable
12728 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12729 %{
12730 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12731 match(Set dst (RotateRight src shift));
12732 effect(KILL cr);
12733 flag(PD::Flag_ndd_demotable_opr1);
12734
12735 format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
12736 ins_encode %{
12737 __ erorl($dst$$Register, $src$$Register, false);
12738 %}
12739 ins_pipe(ialu_reg_reg);
12740 %}
12741
12742 // Rotate Left by constant.
12743 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12744 %{
12745 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12746 match(Set dst (RotateLeft dst shift));
12747 effect(KILL cr);
12748 format %{ "rolq $dst, $shift" %}
12749 ins_encode %{
12750 __ rolq($dst$$Register, $shift$$constant);
12751 %}
12752 ins_pipe(ialu_reg);
12753 %}
12754
12755 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12756 %{
12757 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12758 match(Set dst (RotateLeft src shift));
12759 format %{ "rolxq $dst, $src, $shift" %}
12760 ins_encode %{
12761 int shift = 64 - ($shift$$constant & 63);
12762 __ rorxq($dst$$Register, $src$$Register, shift);
12763 %}
12764 ins_pipe(ialu_reg_reg);
12765 %}
12766
12767 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12768 %{
12769 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12770 match(Set dst (RotateLeft (LoadL src) shift));
12771 ins_cost(175);
12772 format %{ "rolxq $dst, $src, $shift" %}
12773 ins_encode %{
12774 int shift = 64 - ($shift$$constant & 63);
12775 __ rorxq($dst$$Register, $src$$Address, shift);
12776 %}
12777 ins_pipe(ialu_reg_mem);
12778 %}
12779
12780 // Rotate Left by variable
12781 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12782 %{
12783 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12784 match(Set dst (RotateLeft dst shift));
12785 effect(KILL cr);
12786
12787 format %{ "rolq $dst, $shift" %}
12788 ins_encode %{
12789 __ rolq($dst$$Register);
12790 %}
12791 ins_pipe(ialu_reg_reg);
12792 %}
12793
12794 // Rotate Left by variable
12795 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12796 %{
12797 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12798 match(Set dst (RotateLeft src shift));
12799 effect(KILL cr);
12800 flag(PD::Flag_ndd_demotable_opr1);
12801
12802 format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
12803 ins_encode %{
12804 __ erolq($dst$$Register, $src$$Register, false);
12805 %}
12806 ins_pipe(ialu_reg_reg);
12807 %}
12808
12809 // Rotate Right by constant.
12810 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12811 %{
12812 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12813 match(Set dst (RotateRight dst shift));
12814 effect(KILL cr);
12815 format %{ "rorq $dst, $shift" %}
12816 ins_encode %{
12817 __ rorq($dst$$Register, $shift$$constant);
12818 %}
12819 ins_pipe(ialu_reg);
12820 %}
12821
12822 // Rotate Right by constant
12823 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12824 %{
12825 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12826 match(Set dst (RotateRight src shift));
12827 format %{ "rorxq $dst, $src, $shift" %}
12828 ins_encode %{
12829 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12830 %}
12831 ins_pipe(ialu_reg_reg);
12832 %}
12833
12834 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12835 %{
12836 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12837 match(Set dst (RotateRight (LoadL src) shift));
12838 ins_cost(175);
12839 format %{ "rorxq $dst, $src, $shift" %}
12840 ins_encode %{
12841 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12842 %}
12843 ins_pipe(ialu_reg_mem);
12844 %}
12845
12846 // Rotate Right by variable
12847 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12848 %{
12849 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12850 match(Set dst (RotateRight dst shift));
12851 effect(KILL cr);
12852 format %{ "rorq $dst, $shift" %}
12853 ins_encode %{
12854 __ rorq($dst$$Register);
12855 %}
12856 ins_pipe(ialu_reg_reg);
12857 %}
12858
12859 // Rotate Right by variable
12860 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12861 %{
12862 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12863 match(Set dst (RotateRight src shift));
12864 effect(KILL cr);
12865 flag(PD::Flag_ndd_demotable_opr1);
12866
12867 format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
12868 ins_encode %{
12869 __ erorq($dst$$Register, $src$$Register, false);
12870 %}
12871 ins_pipe(ialu_reg_reg);
12872 %}
12873
12874 //----------------------------- CompressBits/ExpandBits ------------------------
12875
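// pext gathers the bits of src selected by mask into the low-order bits of the
// result (parallel bit extract); pdep scatters them back (parallel bit deposit).
// E.g. pext(src = 0b101100, mask = 0b111000) == 0b101 and
// pdep(0b101, 0b111000) == 0b101000.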
12876 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12877 predicate(n->bottom_type()->isa_long());
12878 match(Set dst (CompressBits src mask));
12879 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12880 ins_encode %{
12881 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12882 %}
12883 ins_pipe( pipe_slow );
12884 %}
12885
12886 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12887 predicate(n->bottom_type()->isa_long());
12888 match(Set dst (ExpandBits src mask));
12889 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12890 ins_encode %{
12891 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12892 %}
12893 ins_pipe( pipe_slow );
12894 %}
12895
12896 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12897 predicate(n->bottom_type()->isa_long());
12898 match(Set dst (CompressBits src (LoadL mask)));
12899 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12900 ins_encode %{
12901 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12902 %}
12903 ins_pipe( pipe_slow );
12904 %}
12905
12906 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12907 predicate(n->bottom_type()->isa_long());
12908 match(Set dst (ExpandBits src (LoadL mask)));
12909 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12910 ins_encode %{
12911 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12912 %}
12913 ins_pipe( pipe_slow );
12914 %}
12915
12916
12917 // Logical Instructions
12918
12919 // Integer Logical Instructions
12920
12921 // And Instructions
12922 // And Register with Register
12923 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12924 %{
12925 predicate(!UseAPX);
12926 match(Set dst (AndI dst src));
12927 effect(KILL cr);
12928 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12929
12930 format %{ "andl $dst, $src\t# int" %}
12931 ins_encode %{
12932 __ andl($dst$$Register, $src$$Register);
12933 %}
12934 ins_pipe(ialu_reg_reg);
12935 %}
12936
12937 // And Register with Register using New Data Destination (NDD)
12938 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12939 %{
12940 predicate(UseAPX);
12941 match(Set dst (AndI src1 src2));
12942 effect(KILL cr);
12943 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12944
12945 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12946 ins_encode %{
12947 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12949 %}
12950 ins_pipe(ialu_reg_reg);
12951 %}
12952
12953 // And Register with Immediate 255
12954 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12955 %{
12956 match(Set dst (AndI src mask));
12957
12958 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12959 ins_encode %{
12960 __ movzbl($dst$$Register, $src$$Register);
12961 %}
12962 ins_pipe(ialu_reg);
12963 %}
12964
12965 // And Register with Immediate 255 and promote to long
12966 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12967 %{
12968 match(Set dst (ConvI2L (AndI src mask)));
12969
12970 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12971 ins_encode %{
12972 __ movzbl($dst$$Register, $src$$Register);
12973 %}
12974 ins_pipe(ialu_reg);
12975 %}
12976
12977 // And Register with Immediate 65535
12978 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12979 %{
12980 match(Set dst (AndI src mask));
12981
12982 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12983 ins_encode %{
12984 __ movzwl($dst$$Register, $src$$Register);
12985 %}
12986 ins_pipe(ialu_reg);
12987 %}
12988
12989 // And Register with Immediate 65535 and promote to long
12990 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12991 %{
12992 match(Set dst (ConvI2L (AndI src mask)));
12993
12994 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12995 ins_encode %{
12996 __ movzwl($dst$$Register, $src$$Register);
12997 %}
12998 ins_pipe(ialu_reg);
12999 %}
13000
13001 // Can skip int2long conversions after AND with small bitmask
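// For example, with mask == 0x3FF the encoding below loads 10 into tmp and
// emits bzhiq dst, src, tmp, clearing every bit of src from position 10 upward;
// the result is non-negative, so it also serves as the zero extension that the
// ConvI2L would otherwise require.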
13002 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13003 %{
13004 predicate(VM_Version::supports_bmi2());
13005 ins_cost(125);
13006 effect(TEMP tmp, KILL cr);
13007 match(Set dst (ConvI2L (AndI src mask)));
13008 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13009 ins_encode %{
13010 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13011 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13012 %}
13013 ins_pipe(ialu_reg_reg);
13014 %}
13015
13016 // And Register with Immediate
13017 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13018 %{
13019 predicate(!UseAPX);
13020 match(Set dst (AndI dst src));
13021 effect(KILL cr);
13022 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13023
13024 format %{ "andl $dst, $src\t# int" %}
13025 ins_encode %{
13026 __ andl($dst$$Register, $src$$constant);
13027 %}
13028 ins_pipe(ialu_reg);
13029 %}
13030
13031 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13032 %{
13033 predicate(UseAPX);
13034 match(Set dst (AndI src1 src2));
13035 effect(KILL cr);
13036 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13037
13038 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13039 ins_encode %{
13040 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13041 %}
13042 ins_pipe(ialu_reg);
13043 %}
13044
13045 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13046 %{
13047 predicate(UseAPX);
13048 match(Set dst (AndI (LoadI src1) src2));
13049 effect(KILL cr);
13050 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13051
13052 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13053 ins_encode %{
13054 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13055 %}
13056 ins_pipe(ialu_reg);
13057 %}
13058
13059 // And Register with Memory
13060 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13061 %{
13062 predicate(!UseAPX);
13063 match(Set dst (AndI dst (LoadI src)));
13064 effect(KILL cr);
13065 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13066
13067 ins_cost(150);
13068 format %{ "andl $dst, $src\t# int" %}
13069 ins_encode %{
13070 __ andl($dst$$Register, $src$$Address);
13071 %}
13072 ins_pipe(ialu_reg_mem);
13073 %}
13074
13075 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13076 %{
13077 predicate(UseAPX);
13078 match(Set dst (AndI src1 (LoadI src2)));
13079 effect(KILL cr);
13080 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13081
13082 ins_cost(150);
13083 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13084 ins_encode %{
13085 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13086 %}
13087 ins_pipe(ialu_reg_mem);
13088 %}
13089
13090 // And Memory with Register
13091 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13092 %{
13093 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13094 effect(KILL cr);
13095 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13096
13097 ins_cost(150);
13098 format %{ "andb $dst, $src\t# byte" %}
13099 ins_encode %{
13100 __ andb($dst$$Address, $src$$Register);
13101 %}
13102 ins_pipe(ialu_mem_reg);
13103 %}
13104
13105 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13106 %{
13107 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13108 effect(KILL cr);
13109 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13110
13111 ins_cost(150);
13112 format %{ "andl $dst, $src\t# int" %}
13113 ins_encode %{
13114 __ andl($dst$$Address, $src$$Register);
13115 %}
13116 ins_pipe(ialu_mem_reg);
13117 %}
13118
13119 // And Memory with Immediate
13120 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13121 %{
13122 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13123 effect(KILL cr);
13124 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13125
13126 ins_cost(125);
13127 format %{ "andl $dst, $src\t# int" %}
13128 ins_encode %{
13129 __ andl($dst$$Address, $src$$constant);
13130 %}
13131 ins_pipe(ialu_mem_imm);
13132 %}
13133
13134 // BMI1 instructions
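// These match the bit-manipulation idioms the compiler produces for the
// corresponding Java expressions:
//   andn   : ~src1 & src2       (matched as (src1 ^ -1) & src2)
//   blsi   : src & -src         (isolate lowest set bit)
//   blsmsk : src ^ (src - 1)    (mask up to and including lowest set bit)
//   blsr   : src & (src - 1)    (clear lowest set bit)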
13135 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13136 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13137 predicate(UseBMI1Instructions);
13138 effect(KILL cr);
13139 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13140
13141 ins_cost(125);
13142 format %{ "andnl $dst, $src1, $src2" %}
13143
13144 ins_encode %{
13145 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13146 %}
13147 ins_pipe(ialu_reg_mem);
13148 %}
13149
13150 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13151 match(Set dst (AndI (XorI src1 minus_1) src2));
13152 predicate(UseBMI1Instructions);
13153 effect(KILL cr);
13154 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13155
13156 format %{ "andnl $dst, $src1, $src2" %}
13157
13158 ins_encode %{
13159 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13160 %}
13161 ins_pipe(ialu_reg);
13162 %}
13163
13164 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13165 match(Set dst (AndI (SubI imm_zero src) src));
13166 predicate(UseBMI1Instructions);
13167 effect(KILL cr);
13168 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13169
13170 format %{ "blsil $dst, $src" %}
13171
13172 ins_encode %{
13173 __ blsil($dst$$Register, $src$$Register);
13174 %}
13175 ins_pipe(ialu_reg);
13176 %}
13177
13178 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13179 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13180 predicate(UseBMI1Instructions);
13181 effect(KILL cr);
13182 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13183
13184 ins_cost(125);
13185 format %{ "blsil $dst, $src" %}
13186
13187 ins_encode %{
13188 __ blsil($dst$$Register, $src$$Address);
13189 %}
13190 ins_pipe(ialu_reg_mem);
13191 %}
13192
13193 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13194 %{
13195 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13196 predicate(UseBMI1Instructions);
13197 effect(KILL cr);
13198 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13199
13200 ins_cost(125);
13201 format %{ "blsmskl $dst, $src" %}
13202
13203 ins_encode %{
13204 __ blsmskl($dst$$Register, $src$$Address);
13205 %}
13206 ins_pipe(ialu_reg_mem);
13207 %}
13208
13209 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13210 %{
13211 match(Set dst (XorI (AddI src minus_1) src));
13212 predicate(UseBMI1Instructions);
13213 effect(KILL cr);
13214 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13215
13216 format %{ "blsmskl $dst, $src" %}
13217
13218 ins_encode %{
13219 __ blsmskl($dst$$Register, $src$$Register);
13220 %}
13221
13222 ins_pipe(ialu_reg);
13223 %}
13224
13225 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13226 %{
13227 match(Set dst (AndI (AddI src minus_1) src) );
13228 predicate(UseBMI1Instructions);
13229 effect(KILL cr);
13230 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13231
13232 format %{ "blsrl $dst, $src" %}
13233
13234 ins_encode %{
13235 __ blsrl($dst$$Register, $src$$Register);
13236 %}
13237
13238   ins_pipe(ialu_reg);
13239 %}
13240
13241 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13242 %{
13243 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13244 predicate(UseBMI1Instructions);
13245 effect(KILL cr);
13246 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13247
13248 ins_cost(125);
13249 format %{ "blsrl $dst, $src" %}
13250
13251 ins_encode %{
13252 __ blsrl($dst$$Register, $src$$Address);
13253 %}
13254
13255   ins_pipe(ialu_reg_mem);
13256 %}
13257
13258 // Or Instructions
13259 // Or Register with Register
13260 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13261 %{
13262 predicate(!UseAPX);
13263 match(Set dst (OrI dst src));
13264 effect(KILL cr);
13265 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13266
13267 format %{ "orl $dst, $src\t# int" %}
13268 ins_encode %{
13269 __ orl($dst$$Register, $src$$Register);
13270 %}
13271 ins_pipe(ialu_reg_reg);
13272 %}
13273
13274 // Or Register with Register using New Data Destination (NDD)
13275 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13276 %{
13277 predicate(UseAPX);
13278 match(Set dst (OrI src1 src2));
13279 effect(KILL cr);
13280 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13281
13282 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13283 ins_encode %{
13284 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13285 %}
13286 ins_pipe(ialu_reg_reg);
13287 %}
13288
13289 // Or Register with Immediate
13290 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13291 %{
13292 predicate(!UseAPX);
13293 match(Set dst (OrI dst src));
13294 effect(KILL cr);
13295 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13296
13297 format %{ "orl $dst, $src\t# int" %}
13298 ins_encode %{
13299 __ orl($dst$$Register, $src$$constant);
13300 %}
13301 ins_pipe(ialu_reg);
13302 %}
13303
13304 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13305 %{
13306 predicate(UseAPX);
13307 match(Set dst (OrI src1 src2));
13308 effect(KILL cr);
13309 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13310
13311 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13312 ins_encode %{
13313 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13314 %}
13315 ins_pipe(ialu_reg);
13316 %}
13317
13318 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13319 %{
13320 predicate(UseAPX);
13321 match(Set dst (OrI src1 src2));
13322 effect(KILL cr);
13323 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13324
13325 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13326 ins_encode %{
13327 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13328 %}
13329 ins_pipe(ialu_reg);
13330 %}
13331
13332 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13333 %{
13334 predicate(UseAPX);
13335 match(Set dst (OrI (LoadI src1) src2));
13336 effect(KILL cr);
13337 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13338
13339 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13340 ins_encode %{
13341 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13342 %}
13343 ins_pipe(ialu_reg);
13344 %}
13345
13346 // Or Register with Memory
13347 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13348 %{
13349 predicate(!UseAPX);
13350 match(Set dst (OrI dst (LoadI src)));
13351 effect(KILL cr);
13352 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13353
13354 ins_cost(150);
13355 format %{ "orl $dst, $src\t# int" %}
13356 ins_encode %{
13357 __ orl($dst$$Register, $src$$Address);
13358 %}
13359 ins_pipe(ialu_reg_mem);
13360 %}
13361
13362 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13363 %{
13364 predicate(UseAPX);
13365 match(Set dst (OrI src1 (LoadI src2)));
13366 effect(KILL cr);
13367 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13368
13369 ins_cost(150);
13370 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13371 ins_encode %{
13372 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13373 %}
13374 ins_pipe(ialu_reg_mem);
13375 %}
13376
13377 // Or Memory with Register
13378 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13379 %{
13380 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13381 effect(KILL cr);
13382 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13383
13384 ins_cost(150);
13385 format %{ "orb $dst, $src\t# byte" %}
13386 ins_encode %{
13387 __ orb($dst$$Address, $src$$Register);
13388 %}
13389 ins_pipe(ialu_mem_reg);
13390 %}
13391
13392 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13393 %{
13394 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13395 effect(KILL cr);
13396 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13397
13398 ins_cost(150);
13399 format %{ "orl $dst, $src\t# int" %}
13400 ins_encode %{
13401 __ orl($dst$$Address, $src$$Register);
13402 %}
13403 ins_pipe(ialu_mem_reg);
13404 %}
13405
13406 // Or Memory with Immediate
13407 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13408 %{
13409 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13410 effect(KILL cr);
13411 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13412
13413 ins_cost(125);
13414 format %{ "orl $dst, $src\t# int" %}
13415 ins_encode %{
13416 __ orl($dst$$Address, $src$$constant);
13417 %}
13418 ins_pipe(ialu_mem_imm);
13419 %}
13420
13421 // Xor Instructions
13422 // Xor Register with Register
13423 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13424 %{
13425 predicate(!UseAPX);
13426 match(Set dst (XorI dst src));
13427 effect(KILL cr);
13428 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13429
13430 format %{ "xorl $dst, $src\t# int" %}
13431 ins_encode %{
13432 __ xorl($dst$$Register, $src$$Register);
13433 %}
13434 ins_pipe(ialu_reg_reg);
13435 %}
13436
13437 // Xor Register with Register using New Data Destination (NDD)
13438 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13439 %{
13440 predicate(UseAPX);
13441 match(Set dst (XorI src1 src2));
13442 effect(KILL cr);
13443 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13444
13445 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13446 ins_encode %{
13447 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13448 %}
13449 ins_pipe(ialu_reg_reg);
13450 %}
13451
13452 // Xor Register with Immediate -1
13453 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13454 %{
13455 predicate(!UseAPX);
13456 match(Set dst (XorI dst imm));
13457
13458 format %{ "notl $dst" %}
13459 ins_encode %{
13460 __ notl($dst$$Register);
13461 %}
13462 ins_pipe(ialu_reg);
13463 %}
13464
13465 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13466 %{
13467 match(Set dst (XorI src imm));
13468 predicate(UseAPX);
13469 flag(PD::Flag_ndd_demotable_opr1);
13470
13471 format %{ "enotl $dst, $src" %}
13472 ins_encode %{
13473 __ enotl($dst$$Register, $src$$Register);
13474 %}
13475 ins_pipe(ialu_reg);
13476 %}
13477
13478 // Xor Register with Immediate
13479 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13480 %{
13481   // Strict predicate check so that xorI_rReg_im1 is always selected, regardless of cost, when immI src is -1.
13482 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13483 match(Set dst (XorI dst src));
13484 effect(KILL cr);
13485 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13486
13487 format %{ "xorl $dst, $src\t# int" %}
13488 ins_encode %{
13489 __ xorl($dst$$Register, $src$$constant);
13490 %}
13491 ins_pipe(ialu_reg);
13492 %}
13493
13494 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13495 %{
13496   // Strict predicate check so that xorI_rReg_im1_ndd is always selected, regardless of cost, when immI src2 is -1.
13497 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13498 match(Set dst (XorI src1 src2));
13499 effect(KILL cr);
13500 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13501
13502 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13503 ins_encode %{
13504 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13505 %}
13506 ins_pipe(ialu_reg);
13507 %}
13508
13509 // Xor Memory Operand with Immediate into Register (NDD)
13510 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13511 %{
13512 predicate(UseAPX);
13513 match(Set dst (XorI (LoadI src1) src2));
13514 effect(KILL cr);
13515 ins_cost(150);
13516 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13517
13518 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13519 ins_encode %{
13520 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13521 %}
13522 ins_pipe(ialu_reg);
13523 %}
13524
13525 // Xor Register with Memory
13526 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13527 %{
13528 predicate(!UseAPX);
13529 match(Set dst (XorI dst (LoadI src)));
13530 effect(KILL cr);
13531 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13532
13533 ins_cost(150);
13534 format %{ "xorl $dst, $src\t# int" %}
13535 ins_encode %{
13536 __ xorl($dst$$Register, $src$$Address);
13537 %}
13538 ins_pipe(ialu_reg_mem);
13539 %}
13540
13541 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13542 %{
13543 predicate(UseAPX);
13544 match(Set dst (XorI src1 (LoadI src2)));
13545 effect(KILL cr);
13546 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13547
13548 ins_cost(150);
13549 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13550 ins_encode %{
13551 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13552 %}
13553 ins_pipe(ialu_reg_mem);
13554 %}
13555
13556 // Xor Memory with Register
13557 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13558 %{
13559 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13560 effect(KILL cr);
13561 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13562
13563 ins_cost(150);
13564 format %{ "xorb $dst, $src\t# byte" %}
13565 ins_encode %{
13566 __ xorb($dst$$Address, $src$$Register);
13567 %}
13568 ins_pipe(ialu_mem_reg);
13569 %}
13570
13571 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13572 %{
13573 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13574 effect(KILL cr);
13575 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13576
13577 ins_cost(150);
13578 format %{ "xorl $dst, $src\t# int" %}
13579 ins_encode %{
13580 __ xorl($dst$$Address, $src$$Register);
13581 %}
13582 ins_pipe(ialu_mem_reg);
13583 %}
13584
13585 // Xor Memory with Immediate
13586 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13587 %{
13588 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13589 effect(KILL cr);
13590 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13591
13592 ins_cost(125);
13593 format %{ "xorl $dst, $src\t# int" %}
13594 ins_encode %{
13595 __ xorl($dst$$Address, $src$$constant);
13596 %}
13597 ins_pipe(ialu_mem_imm);
13598 %}
13599
13600
13601 // Long Logical Instructions
13602
13603 // And Instructions
13604 // And Register with Register
13605 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13606 %{
13607 predicate(!UseAPX);
13608 match(Set dst (AndL dst src));
13609 effect(KILL cr);
13610 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13611
13612 format %{ "andq $dst, $src\t# long" %}
13613 ins_encode %{
13614 __ andq($dst$$Register, $src$$Register);
13615 %}
13616 ins_pipe(ialu_reg_reg);
13617 %}
13618
13619 // And Register with Register using New Data Destination (NDD)
13620 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13621 %{
13622 predicate(UseAPX);
13623 match(Set dst (AndL src1 src2));
13624 effect(KILL cr);
13625 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13626
13627 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13628 ins_encode %{
13629 __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13631 %}
13632 ins_pipe(ialu_reg_reg);
13633 %}
13634
13635 // And Register with Immediate 255
13636 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13637 %{
13638 match(Set dst (AndL src mask));
13639
13640 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13641 ins_encode %{
13642     // movzbl zeroes out the upper 32 bits and does not need REX.W
13643 __ movzbl($dst$$Register, $src$$Register);
13644 %}
13645 ins_pipe(ialu_reg);
13646 %}
13647
13648 // And Register with Immediate 65535
13649 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13650 %{
13651 match(Set dst (AndL src mask));
13652
13653 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13654 ins_encode %{
13655     // movzwl zeroes out the upper 32 bits and does not need REX.W
13656 __ movzwl($dst$$Register, $src$$Register);
13657 %}
13658 ins_pipe(ialu_reg);
13659 %}
13660
13661 // And Register with Immediate
13662 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13663 %{
13664 predicate(!UseAPX);
13665 match(Set dst (AndL dst src));
13666 effect(KILL cr);
13667 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13668
13669 format %{ "andq $dst, $src\t# long" %}
13670 ins_encode %{
13671 __ andq($dst$$Register, $src$$constant);
13672 %}
13673 ins_pipe(ialu_reg);
13674 %}
13675
13676 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13677 %{
13678 predicate(UseAPX);
13679 match(Set dst (AndL src1 src2));
13680 effect(KILL cr);
13681 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13682
13683 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13684 ins_encode %{
13685 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13686 %}
13687 ins_pipe(ialu_reg);
13688 %}
13689
13690 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13691 %{
13692 predicate(UseAPX);
13693 match(Set dst (AndL (LoadL src1) src2));
13694 effect(KILL cr);
13695 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13696
13697 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13698 ins_encode %{
13699 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13700 %}
13701 ins_pipe(ialu_reg);
13702 %}
13703
13704 // And Register with Memory
13705 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13706 %{
13707 predicate(!UseAPX);
13708 match(Set dst (AndL dst (LoadL src)));
13709 effect(KILL cr);
13710 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13711
13712 ins_cost(150);
13713 format %{ "andq $dst, $src\t# long" %}
13714 ins_encode %{
13715 __ andq($dst$$Register, $src$$Address);
13716 %}
13717 ins_pipe(ialu_reg_mem);
13718 %}
13719
13720 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13721 %{
13722 predicate(UseAPX);
13723 match(Set dst (AndL src1 (LoadL src2)));
13724 effect(KILL cr);
13725 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13726
13727 ins_cost(150);
13728 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13729 ins_encode %{
13730 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13731 %}
13732 ins_pipe(ialu_reg_mem);
13733 %}
13734
13735 // And Memory with Register
13736 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13737 %{
13738 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13739 effect(KILL cr);
13740 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13741
13742 ins_cost(150);
13743 format %{ "andq $dst, $src\t# long" %}
13744 ins_encode %{
13745 __ andq($dst$$Address, $src$$Register);
13746 %}
13747 ins_pipe(ialu_mem_reg);
13748 %}
13749
13750 // And Memory with Immediate
13751 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13752 %{
13753 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13754 effect(KILL cr);
13755 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13756
13757 ins_cost(125);
13758 format %{ "andq $dst, $src\t# long" %}
13759 ins_encode %{
13760 __ andq($dst$$Address, $src$$constant);
13761 %}
13762 ins_pipe(ialu_mem_imm);
13763 %}
13764
13765 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13766 %{
13767   // con must be a genuinely 64-bit immediate whose complement is a power of 2,
13768   // because for constants that fit in 8/32 bits a plain AND/OR immediate works just as well.
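  // E.g. clearing bit 40 in memory, dst &= ~(1L << 40), is emitted as btrq $dst, 40.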
13769 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13770
13771 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13772 effect(KILL cr);
13773
13774 ins_cost(125);
13775 format %{ "btrq $dst, log2(not($con))\t# long" %}
13776 ins_encode %{
13777 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13778 %}
13779 ins_pipe(ialu_mem_imm);
13780 %}
13781
13782 // BMI1 instructions
13783 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13784 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13785 predicate(UseBMI1Instructions);
13786 effect(KILL cr);
13787 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13788
13789 ins_cost(125);
13790 format %{ "andnq $dst, $src1, $src2" %}
13791
13792 ins_encode %{
13793 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13794 %}
13795 ins_pipe(ialu_reg_mem);
13796 %}
13797
13798 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13799 match(Set dst (AndL (XorL src1 minus_1) src2));
13800 predicate(UseBMI1Instructions);
13801 effect(KILL cr);
13802 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13803
13804 format %{ "andnq $dst, $src1, $src2" %}
13805
13806 ins_encode %{
13807 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13808 %}
13809 ins_pipe(ialu_reg_mem);
13810 %}
13811
13812 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13813 match(Set dst (AndL (SubL imm_zero src) src));
13814 predicate(UseBMI1Instructions);
13815 effect(KILL cr);
13816 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13817
13818 format %{ "blsiq $dst, $src" %}
13819
13820 ins_encode %{
13821 __ blsiq($dst$$Register, $src$$Register);
13822 %}
13823 ins_pipe(ialu_reg);
13824 %}
13825
13826 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13827 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13828 predicate(UseBMI1Instructions);
13829 effect(KILL cr);
13830 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13831
13832 ins_cost(125);
13833 format %{ "blsiq $dst, $src" %}
13834
13835 ins_encode %{
13836 __ blsiq($dst$$Register, $src$$Address);
13837 %}
13838 ins_pipe(ialu_reg_mem);
13839 %}
13840
13841 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13842 %{
13843 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13844 predicate(UseBMI1Instructions);
13845 effect(KILL cr);
13846 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13847
13848 ins_cost(125);
13849 format %{ "blsmskq $dst, $src" %}
13850
13851 ins_encode %{
13852 __ blsmskq($dst$$Register, $src$$Address);
13853 %}
13854 ins_pipe(ialu_reg_mem);
13855 %}
13856
13857 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13858 %{
13859 match(Set dst (XorL (AddL src minus_1) src));
13860 predicate(UseBMI1Instructions);
13861 effect(KILL cr);
13862 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13863
13864 format %{ "blsmskq $dst, $src" %}
13865
13866 ins_encode %{
13867 __ blsmskq($dst$$Register, $src$$Register);
13868 %}
13869
13870 ins_pipe(ialu_reg);
13871 %}
13872
13873 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13874 %{
13875 match(Set dst (AndL (AddL src minus_1) src) );
13876 predicate(UseBMI1Instructions);
13877 effect(KILL cr);
13878 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13879
13880 format %{ "blsrq $dst, $src" %}
13881
13882 ins_encode %{
13883 __ blsrq($dst$$Register, $src$$Register);
13884 %}
13885
13886 ins_pipe(ialu_reg);
13887 %}
13888
13889 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13890 %{
13891 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13892 predicate(UseBMI1Instructions);
13893 effect(KILL cr);
13894 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13895
13896 ins_cost(125);
13897 format %{ "blsrq $dst, $src" %}
13898
13899 ins_encode %{
13900 __ blsrq($dst$$Register, $src$$Address);
13901 %}
13902
13903 ins_pipe(ialu_reg);
13904 %}
13905
13906 // Or Instructions
13907 // Or Register with Register
13908 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13909 %{
13910 predicate(!UseAPX);
13911 match(Set dst (OrL dst src));
13912 effect(KILL cr);
13913 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13914
13915 format %{ "orq $dst, $src\t# long" %}
13916 ins_encode %{
13917 __ orq($dst$$Register, $src$$Register);
13918 %}
13919 ins_pipe(ialu_reg_reg);
13920 %}
13921
13922 // Or Register with Register using New Data Destination (NDD)
13923 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13924 %{
13925 predicate(UseAPX);
13926 match(Set dst (OrL src1 src2));
13927 effect(KILL cr);
13928 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13929
13930 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13931 ins_encode %{
13932 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13934 %}
13935 ins_pipe(ialu_reg_reg);
13936 %}
13937
13938 // Use any_RegP to match R15 (TLS register) without spilling.
13939 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13940 match(Set dst (OrL dst (CastP2X src)));
13941 effect(KILL cr);
13942 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13943
13944 format %{ "orq $dst, $src\t# long" %}
13945 ins_encode %{
13946 __ orq($dst$$Register, $src$$Register);
13947 %}
13948 ins_pipe(ialu_reg_reg);
13949 %}
13950
13951 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13952 match(Set dst (OrL src1 (CastP2X src2)));
13953 effect(KILL cr);
13954 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13955
13956 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13957 ins_encode %{
13958 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13959 %}
13960 ins_pipe(ialu_reg_reg);
13961 %}
13962
13963 // Or Register with Immediate
13964 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13965 %{
13966 predicate(!UseAPX);
13967 match(Set dst (OrL dst src));
13968 effect(KILL cr);
13969 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13970
13971 format %{ "orq $dst, $src\t# long" %}
13972 ins_encode %{
13973 __ orq($dst$$Register, $src$$constant);
13974 %}
13975 ins_pipe(ialu_reg);
13976 %}
13977
13978 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13979 %{
13980 predicate(UseAPX);
13981 match(Set dst (OrL src1 src2));
13982 effect(KILL cr);
13983 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13984
13985 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13986 ins_encode %{
13987 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13988 %}
13989 ins_pipe(ialu_reg);
13990 %}
13991
13992 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13993 %{
13994 predicate(UseAPX);
13995 match(Set dst (OrL src1 src2));
13996 effect(KILL cr);
13997 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13998
13999 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14000 ins_encode %{
14001 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14002 %}
14003 ins_pipe(ialu_reg);
14004 %}
14005
14006 // Or Memory with Immediate
14007 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14008 %{
14009 predicate(UseAPX);
14010 match(Set dst (OrL (LoadL src1) src2));
14011 effect(KILL cr);
14012 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14013
14014 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14015 ins_encode %{
14016 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14017 %}
14018 ins_pipe(ialu_reg);
14019 %}
14020
14021 // Or Register with Memory
14022 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14023 %{
14024 predicate(!UseAPX);
14025 match(Set dst (OrL dst (LoadL src)));
14026 effect(KILL cr);
14027 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14028
14029 ins_cost(150);
14030 format %{ "orq $dst, $src\t# long" %}
14031 ins_encode %{
14032 __ orq($dst$$Register, $src$$Address);
14033 %}
14034 ins_pipe(ialu_reg_mem);
14035 %}
14036
14037 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14038 %{
14039 predicate(UseAPX);
14040 match(Set dst (OrL src1 (LoadL src2)));
14041 effect(KILL cr);
14042 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14043
14044 ins_cost(150);
14045 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14046 ins_encode %{
14047 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14048 %}
14049 ins_pipe(ialu_reg_mem);
14050 %}
14051
14052 // Or Memory with Register
14053 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14054 %{
14055 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14056 effect(KILL cr);
14057 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14058
14059 ins_cost(150);
14060 format %{ "orq $dst, $src\t# long" %}
14061 ins_encode %{
14062 __ orq($dst$$Address, $src$$Register);
14063 %}
14064 ins_pipe(ialu_mem_reg);
14065 %}
14066
14067 // Or Memory with Immediate
14068 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14069 %{
14070 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14071 effect(KILL cr);
14072 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14073
14074 ins_cost(125);
14075 format %{ "orq $dst, $src\t# long" %}
14076 ins_encode %{
14077 __ orq($dst$$Address, $src$$constant);
14078 %}
14079 ins_pipe(ialu_mem_imm);
14080 %}
14081
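// Likewise, an OR with a 64-bit power-of-two constant is matched below to btsq (bit test
// and set), setting the single bit log2(con) in memory without loading the immediate.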
14082 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14083 %{
14084 // con should be a pure 64-bit power of 2 immediate
14085 // because AND/OR works well enough for 8/32-bit values.
14086 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14087
14088 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14089 effect(KILL cr);
14090
14091 ins_cost(125);
14092 format %{ "btsq $dst, log2($con)\t# long" %}
14093 ins_encode %{
14094 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14095 %}
14096 ins_pipe(ialu_mem_imm);
14097 %}
14098
14099 // Xor Instructions
14100 // Xor Register with Register
14101 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14102 %{
14103 predicate(!UseAPX);
14104 match(Set dst (XorL dst src));
14105 effect(KILL cr);
14106 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14107
14108 format %{ "xorq $dst, $src\t# long" %}
14109 ins_encode %{
14110 __ xorq($dst$$Register, $src$$Register);
14111 %}
14112 ins_pipe(ialu_reg_reg);
14113 %}
14114
14115 // Xor Register with Register using New Data Destination (NDD)
14116 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14117 %{
14118 predicate(UseAPX);
14119 match(Set dst (XorL src1 src2));
14120 effect(KILL cr);
14121 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14122
14123 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14124 ins_encode %{
14125 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14126 %}
14127 ins_pipe(ialu_reg_reg);
14128 %}
14129
14130 // Xor Register with Immediate -1
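// XOR with -1 is a ones' complement and is emitted as notq. NOT does not modify the
// arithmetic flags, which is why these patterns neither KILL cr nor declare flag effects.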
14131 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14132 %{
14133 predicate(!UseAPX);
14134 match(Set dst (XorL dst imm));
14135
14136 format %{ "notq $dst" %}
14137 ins_encode %{
14138 __ notq($dst$$Register);
14139 %}
14140 ins_pipe(ialu_reg);
14141 %}
14142
14143 instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14144 %{
14145 predicate(UseAPX);
14146 match(Set dst (XorL src imm));
14147 flag(PD::Flag_ndd_demotable_opr1);
14148
14149 format %{ "enotq $dst, $src" %}
14150 ins_encode %{
14151 __ enotq($dst$$Register, $src$$Register);
14152 %}
14153 ins_pipe(ialu_reg);
14154 %}
14155
14156 // Xor Register with Immediate
14157 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14158 %{
14159 // Strict predicate check to make selection of xorL_rReg_im1 cost agnostic if immL32 src is -1.
14160 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14161 match(Set dst (XorL dst src));
14162 effect(KILL cr);
14163 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14164
14165 format %{ "xorq $dst, $src\t# long" %}
14166 ins_encode %{
14167 __ xorq($dst$$Register, $src$$constant);
14168 %}
14169 ins_pipe(ialu_reg);
14170 %}
14171
14172 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14173 %{
14174 // Strict predicate check to make selection of xorL_rReg_im1_ndd cost agnostic if immL32 src2 is -1.
14175 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14176 match(Set dst (XorL src1 src2));
14177 effect(KILL cr);
14178 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14179
14180 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14181 ins_encode %{
14182 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14183 %}
14184 ins_pipe(ialu_reg);
14185 %}
14186
14187 // Xor Memory with Immediate
14188 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14189 %{
14190 predicate(UseAPX);
14191 match(Set dst (XorL (LoadL src1) src2));
14192 effect(KILL cr);
14193 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14194 ins_cost(150);
14195
14196 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14197 ins_encode %{
14198 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14199 %}
14200 ins_pipe(ialu_reg);
14201 %}
14202
14203 // Xor Register with Memory
14204 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14205 %{
14206 predicate(!UseAPX);
14207 match(Set dst (XorL dst (LoadL src)));
14208 effect(KILL cr);
14209 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14210
14211 ins_cost(150);
14212 format %{ "xorq $dst, $src\t# long" %}
14213 ins_encode %{
14214 __ xorq($dst$$Register, $src$$Address);
14215 %}
14216 ins_pipe(ialu_reg_mem);
14217 %}
14218
14219 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14220 %{
14221 predicate(UseAPX);
14222 match(Set dst (XorL src1 (LoadL src2)));
14223 effect(KILL cr);
14224 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14225
14226 ins_cost(150);
14227 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14228 ins_encode %{
14229 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14230 %}
14231 ins_pipe(ialu_reg_mem);
14232 %}
14233
14234 // Xor Memory with Register
14235 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14236 %{
14237 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14238 effect(KILL cr);
14239 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14240
14241 ins_cost(150);
14242 format %{ "xorq $dst, $src\t# long" %}
14243 ins_encode %{
14244 __ xorq($dst$$Address, $src$$Register);
14245 %}
14246 ins_pipe(ialu_mem_reg);
14247 %}
14248
14249 // Xor Memory with Immediate
14250 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14251 %{
14252 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14253 effect(KILL cr);
14254 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14255
14256 ins_cost(125);
14257 format %{ "xorq $dst, $src\t# long" %}
14258 ins_encode %{
14259 __ xorq($dst$$Address, $src$$constant);
14260 %}
14261 ins_pipe(ialu_mem_imm);
14262 %}
14263
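// CmpLTMask yields an all-ones mask (-1) when p < q and 0 otherwise. The general form
// materializes it with setcc + neg; the compare-against-zero form simply smears the sign
// bit across the register with an arithmetic shift.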
14264 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14265 %{
14266 match(Set dst (CmpLTMask p q));
14267 effect(KILL cr);
14268
14269 ins_cost(400);
14270 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14271 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14272 "negl $dst" %}
14273 ins_encode %{
14274 __ cmpl($p$$Register, $q$$Register);
14275 __ setcc(Assembler::less, $dst$$Register);
14276 __ negl($dst$$Register);
14277 %}
14278 ins_pipe(pipe_slow);
14279 %}
14280
14281 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14282 %{
14283 match(Set dst (CmpLTMask dst zero));
14284 effect(KILL cr);
14285
14286 ins_cost(100);
14287 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14288 ins_encode %{
14289 __ sarl($dst$$Register, 31);
14290 %}
14291 ins_pipe(ialu_reg);
14292 %}
14293
14294 /* Better to save a register than avoid a branch */
14295 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14296 %{
14297 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14298 effect(KILL cr);
14299 ins_cost(300);
14300 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14301 "jge done\n\t"
14302 "addl $p,$y\n"
14303 "done: " %}
14304 ins_encode %{
14305 Register Rp = $p$$Register;
14306 Register Rq = $q$$Register;
14307 Register Ry = $y$$Register;
14308 Label done;
14309 __ subl(Rp, Rq);
14310 __ jccb(Assembler::greaterEqual, done);
14311 __ addl(Rp, Ry);
14312 __ bind(done);
14313 %}
14314 ins_pipe(pipe_cmplt);
14315 %}
14316
14317 /* Better to save a register than avoid a branch */
14318 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14319 %{
14320 match(Set y (AndI (CmpLTMask p q) y));
14321 effect(KILL cr);
14322
14323 ins_cost(300);
14324
14325 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14326 "jlt done\n\t"
14327 "xorl $y, $y\n"
14328 "done: " %}
14329 ins_encode %{
14330 Register Rp = $p$$Register;
14331 Register Rq = $q$$Register;
14332 Register Ry = $y$$Register;
14333 Label done;
14334 __ cmpl(Rp, Rq);
14335 __ jccb(Assembler::less, done);
14336 __ xorl(Ry, Ry);
14337 __ bind(done);
14338 %}
14339 ins_pipe(pipe_cmplt);
14340 %}
14341
14342
14343 //---------- FP Instructions------------------------------------------------
14344
14345 // Really expensive, avoid
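// ucomiss/ucomisd set ZF, PF and CF on an unordered (NaN) operand. The fixup emitted
// below is skipped via jnp for ordered results; otherwise the saved flags are masked so
// that effectively only CF stays set, making a NaN compare as "less than".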
14346 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14347 %{
14348 match(Set cr (CmpF src1 src2));
14349
14350 ins_cost(500);
14351 format %{ "ucomiss $src1, $src2\n\t"
14352 "jnp,s exit\n\t"
14353 "pushfq\t# saw NaN, set CF\n\t"
14354 "andq [rsp], #0xffffff2b\n\t"
14355 "popfq\n"
14356 "exit:" %}
14357 ins_encode %{
14358 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14359 emit_cmpfp_fixup(masm);
14360 %}
14361 ins_pipe(pipe_slow);
14362 %}
14363
14364 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14365 match(Set cr (CmpF src1 src2));
14366
14367 ins_cost(100);
14368 format %{ "ucomiss $src1, $src2" %}
14369 ins_encode %{
14370 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14371 %}
14372 ins_pipe(pipe_slow);
14373 %}
14374
14375 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14376 match(Set cr (CmpF src1 src2));
14377
14378 ins_cost(100);
14379 format %{ "vucomxss $src1, $src2" %}
14380 ins_encode %{
14381 __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14382 %}
14383 ins_pipe(pipe_slow);
14384 %}
14385
14386 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14387 match(Set cr (CmpF src1 (LoadF src2)));
14388
14389 ins_cost(100);
14390 format %{ "ucomiss $src1, $src2" %}
14391 ins_encode %{
14392 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14393 %}
14394 ins_pipe(pipe_slow);
14395 %}
14396
14397 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14398 match(Set cr (CmpF src1 (LoadF src2)));
14399
14400 ins_cost(100);
14401 format %{ "vucomxss $src1, $src2" %}
14402 ins_encode %{
14403 __ vucomxss($src1$$XMMRegister, $src2$$Address);
14404 %}
14405 ins_pipe(pipe_slow);
14406 %}
14407
14408 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14409 match(Set cr (CmpF src con));
14410
14411 ins_cost(100);
14412 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14413 ins_encode %{
14414 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14415 %}
14416 ins_pipe(pipe_slow);
14417 %}
14418
14419 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14420 match(Set cr (CmpF src con));
14421
14422 ins_cost(100);
14423 format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14424 ins_encode %{
14425 __ vucomxss($src$$XMMRegister, $constantaddress($con));
14426 %}
14427 ins_pipe(pipe_slow);
14428 %}
14429
14430 // Really expensive, avoid
14431 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14432 %{
14433 match(Set cr (CmpD src1 src2));
14434
14435 ins_cost(500);
14436 format %{ "ucomisd $src1, $src2\n\t"
14437 "jnp,s exit\n\t"
14438 "pushfq\t# saw NaN, set CF\n\t"
14439 "andq [rsp], #0xffffff2b\n\t"
14440 "popfq\n"
14441 "exit:" %}
14442 ins_encode %{
14443 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14444 emit_cmpfp_fixup(masm);
14445 %}
14446 ins_pipe(pipe_slow);
14447 %}
14448
14449 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14450 match(Set cr (CmpD src1 src2));
14451
14452 ins_cost(100);
14453 format %{ "ucomisd $src1, $src2 test" %}
14454 ins_encode %{
14455 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14456 %}
14457 ins_pipe(pipe_slow);
14458 %}
14459
14460 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14461 match(Set cr (CmpD src1 src2));
14462
14463 ins_cost(100);
14464 format %{ "vucomxsd $src1, $src2 test" %}
14465 ins_encode %{
14466 __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14467 %}
14468 ins_pipe(pipe_slow);
14469 %}
14470
14471 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14472 match(Set cr (CmpD src1 (LoadD src2)));
14473
14474 ins_cost(100);
14475 format %{ "ucomisd $src1, $src2" %}
14476 ins_encode %{
14477 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14478 %}
14479 ins_pipe(pipe_slow);
14480 %}
14481
14482 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14483 match(Set cr (CmpD src1 (LoadD src2)));
14484
14485 ins_cost(100);
14486 format %{ "vucomxsd $src1, $src2" %}
14487 ins_encode %{
14488 __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14489 %}
14490 ins_pipe(pipe_slow);
14491 %}
14492
14493 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14494 match(Set cr (CmpD src con));
14495 ins_cost(100);
14496 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14497 ins_encode %{
14498 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14499 %}
14500 ins_pipe(pipe_slow);
14501 %}
14502
14503 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14504 match(Set cr (CmpD src con));
14505
14506 ins_cost(100);
14507 format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14508 ins_encode %{
14509 __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14510 %}
14511 ins_pipe(pipe_slow);
14512 %}
14513
14514 // Compare into -1,0,1
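// emit_cmpfp3 produces the three-way result directly in $dst: -1 if less than or
// unordered (NaN), 0 if equal, +1 if greater, as spelled out in the format strings.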
14515 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14516 %{
14517 match(Set dst (CmpF3 src1 src2));
14518 effect(KILL cr);
14519
14520 ins_cost(275);
14521 format %{ "ucomiss $src1, $src2\n\t"
14522 "movl $dst, #-1\n\t"
14523 "jp,s done\n\t"
14524 "jb,s done\n\t"
14525 "setne $dst\n\t"
14526 "movzbl $dst, $dst\n"
14527 "done:" %}
14528 ins_encode %{
14529 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14530 emit_cmpfp3(masm, $dst$$Register);
14531 %}
14532 ins_pipe(pipe_slow);
14533 %}
14534
14535 // Compare into -1,0,1
14536 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14537 %{
14538 match(Set dst (CmpF3 src1 (LoadF src2)));
14539 effect(KILL cr);
14540
14541 ins_cost(275);
14542 format %{ "ucomiss $src1, $src2\n\t"
14543 "movl $dst, #-1\n\t"
14544 "jp,s done\n\t"
14545 "jb,s done\n\t"
14546 "setne $dst\n\t"
14547 "movzbl $dst, $dst\n"
14548 "done:" %}
14549 ins_encode %{
14550 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14551 emit_cmpfp3(masm, $dst$$Register);
14552 %}
14553 ins_pipe(pipe_slow);
14554 %}
14555
14556 // Compare into -1,0,1
14557 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14558 match(Set dst (CmpF3 src con));
14559 effect(KILL cr);
14560
14561 ins_cost(275);
14562 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14563 "movl $dst, #-1\n\t"
14564 "jp,s done\n\t"
14565 "jb,s done\n\t"
14566 "setne $dst\n\t"
14567 "movzbl $dst, $dst\n"
14568 "done:" %}
14569 ins_encode %{
14570 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14571 emit_cmpfp3(masm, $dst$$Register);
14572 %}
14573 ins_pipe(pipe_slow);
14574 %}
14575
14576 // Compare into -1,0,1
14577 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14578 %{
14579 match(Set dst (CmpD3 src1 src2));
14580 effect(KILL cr);
14581
14582 ins_cost(275);
14583 format %{ "ucomisd $src1, $src2\n\t"
14584 "movl $dst, #-1\n\t"
14585 "jp,s done\n\t"
14586 "jb,s done\n\t"
14587 "setne $dst\n\t"
14588 "movzbl $dst, $dst\n"
14589 "done:" %}
14590 ins_encode %{
14591 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14592 emit_cmpfp3(masm, $dst$$Register);
14593 %}
14594 ins_pipe(pipe_slow);
14595 %}
14596
14597 // Compare into -1,0,1
14598 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14599 %{
14600 match(Set dst (CmpD3 src1 (LoadD src2)));
14601 effect(KILL cr);
14602
14603 ins_cost(275);
14604 format %{ "ucomisd $src1, $src2\n\t"
14605 "movl $dst, #-1\n\t"
14606 "jp,s done\n\t"
14607 "jb,s done\n\t"
14608 "setne $dst\n\t"
14609 "movzbl $dst, $dst\n"
14610 "done:" %}
14611 ins_encode %{
14612 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14613 emit_cmpfp3(masm, $dst$$Register);
14614 %}
14615 ins_pipe(pipe_slow);
14616 %}
14617
14618 // Compare into -1,0,1
14619 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14620 match(Set dst (CmpD3 src con));
14621 effect(KILL cr);
14622
14623 ins_cost(275);
14624 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14625 "movl $dst, #-1\n\t"
14626 "jp,s done\n\t"
14627 "jb,s done\n\t"
14628 "setne $dst\n\t"
14629 "movzbl $dst, $dst\n"
14630 "done:" %}
14631 ins_encode %{
14632 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14633 emit_cmpfp3(masm, $dst$$Register);
14634 %}
14635 ins_pipe(pipe_slow);
14636 %}
14637
14638 //----------Arithmetic Conversion Instructions---------------------------------
14639
14640 instruct convF2D_reg_reg(regD dst, regF src)
14641 %{
14642 match(Set dst (ConvF2D src));
14643
14644 format %{ "cvtss2sd $dst, $src" %}
14645 ins_encode %{
14646 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14647 %}
14648 ins_pipe(pipe_slow); // XXX
14649 %}
14650
14651 instruct convF2D_reg_mem(regD dst, memory src)
14652 %{
14653 predicate(UseAVX == 0);
14654 match(Set dst (ConvF2D (LoadF src)));
14655
14656 format %{ "cvtss2sd $dst, $src" %}
14657 ins_encode %{
14658 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14659 %}
14660 ins_pipe(pipe_slow); // XXX
14661 %}
14662
14663 instruct convD2F_reg_reg(regF dst, regD src)
14664 %{
14665 match(Set dst (ConvD2F src));
14666
14667 format %{ "cvtsd2ss $dst, $src" %}
14668 ins_encode %{
14669 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14670 %}
14671 ins_pipe(pipe_slow); // XXX
14672 %}
14673
14674 instruct convD2F_reg_mem(regF dst, memory src)
14675 %{
14676 predicate(UseAVX == 0);
14677 match(Set dst (ConvD2F (LoadD src)));
14678
14679 format %{ "cvtsd2ss $dst, $src" %}
14680 ins_encode %{
14681 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14682 %}
14683 ins_pipe(pipe_slow); // XXX
14684 %}
14685
14686 // XXX do mem variants
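// convertF2I emits the truncating cvtt*2si* form plus a fixup path for NaN and
// out-of-range inputs (hence KILL cr). The AVX10.2 variants use the saturating
// evcvtt*2sis* forms, which already yield the Java-defined result and need no fixup.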
14687 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14688 %{
14689 predicate(!VM_Version::supports_avx10_2());
14690 match(Set dst (ConvF2I src));
14691 effect(KILL cr);
14692 format %{ "convert_f2i $dst, $src" %}
14693 ins_encode %{
14694 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14695 %}
14696 ins_pipe(pipe_slow);
14697 %}
14698
14699 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14700 %{
14701 predicate(VM_Version::supports_avx10_2());
14702 match(Set dst (ConvF2I src));
14703 format %{ "evcvttss2sisl $dst, $src" %}
14704 ins_encode %{
14705 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14706 %}
14707 ins_pipe(pipe_slow);
14708 %}
14709
14710 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14711 %{
14712 predicate(VM_Version::supports_avx10_2());
14713 match(Set dst (ConvF2I (LoadF src)));
14714 format %{ "evcvttss2sisl $dst, $src" %}
14715 ins_encode %{
14716 __ evcvttss2sisl($dst$$Register, $src$$Address);
14717 %}
14718 ins_pipe(pipe_slow);
14719 %}
14720
14721 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14722 %{
14723 predicate(!VM_Version::supports_avx10_2());
14724 match(Set dst (ConvF2L src));
14725 effect(KILL cr);
14726 format %{ "convert_f2l $dst, $src"%}
14727 ins_encode %{
14728 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14729 %}
14730 ins_pipe(pipe_slow);
14731 %}
14732
14733 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14734 %{
14735 predicate(VM_Version::supports_avx10_2());
14736 match(Set dst (ConvF2L src));
14737 format %{ "evcvttss2sisq $dst, $src" %}
14738 ins_encode %{
14739 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14740 %}
14741 ins_pipe(pipe_slow);
14742 %}
14743
14744 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14745 %{
14746 predicate(VM_Version::supports_avx10_2());
14747 match(Set dst (ConvF2L (LoadF src)));
14748 format %{ "evcvttss2sisq $dst, $src" %}
14749 ins_encode %{
14750 __ evcvttss2sisq($dst$$Register, $src$$Address);
14751 %}
14752 ins_pipe(pipe_slow);
14753 %}
14754
14755 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14756 %{
14757 predicate(!VM_Version::supports_avx10_2());
14758 match(Set dst (ConvD2I src));
14759 effect(KILL cr);
14760 format %{ "convert_d2i $dst, $src"%}
14761 ins_encode %{
14762 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14763 %}
14764 ins_pipe(pipe_slow);
14765 %}
14766
14767 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14768 %{
14769 predicate(VM_Version::supports_avx10_2());
14770 match(Set dst (ConvD2I src));
14771 format %{ "evcvttsd2sisl $dst, $src" %}
14772 ins_encode %{
14773 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14774 %}
14775 ins_pipe(pipe_slow);
14776 %}
14777
14778 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14779 %{
14780 predicate(VM_Version::supports_avx10_2());
14781 match(Set dst (ConvD2I (LoadD src)));
14782 format %{ "evcvttsd2sisl $dst, $src" %}
14783 ins_encode %{
14784 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14785 %}
14786 ins_pipe(pipe_slow);
14787 %}
14788
14789 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14790 %{
14791 predicate(!VM_Version::supports_avx10_2());
14792 match(Set dst (ConvD2L src));
14793 effect(KILL cr);
14794 format %{ "convert_d2l $dst, $src"%}
14795 ins_encode %{
14796 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14797 %}
14798 ins_pipe(pipe_slow);
14799 %}
14800
14801 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14802 %{
14803 predicate(VM_Version::supports_avx10_2());
14804 match(Set dst (ConvD2L src));
14805 format %{ "evcvttsd2sisq $dst, $src" %}
14806 ins_encode %{
14807 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14808 %}
14809 ins_pipe(pipe_slow);
14810 %}
14811
14812 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14813 %{
14814 predicate(VM_Version::supports_avx10_2());
14815 match(Set dst (ConvD2L (LoadD src)));
14816 format %{ "evcvttsd2sisq $dst, $src" %}
14817 ins_encode %{
14818 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14819 %}
14820 ins_pipe(pipe_slow);
14821 %}
14822
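// RoundD/RoundF implement java.lang.Math.round; the helper below handles the half-up
// rounding and the NaN/overflow cases that a single cvt instruction cannot express,
// using the extra integer temporaries.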
14823 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14824 %{
14825 match(Set dst (RoundD src));
14826 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14827 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14828 ins_encode %{
14829 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14830 %}
14831 ins_pipe(pipe_slow);
14832 %}
14833
14834 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14835 %{
14836 match(Set dst (RoundF src));
14837 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14838 format %{ "round_float $dst,$src" %}
14839 ins_encode %{
14840 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14841 %}
14842 ins_pipe(pipe_slow);
14843 %}
14844
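// cvtsi2ss/cvtsi2sd write only the low element of the destination XMM register. When AVX
// is available, the preceding pxor breaks the false dependence on the register's previous
// contents (the classic partial-register-update stall).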
14845 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14846 %{
14847 predicate(!UseXmmI2F);
14848 match(Set dst (ConvI2F src));
14849
14850 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14851 ins_encode %{
14852 if (UseAVX > 0) {
14853 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14854 }
14855 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14856 %}
14857 ins_pipe(pipe_slow); // XXX
14858 %}
14859
14860 instruct convI2F_reg_mem(regF dst, memory src)
14861 %{
14862 predicate(UseAVX == 0);
14863 match(Set dst (ConvI2F (LoadI src)));
14864
14865 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14866 ins_encode %{
14867 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14868 %}
14869 ins_pipe(pipe_slow); // XXX
14870 %}
14871
14872 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14873 %{
14874 predicate(!UseXmmI2D);
14875 match(Set dst (ConvI2D src));
14876
14877 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14878 ins_encode %{
14879 if (UseAVX > 0) {
14880 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14881 }
14882 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14883 %}
14884 ins_pipe(pipe_slow); // XXX
14885 %}
14886
14887 instruct convI2D_reg_mem(regD dst, memory src)
14888 %{
14889 predicate(UseAVX == 0);
14890 match(Set dst (ConvI2D (LoadI src)));
14891
14892 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14893 ins_encode %{
14894 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14895 %}
14896 ins_pipe(pipe_slow); // XXX
14897 %}
14898
14899 instruct convXI2F_reg(regF dst, rRegI src)
14900 %{
14901 predicate(UseXmmI2F);
14902 match(Set dst (ConvI2F src));
14903
14904 format %{ "movdl $dst, $src\n\t"
14905 "cvtdq2psl $dst, $dst\t# i2f" %}
14906 ins_encode %{
14907 __ movdl($dst$$XMMRegister, $src$$Register);
14908 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14909 %}
14910 ins_pipe(pipe_slow); // XXX
14911 %}
14912
14913 instruct convXI2D_reg(regD dst, rRegI src)
14914 %{
14915 predicate(UseXmmI2D);
14916 match(Set dst (ConvI2D src));
14917
14918 format %{ "movdl $dst, $src\n\t"
14919 "cvtdq2pdl $dst, $dst\t# i2d" %}
14920 ins_encode %{
14921 __ movdl($dst$$XMMRegister, $src$$Register);
14922 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14923 %}
14924 ins_pipe(pipe_slow); // XXX
14925 %}
14926
14927 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14928 %{
14929 match(Set dst (ConvL2F src));
14930
14931 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14932 ins_encode %{
14933 if (UseAVX > 0) {
14934 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14935 }
14936 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14937 %}
14938 ins_pipe(pipe_slow); // XXX
14939 %}
14940
14941 instruct convL2F_reg_mem(regF dst, memory src)
14942 %{
14943 predicate(UseAVX == 0);
14944 match(Set dst (ConvL2F (LoadL src)));
14945
14946 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14947 ins_encode %{
14948 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14949 %}
14950 ins_pipe(pipe_slow); // XXX
14951 %}
14952
14953 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14954 %{
14955 match(Set dst (ConvL2D src));
14956
14957 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14958 ins_encode %{
14959 if (UseAVX > 0) {
14960 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14961 }
14962 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14963 %}
14964 ins_pipe(pipe_slow); // XXX
14965 %}
14966
14967 instruct convL2D_reg_mem(regD dst, memory src)
14968 %{
14969 predicate(UseAVX == 0);
14970 match(Set dst (ConvL2D (LoadL src)));
14971
14972 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14973 ins_encode %{
14974 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14975 %}
14976 ins_pipe(pipe_slow); // XXX
14977 %}
14978
14979 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14980 %{
14981 match(Set dst (ConvI2L src));
14982
14983 ins_cost(125);
14984 format %{ "movslq $dst, $src\t# i2l" %}
14985 ins_encode %{
14986 __ movslq($dst$$Register, $src$$Register);
14987 %}
14988 ins_pipe(ialu_reg_reg);
14989 %}
14990
14991 // Zero-extend convert int to long
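// On x86-64 a 32-bit movl implicitly zeroes bits 63..32 of the destination, so the
// (AndL (ConvI2L src) 0xFFFFFFFF) pattern needs at most a movl, and no code at all when
// dst and src already coincide.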
14992 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14993 %{
14994 match(Set dst (AndL (ConvI2L src) mask));
14995
14996 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14997 ins_encode %{
14998 if ($dst$$reg != $src$$reg) {
14999 __ movl($dst$$Register, $src$$Register);
15000 }
15001 %}
15002 ins_pipe(ialu_reg_reg);
15003 %}
15004
15005 // Zero-extend convert int to long
15006 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15007 %{
15008 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15009
15010 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15011 ins_encode %{
15012 __ movl($dst$$Register, $src$$Address);
15013 %}
15014 ins_pipe(ialu_reg_mem);
15015 %}
15016
15017 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15018 %{
15019 match(Set dst (AndL src mask));
15020
15021 format %{ "movl $dst, $src\t# zero-extend long" %}
15022 ins_encode %{
15023 __ movl($dst$$Register, $src$$Register);
15024 %}
15025 ins_pipe(ialu_reg_reg);
15026 %}
15027
15028 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15029 %{
15030 match(Set dst (ConvL2I src));
15031
15032 format %{ "movl $dst, $src\t# l2i" %}
15033 ins_encode %{
15034 __ movl($dst$$Register, $src$$Register);
15035 %}
15036 ins_pipe(ialu_reg_reg);
15037 %}
15038
15039
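// The Move{F2I,I2F,D2L,L2D} patterns reinterpret raw bits between the integer and XMM
// register files (Float.floatToRawIntBits and friends). Depending on where the operands
// live, this is either a load/store through a stack slot or a direct movd/movq between
// the register files.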
15040 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15041 match(Set dst (MoveF2I src));
15042 effect(DEF dst, USE src);
15043
15044 ins_cost(125);
15045 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15046 ins_encode %{
15047 __ movl($dst$$Register, Address(rsp, $src$$disp));
15048 %}
15049 ins_pipe(ialu_reg_mem);
15050 %}
15051
15052 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15053 match(Set dst (MoveI2F src));
15054 effect(DEF dst, USE src);
15055
15056 ins_cost(125);
15057 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15058 ins_encode %{
15059 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15060 %}
15061 ins_pipe(pipe_slow);
15062 %}
15063
15064 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15065 match(Set dst (MoveD2L src));
15066 effect(DEF dst, USE src);
15067
15068 ins_cost(125);
15069 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15070 ins_encode %{
15071 __ movq($dst$$Register, Address(rsp, $src$$disp));
15072 %}
15073 ins_pipe(ialu_reg_mem);
15074 %}
15075
15076 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15077 predicate(!UseXmmLoadAndClearUpper);
15078 match(Set dst (MoveL2D src));
15079 effect(DEF dst, USE src);
15080
15081 ins_cost(125);
15082 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15083 ins_encode %{
15084 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15085 %}
15086 ins_pipe(pipe_slow);
15087 %}
15088
15089 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15090 predicate(UseXmmLoadAndClearUpper);
15091 match(Set dst (MoveL2D src));
15092 effect(DEF dst, USE src);
15093
15094 ins_cost(125);
15095 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15096 ins_encode %{
15097 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15098 %}
15099 ins_pipe(pipe_slow);
15100 %}
15101
15102
15103 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15104 match(Set dst (MoveF2I src));
15105 effect(DEF dst, USE src);
15106
15107 ins_cost(95); // XXX
15108 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15109 ins_encode %{
15110 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15111 %}
15112 ins_pipe(pipe_slow);
15113 %}
15114
15115 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15116 match(Set dst (MoveI2F src));
15117 effect(DEF dst, USE src);
15118
15119 ins_cost(100);
15120 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15121 ins_encode %{
15122 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15123 %}
15124 ins_pipe( ialu_mem_reg );
15125 %}
15126
15127 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15128 match(Set dst (MoveD2L src));
15129 effect(DEF dst, USE src);
15130
15131 ins_cost(95); // XXX
15132 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
15133 ins_encode %{
15134 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15135 %}
15136 ins_pipe(pipe_slow);
15137 %}
15138
15139 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15140 match(Set dst (MoveL2D src));
15141 effect(DEF dst, USE src);
15142
15143 ins_cost(100);
15144 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15145 ins_encode %{
15146 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15147 %}
15148 ins_pipe(ialu_mem_reg);
15149 %}
15150
15151 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15152 match(Set dst (MoveF2I src));
15153 effect(DEF dst, USE src);
15154 ins_cost(85);
15155 format %{ "movd $dst,$src\t# MoveF2I" %}
15156 ins_encode %{
15157 __ movdl($dst$$Register, $src$$XMMRegister);
15158 %}
15159 ins_pipe( pipe_slow );
15160 %}
15161
15162 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15163 match(Set dst (MoveD2L src));
15164 effect(DEF dst, USE src);
15165 ins_cost(85);
15166 format %{ "movd $dst,$src\t# MoveD2L" %}
15167 ins_encode %{
15168 __ movdq($dst$$Register, $src$$XMMRegister);
15169 %}
15170 ins_pipe( pipe_slow );
15171 %}
15172
15173 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15174 match(Set dst (MoveI2F src));
15175 effect(DEF dst, USE src);
15176 ins_cost(100);
15177 format %{ "movd $dst,$src\t# MoveI2F" %}
15178 ins_encode %{
15179 __ movdl($dst$$XMMRegister, $src$$Register);
15180 %}
15181 ins_pipe( pipe_slow );
15182 %}
15183
15184 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15185 match(Set dst (MoveL2D src));
15186 effect(DEF dst, USE src);
15187 ins_cost(100);
15188 format %{ "movd $dst,$src\t# MoveL2D" %}
15189 ins_encode %{
15190 __ movdq($dst$$XMMRegister, $src$$Register);
15191 %}
15192 ins_pipe( pipe_slow );
15193 %}
15194
15195 // Fast clearing of an array
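// The count is given in 8-byte words. Short lengths are cleared with an inline store
// loop; large lengths fall back to rep stosb (UseFastStosb), an XMM/YMM zeroing loop
// (UseXMMForObjInit), or rep stosq, as reflected in the format templates below.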
15196 // Small non-constant length ClearArray for non-AVX512 targets.
15197 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15198 Universe dummy, rFlagsReg cr)
15199 %{
15200 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15201 match(Set dummy (ClearArray cnt base));
15202 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15203
15204 format %{ $$template
15205 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15206 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15207 $$emit$$"jg LARGE\n\t"
15208 $$emit$$"dec rcx\n\t"
15209 $$emit$$"js DONE\t# Zero length\n\t"
15210 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15211 $$emit$$"dec rcx\n\t"
15212 $$emit$$"jge LOOP\n\t"
15213 $$emit$$"jmp DONE\n\t"
15214 $$emit$$"# LARGE:\n\t"
15215 if (UseFastStosb) {
15216 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15217 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15218 } else if (UseXMMForObjInit) {
15219 $$emit$$"mov rdi,rax\n\t"
15220 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15221 $$emit$$"jmpq L_zero_64_bytes\n\t"
15222 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15223 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15224 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15225 $$emit$$"add 0x40,rax\n\t"
15226 $$emit$$"# L_zero_64_bytes:\n\t"
15227 $$emit$$"sub 0x8,rcx\n\t"
15228 $$emit$$"jge L_loop\n\t"
15229 $$emit$$"add 0x4,rcx\n\t"
15230 $$emit$$"jl L_tail\n\t"
15231 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15232 $$emit$$"add 0x20,rax\n\t"
15233 $$emit$$"sub 0x4,rcx\n\t"
15234 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15235 $$emit$$"add 0x4,rcx\n\t"
15236 $$emit$$"jle L_end\n\t"
15237 $$emit$$"dec rcx\n\t"
15238 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15239 $$emit$$"vmovq xmm0,(rax)\n\t"
15240 $$emit$$"add 0x8,rax\n\t"
15241 $$emit$$"dec rcx\n\t"
15242 $$emit$$"jge L_sloop\n\t"
15243 $$emit$$"# L_end:\n\t"
15244 } else {
15245 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15246 }
15247 $$emit$$"# DONE"
15248 %}
15249 ins_encode %{
15250 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15251 $tmp$$XMMRegister, false, knoreg);
15252 %}
15253 ins_pipe(pipe_slow);
15254 %}
15255
15256 // Small non-constant length ClearArray for AVX512 targets.
15257 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15258 Universe dummy, rFlagsReg cr)
15259 %{
15260 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15261 match(Set dummy (ClearArray cnt base));
15262 ins_cost(125);
15263 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15264
15265 format %{ $$template
15266 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15267 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15268 $$emit$$"jg LARGE\n\t"
15269 $$emit$$"dec rcx\n\t"
15270 $$emit$$"js DONE\t# Zero length\n\t"
15271 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15272 $$emit$$"dec rcx\n\t"
15273 $$emit$$"jge LOOP\n\t"
15274 $$emit$$"jmp DONE\n\t"
15275 $$emit$$"# LARGE:\n\t"
15276 if (UseFastStosb) {
15277 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15278 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15279 } else if (UseXMMForObjInit) {
15280 $$emit$$"mov rdi,rax\n\t"
15281 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15282 $$emit$$"jmpq L_zero_64_bytes\n\t"
15283 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15284 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15285 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15286 $$emit$$"add 0x40,rax\n\t"
15287 $$emit$$"# L_zero_64_bytes:\n\t"
15288 $$emit$$"sub 0x8,rcx\n\t"
15289 $$emit$$"jge L_loop\n\t"
15290 $$emit$$"add 0x4,rcx\n\t"
15291 $$emit$$"jl L_tail\n\t"
15292 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15293 $$emit$$"add 0x20,rax\n\t"
15294 $$emit$$"sub 0x4,rcx\n\t"
15295 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15296 $$emit$$"add 0x4,rcx\n\t"
15297 $$emit$$"jle L_end\n\t"
15298 $$emit$$"dec rcx\n\t"
15299 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15300 $$emit$$"vmovq xmm0,(rax)\n\t"
15301 $$emit$$"add 0x8,rax\n\t"
15302 $$emit$$"dec rcx\n\t"
15303 $$emit$$"jge L_sloop\n\t"
15304 $$emit$$"# L_end:\n\t"
15305 } else {
15306 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15307 }
15308 $$emit$$"# DONE"
15309 %}
15310 ins_encode %{
15311 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15312 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15313 %}
15314 ins_pipe(pipe_slow);
15315 %}
15316
15317 // Large non-constant length ClearArray for non-AVX512 targets.
15318 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15319 Universe dummy, rFlagsReg cr)
15320 %{
15321 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15322 match(Set dummy (ClearArray cnt base));
15323 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15324
15325 format %{ $$template
15326 if (UseFastStosb) {
15327 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15328 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15329 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15330 } else if (UseXMMForObjInit) {
15331 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15332 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15333 $$emit$$"jmpq L_zero_64_bytes\n\t"
15334 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15335 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15336 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15337 $$emit$$"add 0x40,rax\n\t"
15338 $$emit$$"# L_zero_64_bytes:\n\t"
15339 $$emit$$"sub 0x8,rcx\n\t"
15340 $$emit$$"jge L_loop\n\t"
15341 $$emit$$"add 0x4,rcx\n\t"
15342 $$emit$$"jl L_tail\n\t"
15343 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15344 $$emit$$"add 0x20,rax\n\t"
15345 $$emit$$"sub 0x4,rcx\n\t"
15346 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15347 $$emit$$"add 0x4,rcx\n\t"
15348 $$emit$$"jle L_end\n\t"
15349 $$emit$$"dec rcx\n\t"
15350 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15351 $$emit$$"vmovq xmm0,(rax)\n\t"
15352 $$emit$$"add 0x8,rax\n\t"
15353 $$emit$$"dec rcx\n\t"
15354 $$emit$$"jge L_sloop\n\t"
15355 $$emit$$"# L_end:\n\t"
15356 } else {
15357 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15358 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15359 }
15360 %}
15361 ins_encode %{
15362 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15363 $tmp$$XMMRegister, true, knoreg);
15364 %}
15365 ins_pipe(pipe_slow);
15366 %}
15367
15368 // Large non-constant length ClearArray for AVX512 targets.
15369 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15370 Universe dummy, rFlagsReg cr)
15371 %{
15372 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15373 match(Set dummy (ClearArray cnt base));
15374 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15375
15376 format %{ $$template
15377 if (UseFastStosb) {
15378 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15379 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15380 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15381 } else if (UseXMMForObjInit) {
15382 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15383 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15384 $$emit$$"jmpq L_zero_64_bytes\n\t"
15385 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15386 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15387 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15388 $$emit$$"add 0x40,rax\n\t"
15389 $$emit$$"# L_zero_64_bytes:\n\t"
15390 $$emit$$"sub 0x8,rcx\n\t"
15391 $$emit$$"jge L_loop\n\t"
15392 $$emit$$"add 0x4,rcx\n\t"
15393 $$emit$$"jl L_tail\n\t"
15394 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15395 $$emit$$"add 0x20,rax\n\t"
15396 $$emit$$"sub 0x4,rcx\n\t"
15397 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15398 $$emit$$"add 0x4,rcx\n\t"
15399 $$emit$$"jle L_end\n\t"
15400 $$emit$$"dec rcx\n\t"
15401 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15402 $$emit$$"vmovq xmm0,(rax)\n\t"
15403 $$emit$$"add 0x8,rax\n\t"
15404 $$emit$$"dec rcx\n\t"
15405 $$emit$$"jge L_sloop\n\t"
15406 $$emit$$"# L_end:\n\t"
15407 } else {
15408 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15409 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15410 }
15411 %}
15412 ins_encode %{
15413 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15414 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15415 %}
15416 ins_pipe(pipe_slow);
15417 %}
15418
15419 // Small constant length ClearArray for AVX512 targets.
15420 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15421 %{
15422 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15423 match(Set dummy (ClearArray cnt base));
15424 ins_cost(100);
15425 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15426 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15427 ins_encode %{
15428 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15429 %}
15430 ins_pipe(pipe_slow);
15431 %}
15432
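// The StrComp patterns are selected by operand encoding: LL compares two Latin-1 byte
// arrays, UU two UTF-16 arrays, and LU/UL the mixed cases (the UL patterns pass their
// operands to string_compare in swapped order). The *_evex variants take an extra kReg
// temporary used by the AVX-512 code path.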
15433 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15434 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15435 %{
15436 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15437 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15438 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15439
15440 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15441 ins_encode %{
15442 __ string_compare($str1$$Register, $str2$$Register,
15443 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15444 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15445 %}
15446 ins_pipe( pipe_slow );
15447 %}
15448
15449 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15450 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15451 %{
15452 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15453 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15454 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15455
15456 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15457 ins_encode %{
15458 __ string_compare($str1$$Register, $str2$$Register,
15459 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15460 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15461 %}
15462 ins_pipe( pipe_slow );
15463 %}
15464
15465 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15466 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15467 %{
15468 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15469 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15470 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15471
15472 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15473 ins_encode %{
15474 __ string_compare($str1$$Register, $str2$$Register,
15475 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15476 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15477 %}
15478 ins_pipe( pipe_slow );
15479 %}
15480
15481 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15482 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15483 %{
15484 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15485 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15486 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15487
15488 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15489 ins_encode %{
15490 __ string_compare($str1$$Register, $str2$$Register,
15491 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15492 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15493 %}
15494 ins_pipe( pipe_slow );
15495 %}
15496
15497 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15498 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15499 %{
15500 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15501 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15502 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15503
15504 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15505 ins_encode %{
15506 __ string_compare($str1$$Register, $str2$$Register,
15507 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15508 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15509 %}
15510 ins_pipe( pipe_slow );
15511 %}
15512
15513 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15514 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15515 %{
15516 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15517 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15518 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15519
15520 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15521 ins_encode %{
15522 __ string_compare($str1$$Register, $str2$$Register,
15523 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15524 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15525 %}
15526 ins_pipe( pipe_slow );
15527 %}
15528
15529 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15530 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15531 %{
15532 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15533 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15534 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15535
15536 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15537 ins_encode %{
15538 __ string_compare($str2$$Register, $str1$$Register,
15539 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15540 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15541 %}
15542 ins_pipe( pipe_slow );
15543 %}
15544
15545 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15546 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15547 %{
15548 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15549 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15550 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15551
15552 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15553 ins_encode %{
15554 __ string_compare($str2$$Register, $str1$$Register,
15555 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15556 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15557 %}
15558 ins_pipe( pipe_slow );
15559 %}
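
// The four pairs of StrComp rules above differ only in the element encodings
// of the two operands: LL compares two Latin-1 byte[] values, UU two UTF-16
// char[] values, and LU/UL mixed encodings; the UL forms reuse the same
// macro-assembler routine by swapping the operand order. A rough sketch of
// the assumed result semantics (not the exact library code), following
// String.compareTo:
//   for (int i = 0; i < min(cnt1, cnt2); i++)
//     if (str1[i] != str2[i]) return str1[i] - str2[i];
//   return cnt1 - cnt2;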
15560
15561 // fast search of substring with known size.
15562 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15563 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15564 %{
15565 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15566 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15567 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15568
15569 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15570 ins_encode %{
15571 int icnt2 = (int)$int_cnt2$$constant;
15572 if (icnt2 >= 16) {
15573 // IndexOf for constant substrings with size >= 16 elements
15574 // which don't need to be loaded through the stack.
15575 __ string_indexofC8($str1$$Register, $str2$$Register,
15576 $cnt1$$Register, $cnt2$$Register,
15577 icnt2, $result$$Register,
15578 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15579 } else {
15580 // Small strings are loaded through the stack if they cross a page boundary.
15581 __ string_indexof($str1$$Register, $str2$$Register,
15582 $cnt1$$Register, $cnt2$$Register,
15583 icnt2, $result$$Register,
15584 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15585 }
15586 %}
15587 ins_pipe( pipe_slow );
15588 %}
15589
15590 // fast search of substring with known size.
15591 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15592 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15593 %{
15594 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15595 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15596 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15597
15598 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15599 ins_encode %{
15600 int icnt2 = (int)$int_cnt2$$constant;
15601 if (icnt2 >= 8) {
15602 // IndexOf for constant substrings with size >= 8 elements
15603 // which don't need to be loaded through the stack.
15604 __ string_indexofC8($str1$$Register, $str2$$Register,
15605 $cnt1$$Register, $cnt2$$Register,
15606 icnt2, $result$$Register,
15607 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15608 } else {
15609 // Small strings are loaded through the stack if they cross a page boundary.
15610 __ string_indexof($str1$$Register, $str2$$Register,
15611 $cnt1$$Register, $cnt2$$Register,
15612 icnt2, $result$$Register,
15613 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15614 }
15615 %}
15616 ins_pipe( pipe_slow );
15617 %}
15618
15619 // fast search of substring with known size.
15620 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15621 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15622 %{
15623 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15624 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15625 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15626
15627 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15628 ins_encode %{
15629 int icnt2 = (int)$int_cnt2$$constant;
15630 if (icnt2 >= 8) {
15631 // IndexOf for constant substrings with size >= 8 elements
15632 // which don't need to be loaded through the stack.
15633 __ string_indexofC8($str1$$Register, $str2$$Register,
15634 $cnt1$$Register, $cnt2$$Register,
15635 icnt2, $result$$Register,
15636 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15637 } else {
15638 // Small strings are loaded through the stack if they cross a page boundary.
15639 __ string_indexof($str1$$Register, $str2$$Register,
15640 $cnt1$$Register, $cnt2$$Register,
15641 icnt2, $result$$Register,
15642 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15643 }
15644 %}
15645 ins_pipe( pipe_slow );
15646 %}
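
// The three _con rules above fire when the substring length is a compile-time
// constant. Sketch of the shared dispatch (assumed, matching the inline
// comments): a constant needle of at least one SSE register's worth of
// elements (16 bytes for LL, 8 chars otherwise) takes the string_indexofC8
// path, which never stages the needle on the stack; shorter constants use the
// general string_indexof path, which copies small operands via the stack only
// when they would cross a page boundary:
//   icnt2 >= (LL ? 16 : 8) ? string_indexofC8(...) : string_indexof(...)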
15647
15648 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15649 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15650 %{
15651 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15652 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15653 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15654
15655 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15656 ins_encode %{
15657 __ string_indexof($str1$$Register, $str2$$Register,
15658 $cnt1$$Register, $cnt2$$Register,
15659 (-1), $result$$Register,
15660 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15661 %}
15662 ins_pipe( pipe_slow );
15663 %}
15664
15665 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15666 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15667 %{
15668 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15669 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15670 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15671
15672 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15673 ins_encode %{
15674 __ string_indexof($str1$$Register, $str2$$Register,
15675 $cnt1$$Register, $cnt2$$Register,
15676 (-1), $result$$Register,
15677 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15678 %}
15679 ins_pipe( pipe_slow );
15680 %}
15681
15682 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15683 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15684 %{
15685 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15686 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15687 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15688
15689 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15690 ins_encode %{
15691 __ string_indexof($str1$$Register, $str2$$Register,
15692 $cnt1$$Register, $cnt2$$Register,
15693 (-1), $result$$Register,
15694 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15695 %}
15696 ins_pipe( pipe_slow );
15697 %}
15698
15699 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15700 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15701 %{
15702 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15703 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15704 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15705 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15706 ins_encode %{
15707 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15708 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15709 %}
15710 ins_pipe( pipe_slow );
15711 %}
15712
15713 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15714 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15715 %{
15716 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15717 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15718 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15719 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15720 ins_encode %{
15721 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15722 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15723 %}
15724 ins_pipe( pipe_slow );
15725 %}
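
// Scalar equivalent of the two IndexOfChar rules above (a rough sketch of the
// assumed semantics; the vector code handles alignment and tails):
//   for (int i = 0; i < cnt1; i++)
//     if (str1[i] == ch) return i;
//   return -1;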
15726
15727 // fast string equals
15728 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15729 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15730 %{
15731 predicate(!VM_Version::supports_avx512vlbw());
15732 match(Set result (StrEquals (Binary str1 str2) cnt));
15733 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15734
15735 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15736 ins_encode %{
15737 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15738 $cnt$$Register, $result$$Register, $tmp3$$Register,
15739 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15740 %}
15741 ins_pipe( pipe_slow );
15742 %}
15743
15744 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15745 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15746 %{
15747 predicate(VM_Version::supports_avx512vlbw());
15748 match(Set result (StrEquals (Binary str1 str2) cnt));
15749 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15750
15751 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15752 ins_encode %{
15753 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15754 $cnt$$Register, $result$$Register, $tmp3$$Register,
15755 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15756 %}
15757 ins_pipe( pipe_slow );
15758 %}
15759
15760 // fast array equals
15761 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15762 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15763 %{
15764 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15765 match(Set result (AryEq ary1 ary2));
15766 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15767
15768 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15769 ins_encode %{
15770 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15771 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15772 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15773 %}
15774 ins_pipe( pipe_slow );
15775 %}
15776
15777 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15778 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15779 %{
15780 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15781 match(Set result (AryEq ary1 ary2));
15782 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15783
15784 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15785 ins_encode %{
15786 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15787 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15788 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15789 %}
15790 ins_pipe( pipe_slow );
15791 %}
15792
15793 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15794 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15795 %{
15796 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15797 match(Set result (AryEq ary1 ary2));
15798 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15799
15800 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15801 ins_encode %{
15802 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15803 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15804 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15805 %}
15806 ins_pipe( pipe_slow );
15807 %}
15808
15809 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15810 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15811 %{
15812 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15813 match(Set result (AryEq ary1 ary2));
15814 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15815
15816 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15817 ins_encode %{
15818 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15819 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15820 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15821 %}
15822 ins_pipe( pipe_slow );
15823 %}
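
// The StrEquals and AryEq rules above all funnel into the same
// MacroAssembler::arrays_equals routine. Call shape as used in this file (the
// authoritative signature lives in macroAssembler_x86.cpp): the leading bool
// selects whole-array comparison versus a raw (base, count) comparison, and
// the trailing non-register bool selects char versus byte elements:
//   arrays_equals(is_array_equ, ary1, ary2, limit, result, tmp,
//                 vec1, vec2, is_char, mask);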
15824
15825 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15826 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15827 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15828 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15829 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15830 %{
15831 predicate(UseAVX >= 2);
15832 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15833 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15834 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15835 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15836 USE basic_type, KILL cr);
15837
15838 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15839 ins_encode %{
15840 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15841 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15842 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15843 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15844 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15845 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15846 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15847 %}
15848 ins_pipe( pipe_slow );
15849 %}
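
// The vectorized hash above accumulates into the incoming $result, so the
// scalar equivalent is the standard Java polynomial hash (a sketch of the
// assumed semantics for the element type selected by $basic_type):
//   for (int i = 0; i < cnt1; i++)
//     result = 31 * result + ary1[i];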
15850
15851 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15852 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15853 %{
15854 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15855 match(Set result (CountPositives ary1 len));
15856 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15857
15858 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15859 ins_encode %{
15860 __ count_positives($ary1$$Register, $len$$Register,
15861 $result$$Register, $tmp3$$Register,
15862 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15863 %}
15864 ins_pipe( pipe_slow );
15865 %}
15866
15867 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15868 legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15869 %{
15870 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15871 match(Set result (CountPositives ary1 len));
15872 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15873
15874 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15875 ins_encode %{
15876 __ count_positives($ary1$$Register, $len$$Register,
15877 $result$$Register, $tmp3$$Register,
15878 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15879 %}
15880 ins_pipe( pipe_slow );
15881 %}
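
// Rough semantics of the CountPositives rules above (an assumption here; the
// exact contract is defined by the Java-level countPositives callers and
// MacroAssembler::count_positives): report the length of the leading run of
// non-negative bytes, i.e. approximately
//   int n = 0;
//   while (n < len && ary1[n] >= 0) n++;
//   return n;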
15882
15883 // fast char[] to byte[] compression
15884 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15885 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15886 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15887 match(Set result (StrCompressedCopy src (Binary dst len)));
15888 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15889 USE_KILL len, KILL tmp5, KILL cr);
15890
15891 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15892 ins_encode %{
15893 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15894 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15895 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15896 knoreg, knoreg);
15897 %}
15898 ins_pipe( pipe_slow );
15899 %}
15900
15901 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15902 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15903 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15904 match(Set result (StrCompressedCopy src (Binary dst len)));
15905 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15906 USE_KILL len, KILL tmp5, KILL cr);
15907
15908 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15909 ins_encode %{
15910 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15911 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15912 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15913 $ktmp1$$KRegister, $ktmp2$$KRegister);
15914 %}
15915 ins_pipe( pipe_slow );
15916 %}
15917 // fast byte[] to char[] inflation
15918 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15919 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15920 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15921 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15922 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15923
15924 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15925 ins_encode %{
15926 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15927 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15928 %}
15929 ins_pipe( pipe_slow );
15930 %}
15931
15932 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15933 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15934 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15935 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15936 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15937
15938 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15939 ins_encode %{
15940 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15941 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15942 %}
15943 ins_pipe( pipe_slow );
15944 %}
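
// Scalar view of the two conversions above (a rough sketch of the assumed
// semantics; the vector code also handles tails and failure reporting):
//   compress: dst[i] = (byte)src[i]          -- valid only while src[i] <= 0xFF,
//             and the integer result lets the caller detect a char that did
//             not fit in Latin-1
//   inflate:  dst[i] = (char)(src[i] & 0xFF) -- always succeeds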
15945
15946 // encode char[] to byte[] in ISO_8859_1
15947 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15948 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15949 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15950 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15951 match(Set result (EncodeISOArray src (Binary dst len)));
15952 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15953
15954 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15955 ins_encode %{
15956 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15957 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15958 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15959 %}
15960 ins_pipe( pipe_slow );
15961 %}
15962
15963 // encode char[] to byte[] in ASCII
15964 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15965 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15966 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15967 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15968 match(Set result (EncodeISOArray src (Binary dst len)));
15969 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15970
15971 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15972 ins_encode %{
15973 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15974 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15975 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15976 %}
15977 ins_pipe( pipe_slow );
15978 %}
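
// Both encode rules above call the same encode_iso_array routine; the
// trailing bool selects the cut-off (stated here as an assumption for
// clarity):
//   ascii == false -> encode chars while c <= 0xFF (ISO-8859-1)
//   ascii == true  -> encode chars while c <= 0x7F (ASCII)
// with the result reporting how many leading characters were encoded.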
15979
15980 //----------Overflow Math Instructions-----------------------------------------
15981
15982 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15983 %{
15984 match(Set cr (OverflowAddI op1 op2));
15985 effect(DEF cr, USE_KILL op1, USE op2);
15986
15987 format %{ "addl $op1, $op2\t# overflow check int" %}
15988
15989 ins_encode %{
15990 __ addl($op1$$Register, $op2$$Register);
15991 %}
15992 ins_pipe(ialu_reg_reg);
15993 %}
15994
15995 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15996 %{
15997 match(Set cr (OverflowAddI op1 op2));
15998 effect(DEF cr, USE_KILL op1, USE op2);
15999
16000 format %{ "addl $op1, $op2\t# overflow check int" %}
16001
16002 ins_encode %{
16003 __ addl($op1$$Register, $op2$$constant);
16004 %}
16005 ins_pipe(ialu_reg_reg);
16006 %}
16007
16008 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16009 %{
16010 match(Set cr (OverflowAddL op1 op2));
16011 effect(DEF cr, USE_KILL op1, USE op2);
16012
16013 format %{ "addq $op1, $op2\t# overflow check long" %}
16014 ins_encode %{
16015 __ addq($op1$$Register, $op2$$Register);
16016 %}
16017 ins_pipe(ialu_reg_reg);
16018 %}
16019
16020 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16021 %{
16022 match(Set cr (OverflowAddL op1 op2));
16023 effect(DEF cr, USE_KILL op1, USE op2);
16024
16025 format %{ "addq $op1, $op2\t# overflow check long" %}
16026 ins_encode %{
16027 __ addq($op1$$Register, $op2$$constant);
16028 %}
16029 ins_pipe(ialu_reg_reg);
16030 %}
16031
16032 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16033 %{
16034 match(Set cr (OverflowSubI op1 op2));
16035
16036 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16037 ins_encode %{
16038 __ cmpl($op1$$Register, $op2$$Register);
16039 %}
16040 ins_pipe(ialu_reg_reg);
16041 %}
16042
16043 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16044 %{
16045 match(Set cr (OverflowSubI op1 op2));
16046
16047 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16048 ins_encode %{
16049 __ cmpl($op1$$Register, $op2$$constant);
16050 %}
16051 ins_pipe(ialu_reg_reg);
16052 %}
16053
16054 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16055 %{
16056 match(Set cr (OverflowSubL op1 op2));
16057
16058 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16059 ins_encode %{
16060 __ cmpq($op1$$Register, $op2$$Register);
16061 %}
16062 ins_pipe(ialu_reg_reg);
16063 %}
16064
16065 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16066 %{
16067 match(Set cr (OverflowSubL op1 op2));
16068
16069 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16070 ins_encode %{
16071 __ cmpq($op1$$Register, $op2$$constant);
16072 %}
16073 ins_pipe(ialu_reg_reg);
16074 %}
16075
16076 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16077 %{
16078 match(Set cr (OverflowSubI zero op2));
16079 effect(DEF cr, USE_KILL op2);
16080
16081 format %{ "negl $op2\t# overflow check int" %}
16082 ins_encode %{
16083 __ negl($op2$$Register);
16084 %}
16085 ins_pipe(ialu_reg_reg);
16086 %}
16087
16088 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16089 %{
16090 match(Set cr (OverflowSubL zero op2));
16091 effect(DEF cr, USE_KILL op2);
16092
16093 format %{ "negq $op2\t# overflow check long" %}
16094 ins_encode %{
16095 __ negq($op2$$Register);
16096 %}
16097 ins_pipe(ialu_reg_reg);
16098 %}
16099
16100 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16101 %{
16102 match(Set cr (OverflowMulI op1 op2));
16103 effect(DEF cr, USE_KILL op1, USE op2);
16104
16105 format %{ "imull $op1, $op2\t# overflow check int" %}
16106 ins_encode %{
16107 __ imull($op1$$Register, $op2$$Register);
16108 %}
16109 ins_pipe(ialu_reg_reg_alu0);
16110 %}
16111
16112 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16113 %{
16114 match(Set cr (OverflowMulI op1 op2));
16115 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16116
16117 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16118 ins_encode %{
16119 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16120 %}
16121 ins_pipe(ialu_reg_reg_alu0);
16122 %}
16123
16124 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16125 %{
16126 match(Set cr (OverflowMulL op1 op2));
16127 effect(DEF cr, USE_KILL op1, USE op2);
16128
16129 format %{ "imulq $op1, $op2\t# overflow check long" %}
16130 ins_encode %{
16131 __ imulq($op1$$Register, $op2$$Register);
16132 %}
16133 ins_pipe(ialu_reg_reg_alu0);
16134 %}
16135
16136 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16137 %{
16138 match(Set cr (OverflowMulL op1 op2));
16139 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16140
16141 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16142 ins_encode %{
16143 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16144 %}
16145 ins_pipe(ialu_reg_reg_alu0);
16146 %}
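
// The Overflow* rules above only set flags; C2 pairs them with a conditional
// branch, e.g. for the Math.addExact/subtractExact/negateExact/multiplyExact
// intrinsics, where the overflow condition selects the uncommon (throwing)
// path. Sketch of the assumed shape for an int add:
//   addl op1, op2          // OF set on signed overflow
//   jo   overflow_path     // taken only when the exact operation would throw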
16147
16148
16149 //----------Control Flow Instructions------------------------------------------
16150 // Signed compare Instructions
16151
16152 // XXX more variants!!
16153 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16154 %{
16155 match(Set cr (CmpI op1 op2));
16156 effect(DEF cr, USE op1, USE op2);
16157
16158 format %{ "cmpl $op1, $op2" %}
16159 ins_encode %{
16160 __ cmpl($op1$$Register, $op2$$Register);
16161 %}
16162 ins_pipe(ialu_cr_reg_reg);
16163 %}
16164
16165 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16166 %{
16167 match(Set cr (CmpI op1 op2));
16168
16169 format %{ "cmpl $op1, $op2" %}
16170 ins_encode %{
16171 __ cmpl($op1$$Register, $op2$$constant);
16172 %}
16173 ins_pipe(ialu_cr_reg_imm);
16174 %}
16175
16176 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16177 %{
16178 match(Set cr (CmpI op1 (LoadI op2)));
16179
16180 ins_cost(500); // XXX
16181 format %{ "cmpl $op1, $op2" %}
16182 ins_encode %{
16183 __ cmpl($op1$$Register, $op2$$Address);
16184 %}
16185 ins_pipe(ialu_cr_reg_mem);
16186 %}
16187
16188 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16189 %{
16190 match(Set cr (CmpI src zero));
16191
16192 format %{ "testl $src, $src" %}
16193 ins_encode %{
16194 __ testl($src$$Register, $src$$Register);
16195 %}
16196 ins_pipe(ialu_cr_reg_imm);
16197 %}
16198
16199 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16200 %{
16201 match(Set cr (CmpI (AndI src con) zero));
16202
16203 format %{ "testl $src, $con" %}
16204 ins_encode %{
16205 __ testl($src$$Register, $con$$constant);
16206 %}
16207 ins_pipe(ialu_cr_reg_imm);
16208 %}
16209
16210 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16211 %{
16212 match(Set cr (CmpI (AndI src1 src2) zero));
16213
16214 format %{ "testl $src1, $src2" %}
16215 ins_encode %{
16216 __ testl($src1$$Register, $src2$$Register);
16217 %}
16218 ins_pipe(ialu_cr_reg_imm);
16219 %}
16220
16221 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16222 %{
16223 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16224
16225 format %{ "testl $src, $mem" %}
16226 ins_encode %{
16227 __ testl($src$$Register, $mem$$Address);
16228 %}
16229 ins_pipe(ialu_cr_reg_mem);
16230 %}
16231
16232 // Unsigned compare Instructions; really, same as signed except they
16233 // produce an rFlagsRegU instead of rFlagsReg.
16234 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16235 %{
16236 match(Set cr (CmpU op1 op2));
16237
16238 format %{ "cmpl $op1, $op2\t# unsigned" %}
16239 ins_encode %{
16240 __ cmpl($op1$$Register, $op2$$Register);
16241 %}
16242 ins_pipe(ialu_cr_reg_reg);
16243 %}
16244
16245 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16246 %{
16247 match(Set cr (CmpU op1 op2));
16248
16249 format %{ "cmpl $op1, $op2\t# unsigned" %}
16250 ins_encode %{
16251 __ cmpl($op1$$Register, $op2$$constant);
16252 %}
16253 ins_pipe(ialu_cr_reg_imm);
16254 %}
16255
16256 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16257 %{
16258 match(Set cr (CmpU op1 (LoadI op2)));
16259
16260 ins_cost(500); // XXX
16261 format %{ "cmpl $op1, $op2\t# unsigned" %}
16262 ins_encode %{
16263 __ cmpl($op1$$Register, $op2$$Address);
16264 %}
16265 ins_pipe(ialu_cr_reg_mem);
16266 %}
16267
16268 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16269 %{
16270 match(Set cr (CmpU src zero));
16271
16272 format %{ "testl $src, $src\t# unsigned" %}
16273 ins_encode %{
16274 __ testl($src$$Register, $src$$Register);
16275 %}
16276 ins_pipe(ialu_cr_reg_imm);
16277 %}
16278
16279 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16280 %{
16281 match(Set cr (CmpP op1 op2));
16282
16283 format %{ "cmpq $op1, $op2\t# ptr" %}
16284 ins_encode %{
16285 __ cmpq($op1$$Register, $op2$$Register);
16286 %}
16287 ins_pipe(ialu_cr_reg_reg);
16288 %}
16289
16290 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16291 %{
16292 match(Set cr (CmpP op1 (LoadP op2)));
16293 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16294
16295 ins_cost(500); // XXX
16296 format %{ "cmpq $op1, $op2\t# ptr" %}
16297 ins_encode %{
16298 __ cmpq($op1$$Register, $op2$$Address);
16299 %}
16300 ins_pipe(ialu_cr_reg_mem);
16301 %}
16302
16303 // XXX this is generalized by compP_rReg_mem???
16304 // Compare raw pointer (used in out-of-heap check).
16305 // Only works because non-oop pointers must be raw pointers
16306 // and raw pointers have no anti-dependencies.
16307 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16308 %{
16309 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16310 n->in(2)->as_Load()->barrier_data() == 0);
16311 match(Set cr (CmpP op1 (LoadP op2)));
16312
16313 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16314 ins_encode %{
16315 __ cmpq($op1$$Register, $op2$$Address);
16316 %}
16317 ins_pipe(ialu_cr_reg_mem);
16318 %}
16319
16320 // This will generate a signed flags result. This should be OK since
16321 // any compare to a zero should be eq/neq.
16322 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16323 %{
16324 match(Set cr (CmpP src zero));
16325
16326 format %{ "testq $src, $src\t# ptr" %}
16327 ins_encode %{
16328 __ testq($src$$Register, $src$$Register);
16329 %}
16330 ins_pipe(ialu_cr_reg_imm);
16331 %}
16332
16333 // This will generate a signed flags result. This should be OK since
16334 // any compare to a zero should be eq/neq.
16335 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16336 %{
16337 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16338 n->in(1)->as_Load()->barrier_data() == 0);
16339 match(Set cr (CmpP (LoadP op) zero));
16340
16341 ins_cost(500); // XXX
16342 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16343 ins_encode %{
16344 __ testq($op$$Address, 0xFFFFFFFF);
16345 %}
16346 ins_pipe(ialu_cr_reg_imm);
16347 %}
16348
16349 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16350 %{
16351 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16352 n->in(1)->as_Load()->barrier_data() == 0);
16353 match(Set cr (CmpP (LoadP mem) zero));
16354
16355 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16356 ins_encode %{
16357 __ cmpq(r12, $mem$$Address);
16358 %}
16359 ins_pipe(ialu_cr_reg_mem);
16360 %}
16361
16362 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16363 %{
16364 match(Set cr (CmpN op1 op2));
16365
16366 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16367 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16368 ins_pipe(ialu_cr_reg_reg);
16369 %}
16370
16371 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16372 %{
16373 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16374 match(Set cr (CmpN src (LoadN mem)));
16375
16376 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16377 ins_encode %{
16378 __ cmpl($src$$Register, $mem$$Address);
16379 %}
16380 ins_pipe(ialu_cr_reg_mem);
16381 %}
16382
16383 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16384 match(Set cr (CmpN op1 op2));
16385
16386 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16387 ins_encode %{
16388 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16389 %}
16390 ins_pipe(ialu_cr_reg_imm);
16391 %}
16392
16393 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16394 %{
16395 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16396 match(Set cr (CmpN src (LoadN mem)));
16397
16398 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16399 ins_encode %{
16400 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16401 %}
16402 ins_pipe(ialu_cr_reg_mem);
16403 %}
16404
16405 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16406 match(Set cr (CmpN op1 op2));
16407
16408 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16409 ins_encode %{
16410 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16411 %}
16412 ins_pipe(ialu_cr_reg_imm);
16413 %}
16414
16415 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16416 %{
16417 predicate(!UseCompactObjectHeaders);
16418 match(Set cr (CmpN src (LoadNKlass mem)));
16419
16420 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16421 ins_encode %{
16422 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16423 %}
16424 ins_pipe(ialu_cr_reg_mem);
16425 %}
16426
16427 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16428 match(Set cr (CmpN src zero));
16429
16430 format %{ "testl $src, $src\t# compressed ptr" %}
16431 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16432 ins_pipe(ialu_cr_reg_imm);
16433 %}
16434
16435 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16436 %{
16437 predicate(CompressedOops::base() != nullptr &&
16438 n->in(1)->as_Load()->barrier_data() == 0);
16439 match(Set cr (CmpN (LoadN mem) zero));
16440
16441 ins_cost(500); // XXX
16442 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16443 ins_encode %{
16444 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16445 %}
16446 ins_pipe(ialu_cr_reg_mem);
16447 %}
16448
16449 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16450 %{
16451 predicate(CompressedOops::base() == nullptr &&
16452 n->in(1)->as_Load()->barrier_data() == 0);
16453 match(Set cr (CmpN (LoadN mem) zero));
16454
16455 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16456 ins_encode %{
16457 __ cmpl(r12, $mem$$Address);
16458 %}
16459 ins_pipe(ialu_cr_reg_mem);
16460 %}
16461
16462 // Yanked all unsigned pointer compare operations.
16463 // Pointer compares are done with CmpP which is already unsigned.
16464
16465 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16466 %{
16467 match(Set cr (CmpL op1 op2));
16468
16469 format %{ "cmpq $op1, $op2" %}
16470 ins_encode %{
16471 __ cmpq($op1$$Register, $op2$$Register);
16472 %}
16473 ins_pipe(ialu_cr_reg_reg);
16474 %}
16475
16476 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16477 %{
16478 match(Set cr (CmpL op1 op2));
16479
16480 format %{ "cmpq $op1, $op2" %}
16481 ins_encode %{
16482 __ cmpq($op1$$Register, $op2$$constant);
16483 %}
16484 ins_pipe(ialu_cr_reg_imm);
16485 %}
16486
16487 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16488 %{
16489 match(Set cr (CmpL op1 (LoadL op2)));
16490
16491 format %{ "cmpq $op1, $op2" %}
16492 ins_encode %{
16493 __ cmpq($op1$$Register, $op2$$Address);
16494 %}
16495 ins_pipe(ialu_cr_reg_mem);
16496 %}
16497
16498 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16499 %{
16500 match(Set cr (CmpL src zero));
16501
16502 format %{ "testq $src, $src" %}
16503 ins_encode %{
16504 __ testq($src$$Register, $src$$Register);
16505 %}
16506 ins_pipe(ialu_cr_reg_imm);
16507 %}
16508
16509 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16510 %{
16511 match(Set cr (CmpL (AndL src con) zero));
16512
16513 format %{ "testq $src, $con\t# long" %}
16514 ins_encode %{
16515 __ testq($src$$Register, $con$$constant);
16516 %}
16517 ins_pipe(ialu_cr_reg_imm);
16518 %}
16519
16520 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16521 %{
16522 match(Set cr (CmpL (AndL src1 src2) zero));
16523
16524 format %{ "testq $src1, $src2\t# long" %}
16525 ins_encode %{
16526 __ testq($src1$$Register, $src2$$Register);
16527 %}
16528 ins_pipe(ialu_cr_reg_imm);
16529 %}
16530
16531 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16532 %{
16533 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16534
16535 format %{ "testq $src, $mem" %}
16536 ins_encode %{
16537 __ testq($src$$Register, $mem$$Address);
16538 %}
16539 ins_pipe(ialu_cr_reg_mem);
16540 %}
16541
16542 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16543 %{
16544 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16545
16546 format %{ "testq $src, $mem" %}
16547 ins_encode %{
16548 __ testq($src$$Register, $mem$$Address);
16549 %}
16550 ins_pipe(ialu_cr_reg_mem);
16551 %}
16552
16553 // Manifest a CmpU result in an integer register. Very painful.
16554 // This is the test to avoid.
16555 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16556 %{
16557 match(Set dst (CmpU3 src1 src2));
16558 effect(KILL flags);
16559
16560 ins_cost(275); // XXX
16561 format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
16562 "movl $dst, -1\n\t"
16563 "jb,u done\n\t"
16564 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16565 "done:" %}
16566 ins_encode %{
16567 Label done;
16568 __ cmpl($src1$$Register, $src2$$Register);
16569 __ movl($dst$$Register, -1);
16570 __ jccb(Assembler::below, done);
16571 __ setcc(Assembler::notZero, $dst$$Register);
16572 __ bind(done);
16573 %}
16574 ins_pipe(pipe_slow);
16575 %}
16576
16577 // Manifest a CmpL result in an integer register. Very painful.
16578 // This is the test to avoid.
16579 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16580 %{
16581 match(Set dst (CmpL3 src1 src2));
16582 effect(KILL flags);
16583
16584 ins_cost(275); // XXX
16585 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16586 "movl $dst, -1\n\t"
16587 "jl,s done\n\t"
16588 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16589 "done:" %}
16590 ins_encode %{
16591 Label done;
16592 __ cmpq($src1$$Register, $src2$$Register);
16593 __ movl($dst$$Register, -1);
16594 __ jccb(Assembler::less, done);
16595 __ setcc(Assembler::notZero, $dst$$Register);
16596 __ bind(done);
16597 %}
16598 ins_pipe(pipe_slow);
16599 %}
16600
16601 // Manifest a CmpUL result in an integer register. Very painful.
16602 // This is the test to avoid.
16603 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16604 %{
16605 match(Set dst (CmpUL3 src1 src2));
16606 effect(KILL flags);
16607
16608 ins_cost(275); // XXX
16609 format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
16610 "movl $dst, -1\n\t"
16611 "jb,u done\n\t"
16612 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16613 "done:" %}
16614 ins_encode %{
16615 Label done;
16616 __ cmpq($src1$$Register, $src2$$Register);
16617 __ movl($dst$$Register, -1);
16618 __ jccb(Assembler::below, done);
16619 __ setcc(Assembler::notZero, $dst$$Register);
16620 __ bind(done);
16621 %}
16622 ins_pipe(pipe_slow);
16623 %}
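
// All three Cmp*3 rules above materialize the usual three-way compare result
// (a sketch of the intended semantics):
//   dst = (src1 < src2) ? -1 : (src1 == src2) ? 0 : 1;
// where "<" is unsigned for CmpU3/CmpUL3 and signed for CmpL3, which is why
// the early-out branch is jb for the unsigned forms and jl for CmpL3.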
16624
16625 // Unsigned long compare Instructions; really, same as signed long except they
16626 // produce an rFlagsRegU instead of rFlagsReg.
16627 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16628 %{
16629 match(Set cr (CmpUL op1 op2));
16630
16631 format %{ "cmpq $op1, $op2\t# unsigned" %}
16632 ins_encode %{
16633 __ cmpq($op1$$Register, $op2$$Register);
16634 %}
16635 ins_pipe(ialu_cr_reg_reg);
16636 %}
16637
16638 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16639 %{
16640 match(Set cr (CmpUL op1 op2));
16641
16642 format %{ "cmpq $op1, $op2\t# unsigned" %}
16643 ins_encode %{
16644 __ cmpq($op1$$Register, $op2$$constant);
16645 %}
16646 ins_pipe(ialu_cr_reg_imm);
16647 %}
16648
16649 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16650 %{
16651 match(Set cr (CmpUL op1 (LoadL op2)));
16652
16653 format %{ "cmpq $op1, $op2\t# unsigned" %}
16654 ins_encode %{
16655 __ cmpq($op1$$Register, $op2$$Address);
16656 %}
16657 ins_pipe(ialu_cr_reg_mem);
16658 %}
16659
16660 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16661 %{
16662 match(Set cr (CmpUL src zero));
16663
16664 format %{ "testq $src, $src\t# unsigned" %}
16665 ins_encode %{
16666 __ testq($src$$Register, $src$$Register);
16667 %}
16668 ins_pipe(ialu_cr_reg_imm);
16669 %}
16670
16671 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16672 %{
16673 match(Set cr (CmpI (LoadB mem) imm));
16674
16675 ins_cost(125);
16676 format %{ "cmpb $mem, $imm" %}
16677 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16678 ins_pipe(ialu_cr_reg_mem);
16679 %}
16680
16681 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16682 %{
16683 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16684
16685 ins_cost(125);
16686 format %{ "testb $mem, $imm\t# ubyte" %}
16687 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16688 ins_pipe(ialu_cr_reg_mem);
16689 %}
16690
16691 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16692 %{
16693 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16694
16695 ins_cost(125);
16696 format %{ "testb $mem, $imm\t# byte" %}
16697 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16698 ins_pipe(ialu_cr_reg_mem);
16699 %}
16700
16701 //----------Max and Min--------------------------------------------------------
16702 // Min Instructions
16703
16704 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16705 %{
16706 predicate(!UseAPX);
16707 effect(USE_DEF dst, USE src, USE cr);
16708
16709 format %{ "cmovlgt $dst, $src\t# min" %}
16710 ins_encode %{
16711 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16712 %}
16713 ins_pipe(pipe_cmov_reg);
16714 %}
16715
16716 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16717 %{
16718 predicate(UseAPX);
16719 effect(DEF dst, USE src1, USE src2, USE cr);
16720
16721 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16722 ins_encode %{
16723 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16724 %}
16725 ins_pipe(pipe_cmov_reg);
16726 %}
16727
16728 instruct minI_rReg(rRegI dst, rRegI src)
16729 %{
16730 predicate(!UseAPX);
16731 match(Set dst (MinI dst src));
16732
16733 ins_cost(200);
16734 expand %{
16735 rFlagsReg cr;
16736 compI_rReg(cr, dst, src);
16737 cmovI_reg_g(dst, src, cr);
16738 %}
16739 %}
16740
16741 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16742 %{
16743 predicate(UseAPX);
16744 match(Set dst (MinI src1 src2));
16745 effect(DEF dst, USE src1, USE src2);
16746 flag(PD::Flag_ndd_demotable_opr1);
16747
16748 ins_cost(200);
16749 expand %{
16750 rFlagsReg cr;
16751 compI_rReg(cr, src1, src2);
16752 cmovI_reg_g_ndd(dst, src1, src2, cr);
16753 %}
16754 %}
16755
16756 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16757 %{
16758 predicate(!UseAPX);
16759 effect(USE_DEF dst, USE src, USE cr);
16760
16761 format %{ "cmovllt $dst, $src\t# max" %}
16762 ins_encode %{
16763 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16764 %}
16765 ins_pipe(pipe_cmov_reg);
16766 %}
16767
16768 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16769 %{
16770 predicate(UseAPX);
16771 effect(DEF dst, USE src1, USE src2, USE cr);
16772
16773 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16774 ins_encode %{
16775 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16776 %}
16777 ins_pipe(pipe_cmov_reg);
16778 %}
16779
16780 instruct maxI_rReg(rRegI dst, rRegI src)
16781 %{
16782 predicate(!UseAPX);
16783 match(Set dst (MaxI dst src));
16784
16785 ins_cost(200);
16786 expand %{
16787 rFlagsReg cr;
16788 compI_rReg(cr, dst, src);
16789 cmovI_reg_l(dst, src, cr);
16790 %}
16791 %}
16792
16793 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16794 %{
16795 predicate(UseAPX);
16796 match(Set dst (MaxI src1 src2));
16797 effect(DEF dst, USE src1, USE src2);
16798 flag(PD::Flag_ndd_demotable_opr1);
16799
16800 ins_cost(200);
16801 expand %{
16802 rFlagsReg cr;
16803 compI_rReg(cr, src1, src2);
16804 cmovI_reg_l_ndd(dst, src1, src2, cr);
16805 %}
16806 %}
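
// MinI/MaxI are expanded into a compare plus a conditional move, so no branch
// is emitted. Sketch of the assumed expansion for the non-APX MinI form
// (MaxI mirrors it with the opposite condition):
//   cmpl  dst, src
//   cmovg dst, src        // dst = (dst > src) ? src : dst, i.e. min(dst, src)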
16807
16808 // ============================================================================
16809 // Branch Instructions
16810
16811 // Jump Direct - Label defines a relative address from JMP+1
16812 instruct jmpDir(label labl)
16813 %{
16814 match(Goto);
16815 effect(USE labl);
16816
16817 ins_cost(300);
16818 format %{ "jmp $labl" %}
16819 size(5);
16820 ins_encode %{
16821 Label* L = $labl$$label;
16822 __ jmp(*L, false); // Always long jump
16823 %}
16824 ins_pipe(pipe_jmp);
16825 %}
16826
16827 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16828 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16829 %{
16830 match(If cop cr);
16831 effect(USE labl);
16832
16833 ins_cost(300);
16834 format %{ "j$cop $labl" %}
16835 size(6);
16836 ins_encode %{
16837 Label* L = $labl$$label;
16838 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16839 %}
16840 ins_pipe(pipe_jcc);
16841 %}
16842
16843 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16844 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16845 %{
16846 match(CountedLoopEnd cop cr);
16847 effect(USE labl);
16848
16849 ins_cost(300);
16850 format %{ "j$cop $labl\t# loop end" %}
16851 size(6);
16852 ins_encode %{
16853 Label* L = $labl$$label;
16854 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16855 %}
16856 ins_pipe(pipe_jcc);
16857 %}
16858
16859 // Jump Direct Conditional - using unsigned comparison
16860 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16861 match(If cop cmp);
16862 effect(USE labl);
16863
16864 ins_cost(300);
16865 format %{ "j$cop,u $labl" %}
16866 size(6);
16867 ins_encode %{
16868 Label* L = $labl$$label;
16869 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16870 %}
16871 ins_pipe(pipe_jcc);
16872 %}
16873
16874 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16875 match(If cop cmp);
16876 effect(USE labl);
16877
16878 ins_cost(200);
16879 format %{ "j$cop,u $labl" %}
16880 size(6);
16881 ins_encode %{
16882 Label* L = $labl$$label;
16883 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16884 %}
16885 ins_pipe(pipe_jcc);
16886 %}
16887
16888 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16889 match(If cop cmp);
16890 effect(USE labl);
16891
16892 ins_cost(200);
16893 format %{ $$template
16894 if ($cop$$cmpcode == Assembler::notEqual) {
16895 $$emit$$"jp,u $labl\n\t"
16896 $$emit$$"j$cop,u $labl"
16897 } else {
16898 $$emit$$"jp,u done\n\t"
16899 $$emit$$"j$cop,u $labl\n\t"
16900 $$emit$$"done:"
16901 }
16902 %}
16903 ins_encode %{
16904 Label* l = $labl$$label;
16905 if ($cop$$cmpcode == Assembler::notEqual) {
16906 __ jcc(Assembler::parity, *l, false);
16907 __ jcc(Assembler::notEqual, *l, false);
16908 } else if ($cop$$cmpcode == Assembler::equal) {
16909 Label done;
16910 __ jccb(Assembler::parity, done);
16911 __ jcc(Assembler::equal, *l, false);
16912 __ bind(done);
16913 } else {
16914 ShouldNotReachHere();
16915 }
16916 %}
16917 ins_pipe(pipe_jcc);
16918 %}
16919
16920 // Jump Direct Conditional - using signed and unsigned comparison
16921 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16922 match(If cop cmp);
16923 effect(USE labl);
16924
16925 ins_cost(200);
16926 format %{ "j$cop,su $labl" %}
16927 size(6);
16928 ins_encode %{
16929 Label* L = $labl$$label;
16930 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16931 %}
16932 ins_pipe(pipe_jcc);
16933 %}
16934
16935 // ============================================================================
16936 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16937 // superklass array for an instance of the superklass. Set a hidden
16938 // internal cache on a hit (cache is checked with exposed code in
16939 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16940 // encoding ALSO sets flags.
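// A rough pseudo-code sketch of the linear scan below (assumed from the
// description above and check_klass_subtype_slow_path_linear):
//   scan = sub->secondary_supers();
//   for (i = 0; i < scan->length(); i++)
//     if (scan->at(i) == super) { sub->secondary_super_cache = super; return 0; }
//   return not-zero;                     // flags are set to match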
16941
16942 instruct partialSubtypeCheck(rdi_RegP result,
16943 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16944 rFlagsReg cr)
16945 %{
16946 match(Set result (PartialSubtypeCheck sub super));
16947 predicate(!UseSecondarySupersTable);
16948 effect(KILL rcx, KILL cr);
16949
16950 ins_cost(1100); // slightly larger than the next version
16951 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16952 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16953 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16954 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16955 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16956 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16957 "xorq $result, $result\t\t# Hit: rdi zero\n\t"
16958 "miss:\t" %}
16959
16960 ins_encode %{
16961 Label miss;
16962 // NB: Callers may assume that, when $result is a valid register,
16963 // check_klass_subtype_slow_path_linear sets it to a nonzero
16964 // value.
16965 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16966 $rcx$$Register, $result$$Register,
16967 nullptr, &miss,
16968 /*set_cond_codes:*/ true);
16969 __ xorptr($result$$Register, $result$$Register);
16970 __ bind(miss);
16971 %}
16972
16973 ins_pipe(pipe_slow);
16974 %}
16975
16976 // ============================================================================
16977 // Two versions of hashtable-based partialSubtypeCheck, both used when
16978 // we need to search for a super class in the secondary supers array.
16979 // The first is used when we don't know _a priori_ the class being
16980 // searched for. The second, far more common, is used when we do know:
16981 // this is used for instanceof, checkcast, and any case where C2 can
16982 // determine it by constant propagation.
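// Illustrative examples (assumptions, not taken from this file): a checkcast
// or "x instanceof KnownType" against a type known at compile time can use
// the constant-super form below, while something like someClass.isInstance(x)
// with a non-constant class falls back to the variable-super form.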
16983
16984 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16985 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16986 rFlagsReg cr)
16987 %{
16988 match(Set result (PartialSubtypeCheck sub super));
16989 predicate(UseSecondarySupersTable);
16990 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16991
16992 ins_cost(1000);
16993 format %{ "partialSubtypeCheck $result, $sub, $super" %}
16994
16995 ins_encode %{
16996 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16997 $temp3$$Register, $temp4$$Register, $result$$Register);
16998 %}
16999
17000 ins_pipe(pipe_slow);
17001 %}
17002
17003 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17004 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17005 rFlagsReg cr)
17006 %{
17007 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17008 predicate(UseSecondarySupersTable);
17009 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17010
17011 ins_cost(700); // smaller than the next version
17012 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17013
17014 ins_encode %{
17015 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17016 if (InlineSecondarySupersTest) {
17017 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17018 $temp3$$Register, $temp4$$Register, $result$$Register,
17019 super_klass_slot);
17020 } else {
17021 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17022 }
17023 %}
17024
17025 ins_pipe(pipe_slow);
17026 %}
17027
17028 // ============================================================================
17029 // Branch Instructions -- short offset versions
17030 //
17031 // These instructions are used to replace jumps of a long offset (the default
17032 // match) with jumps of a shorter offset. These instructions are all tagged
17033 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17034 // match rules in general matching. Instead, the ADLC generates a conversion
17035 // method in the MachNode which can be used to do in-place replacement of the
17036 // long variant with the shorter variant. The compiler determines whether the
17037 // short form can be used via the is_short_branch_offset() predicate in the
17038 // machine-specific code section of the file.
17039
17040 // Jump Direct - Label defines a relative address from JMP+1
17041 instruct jmpDir_short(label labl) %{
17042 match(Goto);
17043 effect(USE labl);
17044
17045 ins_cost(300);
17046 format %{ "jmp,s $labl" %}
17047 size(2);
17048 ins_encode %{
17049 Label* L = $labl$$label;
17050 __ jmpb(*L);
17051 %}
17052 ins_pipe(pipe_jmp);
17053 ins_short_branch(1);
17054 %}
17055
17056 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17057 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17058 match(If cop cr);
17059 effect(USE labl);
17060
17061 ins_cost(300);
17062 format %{ "j$cop,s $labl" %}
17063 size(2);
17064 ins_encode %{
17065 Label* L = $labl$$label;
17066 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17067 %}
17068 ins_pipe(pipe_jcc);
17069 ins_short_branch(1);
17070 %}
17071
17072 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17073 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17074 match(CountedLoopEnd cop cr);
17075 effect(USE labl);
17076
17077 ins_cost(300);
17078 format %{ "j$cop,s $labl\t# loop end" %}
17079 size(2);
17080 ins_encode %{
17081 Label* L = $labl$$label;
17082 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17083 %}
17084 ins_pipe(pipe_jcc);
17085 ins_short_branch(1);
17086 %}
17087
17088 // Jump Direct Conditional - using unsigned comparison
17089 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17090 match(If cop cmp);
17091 effect(USE labl);
17092
17093 ins_cost(300);
17094 format %{ "j$cop,us $labl" %}
17095 size(2);
17096 ins_encode %{
17097 Label* L = $labl$$label;
17098 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17099 %}
17100 ins_pipe(pipe_jcc);
17101 ins_short_branch(1);
17102 %}
17103
17104 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17105 match(If cop cmp);
17106 effect(USE labl);
17107
17108 ins_cost(300);
17109 format %{ "j$cop,us $labl" %}
17110 size(2);
17111 ins_encode %{
17112 Label* L = $labl$$label;
17113 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17114 %}
17115 ins_pipe(pipe_jcc);
17116 ins_short_branch(1);
17117 %}
17118
17119 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17120 match(If cop cmp);
17121 effect(USE labl);
17122
17123 ins_cost(300);
17124 format %{ $$template
17125 if ($cop$$cmpcode == Assembler::notEqual) {
17126 $$emit$$"jp,u,s $labl\n\t"
17127 $$emit$$"j$cop,u,s $labl"
17128 } else {
17129 $$emit$$"jp,u,s done\n\t"
17130 $$emit$$"j$cop,u,s $labl\n\t"
17131 $$emit$$"done:"
17132 }
17133 %}
17134 size(4);
17135 ins_encode %{
17136 Label* l = $labl$$label;
17137 if ($cop$$cmpcode == Assembler::notEqual) {
17138 __ jccb(Assembler::parity, *l);
17139 __ jccb(Assembler::notEqual, *l);
17140 } else if ($cop$$cmpcode == Assembler::equal) {
17141 Label done;
17142 __ jccb(Assembler::parity, done);
17143 __ jccb(Assembler::equal, *l);
17144 __ bind(done);
17145 } else {
17146 ShouldNotReachHere();
17147 }
17148 %}
17149 ins_pipe(pipe_jcc);
17150 ins_short_branch(1);
17151 %}
17152
17153 // Jump Direct Conditional - using signed and unsigned comparison
17154 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17155 match(If cop cmp);
17156 effect(USE labl);
17157
17158 ins_cost(300);
17159 format %{ "j$cop,sus $labl" %}
17160 size(2);
17161 ins_encode %{
17162 Label* L = $labl$$label;
17163 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17164 %}
17165 ins_pipe(pipe_jcc);
17166 ins_short_branch(1);
17167 %}
17168
17169 // ============================================================================
17170 // inlined locking and unlocking
17171
17172 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17173 match(Set cr (FastLock object box));
17174 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17175 ins_cost(300);
17176 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17177 ins_encode %{
17178 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17179 %}
17180 ins_pipe(pipe_slow);
17181 %}
17182
17183 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17184 match(Set cr (FastUnlock object rax_reg));
17185 effect(TEMP tmp, USE_KILL rax_reg);
17186 ins_cost(300);
17187 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17188 ins_encode %{
17189 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17190 %}
17191 ins_pipe(pipe_slow);
17192 %}
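
// Illustrative Java shape (an assumption; the emitted fast path also depends
// on LockingMode and related flags):
//
//   void m() {
//     synchronized (this) {   // FastLock on entry, FastUnlock on exit
//       // ...
//     }
//   }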
17193
17194
17195 // ============================================================================
17196 // Safepoint Instructions
17197 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17198 %{
17199 match(SafePoint poll);
17200 effect(KILL cr, USE poll);
17201
17202 format %{ "testl rax, [$poll]\t"
17203 "# Safepoint: poll for GC" %}
17204 ins_cost(125);
17205 ins_encode %{
17206 __ relocate(relocInfo::poll_type);
17207 address pre_pc = __ pc();
17208 __ testl(rax, Address($poll$$Register, 0));
17209 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17210 %}
17211 ins_pipe(ialu_reg_mem);
17212 %}
17213
17214 instruct mask_all_evexL(kReg dst, rRegL src) %{
17215 match(Set dst (MaskAll src));
17216 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17217 ins_encode %{
17218 int mask_len = Matcher::vector_length(this);
17219 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17220 %}
17221 ins_pipe( pipe_slow );
17222 %}
17223
17224 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17225 predicate(Matcher::vector_length(n) > 32);
17226 match(Set dst (MaskAll src));
17227 effect(TEMP tmp);
17228 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17229 ins_encode %{
17230 int mask_len = Matcher::vector_length(this);
17231 __ movslq($tmp$$Register, $src$$Register);
17232 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17233 %}
17234 ins_pipe( pipe_slow );
17235 %}
17236
17237 // ============================================================================
17238 // Procedure Call/Return Instructions
17239 // Call Java Static Instruction
17240 // Note: If this code changes, the corresponding ret_addr_offset() and
17241 // compute_padding() functions will have to be adjusted.
17242 instruct CallStaticJavaDirect(method meth) %{
17243 match(CallStaticJava);
17244 effect(USE meth);
17245
17246 ins_cost(300);
17247 format %{ "call,static " %}
17248 opcode(0xE8); /* E8 cd */
17249 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17250 ins_pipe(pipe_slow);
17251 ins_alignment(4);
17252 %}
17253
17254 // Call Java Dynamic Instruction
17255 // Note: If this code changes, the corresponding ret_addr_offset() and
17256 // compute_padding() functions will have to be adjusted.
17257 instruct CallDynamicJavaDirect(method meth)
17258 %{
17259 match(CallDynamicJava);
17260 effect(USE meth);
17261
17262 ins_cost(300);
17263 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17264 "call,dynamic " %}
17265 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17266 ins_pipe(pipe_slow);
17267 ins_alignment(4);
17268 %}
17269
17270 // Call Runtime Instruction
17271 instruct CallRuntimeDirect(method meth)
17272 %{
17273 match(CallRuntime);
17274 effect(USE meth);
17275
17276 ins_cost(300);
17277 format %{ "call,runtime " %}
17278 ins_encode(clear_avx, Java_To_Runtime(meth));
17279 ins_pipe(pipe_slow);
17280 %}
17281
17282 // Call runtime without safepoint
17283 instruct CallLeafDirect(method meth)
17284 %{
17285 match(CallLeaf);
17286 effect(USE meth);
17287
17288 ins_cost(300);
17289 format %{ "call_leaf,runtime " %}
17290 ins_encode(clear_avx, Java_To_Runtime(meth));
17291 ins_pipe(pipe_slow);
17292 %}
17293
17294 // Call runtime without safepoint and with vector arguments
17295 instruct CallLeafDirectVector(method meth)
17296 %{
17297 match(CallLeafVector);
17298 effect(USE meth);
17299
17300 ins_cost(300);
17301 format %{ "call_leaf,vector " %}
17302 ins_encode(Java_To_Runtime(meth));
17303 ins_pipe(pipe_slow);
17304 %}
17305
17306 // Call runtime without safepoint
17307 instruct CallLeafNoFPDirect(method meth)
17308 %{
17309 match(CallLeafNoFP);
17310 effect(USE meth);
17311
17312 ins_cost(300);
17313 format %{ "call_leaf_nofp,runtime " %}
17314 ins_encode(clear_avx, Java_To_Runtime(meth));
17315 ins_pipe(pipe_slow);
17316 %}
17317
17318 // Return Instruction
17319 // Remove the return address & jump to it.
17320 // Notice: We always emit a nop after a ret to make sure there is room
17321 // for safepoint patching
17322 instruct Ret()
17323 %{
17324 match(Return);
17325
17326 format %{ "ret" %}
17327 ins_encode %{
17328 __ ret(0);
17329 %}
17330 ins_pipe(pipe_jmp);
17331 %}
17332
17333 // Tail Call; Jump from runtime stub to Java code.
17334 // Also known as an 'interprocedural jump'.
17335 // Target of jump will eventually return to caller.
17336 // TailJump below removes the return address.
17337 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17338 // emitted just above the TailCall which has reset rbp to the caller state.
17339 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17340 %{
17341 match(TailCall jump_target method_ptr);
17342
17343 ins_cost(300);
17344 format %{ "jmp $jump_target\t# rbx holds method" %}
17345 ins_encode %{
17346 __ jmp($jump_target$$Register);
17347 %}
17348 ins_pipe(pipe_jmp);
17349 %}
17350
17351 // Tail Jump; remove the return address; jump to target.
17352 // TailCall above leaves the return address around.
17353 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17354 %{
17355 match(TailJump jump_target ex_oop);
17356
17357 ins_cost(300);
17358 format %{ "popq rdx\t# pop return address\n\t"
17359 "jmp $jump_target" %}
17360 ins_encode %{
17361 __ popq(as_Register(RDX_enc));
17362 __ jmp($jump_target$$Register);
17363 %}
17364 ins_pipe(pipe_jmp);
17365 %}
17366
17367 // Forward exception.
17368 instruct ForwardExceptionjmp()
17369 %{
17370 match(ForwardException);
17371
17372 format %{ "jmp forward_exception_stub" %}
17373 ins_encode %{
17374 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17375 %}
17376 ins_pipe(pipe_jmp);
17377 %}
17378
17379 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler, and is set up
// just prior to jumping to this handler. No code emitted.
17382 instruct CreateException(rax_RegP ex_oop)
17383 %{
17384 match(Set ex_oop (CreateEx));
17385
17386 size(0);
17387 // use the following format syntax
17388 format %{ "# exception oop is in rax; no code emitted" %}
17389 ins_encode();
17390 ins_pipe(empty);
17391 %}
17392
17393 // Rethrow exception:
17394 // The exception oop will come in the first argument position.
17395 // Then JUMP (not call) to the rethrow stub code.
17396 instruct RethrowException()
17397 %{
17398 match(Rethrow);
17399
17400 // use the following format syntax
17401 format %{ "jmp rethrow_stub" %}
17402 ins_encode %{
17403 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17404 %}
17405 ins_pipe(pipe_jmp);
17406 %}
17407
17408 // ============================================================================
17409 // This name is KNOWN by the ADLC and cannot be changed.
17410 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17411 // for this guy.
17412 instruct tlsLoadP(r15_RegP dst) %{
17413 match(Set dst (ThreadLocal));
17414 effect(DEF dst);
17415
17416 size(0);
17417 format %{ "# TLS is in R15" %}
17418 ins_encode( /*empty encoding*/ );
17419 ins_pipe(ialu_reg_reg);
17420 %}
17421
17422 instruct addF_reg(regF dst, regF src) %{
17423 predicate(UseAVX == 0);
17424 match(Set dst (AddF dst src));
17425
17426 format %{ "addss $dst, $src" %}
17427 ins_cost(150);
17428 ins_encode %{
17429 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17430 %}
17431 ins_pipe(pipe_slow);
17432 %}
17433
17434 instruct addF_mem(regF dst, memory src) %{
17435 predicate(UseAVX == 0);
17436 match(Set dst (AddF dst (LoadF src)));
17437
17438 format %{ "addss $dst, $src" %}
17439 ins_cost(150);
17440 ins_encode %{
17441 __ addss($dst$$XMMRegister, $src$$Address);
17442 %}
17443 ins_pipe(pipe_slow);
17444 %}
17445
17446 instruct addF_imm(regF dst, immF con) %{
17447 predicate(UseAVX == 0);
17448 match(Set dst (AddF dst con));
17449 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17450 ins_cost(150);
17451 ins_encode %{
17452 __ addss($dst$$XMMRegister, $constantaddress($con));
17453 %}
17454 ins_pipe(pipe_slow);
17455 %}
17456
17457 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17458 predicate(UseAVX > 0);
17459 match(Set dst (AddF src1 src2));
17460
17461 format %{ "vaddss $dst, $src1, $src2" %}
17462 ins_cost(150);
17463 ins_encode %{
17464 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17465 %}
17466 ins_pipe(pipe_slow);
17467 %}
17468
17469 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17470 predicate(UseAVX > 0);
17471 match(Set dst (AddF src1 (LoadF src2)));
17472
17473 format %{ "vaddss $dst, $src1, $src2" %}
17474 ins_cost(150);
17475 ins_encode %{
17476 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17477 %}
17478 ins_pipe(pipe_slow);
17479 %}
17480
17481 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17482 predicate(UseAVX > 0);
17483 match(Set dst (AddF src con));
17484
17485 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17486 ins_cost(150);
17487 ins_encode %{
17488 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17489 %}
17490 ins_pipe(pipe_slow);
17491 %}
17492
17493 instruct addD_reg(regD dst, regD src) %{
17494 predicate(UseAVX == 0);
17495 match(Set dst (AddD dst src));
17496
17497 format %{ "addsd $dst, $src" %}
17498 ins_cost(150);
17499 ins_encode %{
17500 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17501 %}
17502 ins_pipe(pipe_slow);
17503 %}
17504
17505 instruct addD_mem(regD dst, memory src) %{
17506 predicate(UseAVX == 0);
17507 match(Set dst (AddD dst (LoadD src)));
17508
17509 format %{ "addsd $dst, $src" %}
17510 ins_cost(150);
17511 ins_encode %{
17512 __ addsd($dst$$XMMRegister, $src$$Address);
17513 %}
17514 ins_pipe(pipe_slow);
17515 %}
17516
17517 instruct addD_imm(regD dst, immD con) %{
17518 predicate(UseAVX == 0);
17519 match(Set dst (AddD dst con));
17520 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17521 ins_cost(150);
17522 ins_encode %{
17523 __ addsd($dst$$XMMRegister, $constantaddress($con));
17524 %}
17525 ins_pipe(pipe_slow);
17526 %}
17527
17528 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17529 predicate(UseAVX > 0);
17530 match(Set dst (AddD src1 src2));
17531
17532 format %{ "vaddsd $dst, $src1, $src2" %}
17533 ins_cost(150);
17534 ins_encode %{
17535 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17536 %}
17537 ins_pipe(pipe_slow);
17538 %}
17539
17540 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17541 predicate(UseAVX > 0);
17542 match(Set dst (AddD src1 (LoadD src2)));
17543
17544 format %{ "vaddsd $dst, $src1, $src2" %}
17545 ins_cost(150);
17546 ins_encode %{
17547 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17548 %}
17549 ins_pipe(pipe_slow);
17550 %}
17551
17552 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17553 predicate(UseAVX > 0);
17554 match(Set dst (AddD src con));
17555
17556 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17557 ins_cost(150);
17558 ins_encode %{
17559 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17560 %}
17561 ins_pipe(pipe_slow);
17562 %}
17563
17564 instruct subF_reg(regF dst, regF src) %{
17565 predicate(UseAVX == 0);
17566 match(Set dst (SubF dst src));
17567
17568 format %{ "subss $dst, $src" %}
17569 ins_cost(150);
17570 ins_encode %{
17571 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17572 %}
17573 ins_pipe(pipe_slow);
17574 %}
17575
17576 instruct subF_mem(regF dst, memory src) %{
17577 predicate(UseAVX == 0);
17578 match(Set dst (SubF dst (LoadF src)));
17579
17580 format %{ "subss $dst, $src" %}
17581 ins_cost(150);
17582 ins_encode %{
17583 __ subss($dst$$XMMRegister, $src$$Address);
17584 %}
17585 ins_pipe(pipe_slow);
17586 %}
17587
17588 instruct subF_imm(regF dst, immF con) %{
17589 predicate(UseAVX == 0);
17590 match(Set dst (SubF dst con));
17591 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17592 ins_cost(150);
17593 ins_encode %{
17594 __ subss($dst$$XMMRegister, $constantaddress($con));
17595 %}
17596 ins_pipe(pipe_slow);
17597 %}
17598
17599 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17600 predicate(UseAVX > 0);
17601 match(Set dst (SubF src1 src2));
17602
17603 format %{ "vsubss $dst, $src1, $src2" %}
17604 ins_cost(150);
17605 ins_encode %{
17606 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17607 %}
17608 ins_pipe(pipe_slow);
17609 %}
17610
17611 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17612 predicate(UseAVX > 0);
17613 match(Set dst (SubF src1 (LoadF src2)));
17614
17615 format %{ "vsubss $dst, $src1, $src2" %}
17616 ins_cost(150);
17617 ins_encode %{
17618 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17619 %}
17620 ins_pipe(pipe_slow);
17621 %}
17622
17623 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17624 predicate(UseAVX > 0);
17625 match(Set dst (SubF src con));
17626
17627 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17628 ins_cost(150);
17629 ins_encode %{
17630 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17631 %}
17632 ins_pipe(pipe_slow);
17633 %}
17634
17635 instruct subD_reg(regD dst, regD src) %{
17636 predicate(UseAVX == 0);
17637 match(Set dst (SubD dst src));
17638
17639 format %{ "subsd $dst, $src" %}
17640 ins_cost(150);
17641 ins_encode %{
17642 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17643 %}
17644 ins_pipe(pipe_slow);
17645 %}
17646
17647 instruct subD_mem(regD dst, memory src) %{
17648 predicate(UseAVX == 0);
17649 match(Set dst (SubD dst (LoadD src)));
17650
17651 format %{ "subsd $dst, $src" %}
17652 ins_cost(150);
17653 ins_encode %{
17654 __ subsd($dst$$XMMRegister, $src$$Address);
17655 %}
17656 ins_pipe(pipe_slow);
17657 %}
17658
17659 instruct subD_imm(regD dst, immD con) %{
17660 predicate(UseAVX == 0);
17661 match(Set dst (SubD dst con));
17662 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17663 ins_cost(150);
17664 ins_encode %{
17665 __ subsd($dst$$XMMRegister, $constantaddress($con));
17666 %}
17667 ins_pipe(pipe_slow);
17668 %}
17669
17670 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17671 predicate(UseAVX > 0);
17672 match(Set dst (SubD src1 src2));
17673
17674 format %{ "vsubsd $dst, $src1, $src2" %}
17675 ins_cost(150);
17676 ins_encode %{
17677 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17678 %}
17679 ins_pipe(pipe_slow);
17680 %}
17681
17682 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17683 predicate(UseAVX > 0);
17684 match(Set dst (SubD src1 (LoadD src2)));
17685
17686 format %{ "vsubsd $dst, $src1, $src2" %}
17687 ins_cost(150);
17688 ins_encode %{
17689 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17690 %}
17691 ins_pipe(pipe_slow);
17692 %}
17693
17694 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17695 predicate(UseAVX > 0);
17696 match(Set dst (SubD src con));
17697
17698 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17699 ins_cost(150);
17700 ins_encode %{
17701 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17702 %}
17703 ins_pipe(pipe_slow);
17704 %}
17705
17706 instruct mulF_reg(regF dst, regF src) %{
17707 predicate(UseAVX == 0);
17708 match(Set dst (MulF dst src));
17709
17710 format %{ "mulss $dst, $src" %}
17711 ins_cost(150);
17712 ins_encode %{
17713 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17714 %}
17715 ins_pipe(pipe_slow);
17716 %}
17717
17718 instruct mulF_mem(regF dst, memory src) %{
17719 predicate(UseAVX == 0);
17720 match(Set dst (MulF dst (LoadF src)));
17721
17722 format %{ "mulss $dst, $src" %}
17723 ins_cost(150);
17724 ins_encode %{
17725 __ mulss($dst$$XMMRegister, $src$$Address);
17726 %}
17727 ins_pipe(pipe_slow);
17728 %}
17729
17730 instruct mulF_imm(regF dst, immF con) %{
17731 predicate(UseAVX == 0);
17732 match(Set dst (MulF dst con));
17733 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17734 ins_cost(150);
17735 ins_encode %{
17736 __ mulss($dst$$XMMRegister, $constantaddress($con));
17737 %}
17738 ins_pipe(pipe_slow);
17739 %}
17740
17741 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17742 predicate(UseAVX > 0);
17743 match(Set dst (MulF src1 src2));
17744
17745 format %{ "vmulss $dst, $src1, $src2" %}
17746 ins_cost(150);
17747 ins_encode %{
17748 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17749 %}
17750 ins_pipe(pipe_slow);
17751 %}
17752
17753 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17754 predicate(UseAVX > 0);
17755 match(Set dst (MulF src1 (LoadF src2)));
17756
17757 format %{ "vmulss $dst, $src1, $src2" %}
17758 ins_cost(150);
17759 ins_encode %{
17760 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17761 %}
17762 ins_pipe(pipe_slow);
17763 %}
17764
17765 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17766 predicate(UseAVX > 0);
17767 match(Set dst (MulF src con));
17768
17769 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17770 ins_cost(150);
17771 ins_encode %{
17772 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17773 %}
17774 ins_pipe(pipe_slow);
17775 %}
17776
17777 instruct mulD_reg(regD dst, regD src) %{
17778 predicate(UseAVX == 0);
17779 match(Set dst (MulD dst src));
17780
17781 format %{ "mulsd $dst, $src" %}
17782 ins_cost(150);
17783 ins_encode %{
17784 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17785 %}
17786 ins_pipe(pipe_slow);
17787 %}
17788
17789 instruct mulD_mem(regD dst, memory src) %{
17790 predicate(UseAVX == 0);
17791 match(Set dst (MulD dst (LoadD src)));
17792
17793 format %{ "mulsd $dst, $src" %}
17794 ins_cost(150);
17795 ins_encode %{
17796 __ mulsd($dst$$XMMRegister, $src$$Address);
17797 %}
17798 ins_pipe(pipe_slow);
17799 %}
17800
17801 instruct mulD_imm(regD dst, immD con) %{
17802 predicate(UseAVX == 0);
17803 match(Set dst (MulD dst con));
17804 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17805 ins_cost(150);
17806 ins_encode %{
17807 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17808 %}
17809 ins_pipe(pipe_slow);
17810 %}
17811
17812 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17813 predicate(UseAVX > 0);
17814 match(Set dst (MulD src1 src2));
17815
17816 format %{ "vmulsd $dst, $src1, $src2" %}
17817 ins_cost(150);
17818 ins_encode %{
17819 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17820 %}
17821 ins_pipe(pipe_slow);
17822 %}
17823
17824 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17825 predicate(UseAVX > 0);
17826 match(Set dst (MulD src1 (LoadD src2)));
17827
17828 format %{ "vmulsd $dst, $src1, $src2" %}
17829 ins_cost(150);
17830 ins_encode %{
17831 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17832 %}
17833 ins_pipe(pipe_slow);
17834 %}
17835
17836 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17837 predicate(UseAVX > 0);
17838 match(Set dst (MulD src con));
17839
17840 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17841 ins_cost(150);
17842 ins_encode %{
17843 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17844 %}
17845 ins_pipe(pipe_slow);
17846 %}
17847
17848 instruct divF_reg(regF dst, regF src) %{
17849 predicate(UseAVX == 0);
17850 match(Set dst (DivF dst src));
17851
17852 format %{ "divss $dst, $src" %}
17853 ins_cost(150);
17854 ins_encode %{
17855 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17856 %}
17857 ins_pipe(pipe_slow);
17858 %}
17859
17860 instruct divF_mem(regF dst, memory src) %{
17861 predicate(UseAVX == 0);
17862 match(Set dst (DivF dst (LoadF src)));
17863
17864 format %{ "divss $dst, $src" %}
17865 ins_cost(150);
17866 ins_encode %{
17867 __ divss($dst$$XMMRegister, $src$$Address);
17868 %}
17869 ins_pipe(pipe_slow);
17870 %}
17871
17872 instruct divF_imm(regF dst, immF con) %{
17873 predicate(UseAVX == 0);
17874 match(Set dst (DivF dst con));
17875 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17876 ins_cost(150);
17877 ins_encode %{
17878 __ divss($dst$$XMMRegister, $constantaddress($con));
17879 %}
17880 ins_pipe(pipe_slow);
17881 %}
17882
17883 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17884 predicate(UseAVX > 0);
17885 match(Set dst (DivF src1 src2));
17886
17887 format %{ "vdivss $dst, $src1, $src2" %}
17888 ins_cost(150);
17889 ins_encode %{
17890 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17891 %}
17892 ins_pipe(pipe_slow);
17893 %}
17894
17895 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17896 predicate(UseAVX > 0);
17897 match(Set dst (DivF src1 (LoadF src2)));
17898
17899 format %{ "vdivss $dst, $src1, $src2" %}
17900 ins_cost(150);
17901 ins_encode %{
17902 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17903 %}
17904 ins_pipe(pipe_slow);
17905 %}
17906
17907 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17908 predicate(UseAVX > 0);
17909 match(Set dst (DivF src con));
17910
17911 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17912 ins_cost(150);
17913 ins_encode %{
17914 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17915 %}
17916 ins_pipe(pipe_slow);
17917 %}
17918
17919 instruct divD_reg(regD dst, regD src) %{
17920 predicate(UseAVX == 0);
17921 match(Set dst (DivD dst src));
17922
17923 format %{ "divsd $dst, $src" %}
17924 ins_cost(150);
17925 ins_encode %{
17926 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17927 %}
17928 ins_pipe(pipe_slow);
17929 %}
17930
17931 instruct divD_mem(regD dst, memory src) %{
17932 predicate(UseAVX == 0);
17933 match(Set dst (DivD dst (LoadD src)));
17934
17935 format %{ "divsd $dst, $src" %}
17936 ins_cost(150);
17937 ins_encode %{
17938 __ divsd($dst$$XMMRegister, $src$$Address);
17939 %}
17940 ins_pipe(pipe_slow);
17941 %}
17942
17943 instruct divD_imm(regD dst, immD con) %{
17944 predicate(UseAVX == 0);
17945 match(Set dst (DivD dst con));
17946 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17947 ins_cost(150);
17948 ins_encode %{
17949 __ divsd($dst$$XMMRegister, $constantaddress($con));
17950 %}
17951 ins_pipe(pipe_slow);
17952 %}
17953
17954 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17955 predicate(UseAVX > 0);
17956 match(Set dst (DivD src1 src2));
17957
17958 format %{ "vdivsd $dst, $src1, $src2" %}
17959 ins_cost(150);
17960 ins_encode %{
17961 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17962 %}
17963 ins_pipe(pipe_slow);
17964 %}
17965
17966 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17967 predicate(UseAVX > 0);
17968 match(Set dst (DivD src1 (LoadD src2)));
17969
17970 format %{ "vdivsd $dst, $src1, $src2" %}
17971 ins_cost(150);
17972 ins_encode %{
17973 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17974 %}
17975 ins_pipe(pipe_slow);
17976 %}
17977
17978 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17979 predicate(UseAVX > 0);
17980 match(Set dst (DivD src con));
17981
17982 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17983 ins_cost(150);
17984 ins_encode %{
17985 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17986 %}
17987 ins_pipe(pipe_slow);
17988 %}
17989
17990 instruct absF_reg(regF dst) %{
17991 predicate(UseAVX == 0);
17992 match(Set dst (AbsF dst));
17993 ins_cost(150);
17994 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
17995 ins_encode %{
17996 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17997 %}
17998 ins_pipe(pipe_slow);
17999 %}
18000
18001 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18002 predicate(UseAVX > 0);
18003 match(Set dst (AbsF src));
18004 ins_cost(150);
18005 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18006 ins_encode %{
18007 int vlen_enc = Assembler::AVX_128bit;
18008 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18009 ExternalAddress(float_signmask()), vlen_enc);
18010 %}
18011 ins_pipe(pipe_slow);
18012 %}
18013
18014 instruct absD_reg(regD dst) %{
18015 predicate(UseAVX == 0);
18016 match(Set dst (AbsD dst));
18017 ins_cost(150);
18018 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18019 "# abs double by sign masking" %}
18020 ins_encode %{
18021 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18022 %}
18023 ins_pipe(pipe_slow);
18024 %}
18025
18026 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18027 predicate(UseAVX > 0);
18028 match(Set dst (AbsD src));
18029 ins_cost(150);
18030 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18031 "# abs double by sign masking" %}
18032 ins_encode %{
18033 int vlen_enc = Assembler::AVX_128bit;
18034 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18035 ExternalAddress(double_signmask()), vlen_enc);
18036 %}
18037 ins_pipe(pipe_slow);
18038 %}
18039
18040 instruct negF_reg(regF dst) %{
18041 predicate(UseAVX == 0);
18042 match(Set dst (NegF dst));
18043 ins_cost(150);
18044 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18045 ins_encode %{
18046 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18047 %}
18048 ins_pipe(pipe_slow);
18049 %}
18050
18051 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18052 predicate(UseAVX > 0);
18053 match(Set dst (NegF src));
18054 ins_cost(150);
18055 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18056 ins_encode %{
18057 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18058 ExternalAddress(float_signflip()));
18059 %}
18060 ins_pipe(pipe_slow);
18061 %}
18062
18063 instruct negD_reg(regD dst) %{
18064 predicate(UseAVX == 0);
18065 match(Set dst (NegD dst));
18066 ins_cost(150);
18067 format %{ "xorpd $dst, [0x8000000000000000]\t"
18068 "# neg double by sign flipping" %}
18069 ins_encode %{
18070 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18071 %}
18072 ins_pipe(pipe_slow);
18073 %}
18074
18075 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18076 predicate(UseAVX > 0);
18077 match(Set dst (NegD src));
18078 ins_cost(150);
18079 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18080 "# neg double by sign flipping" %}
18081 ins_encode %{
18082 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18083 ExternalAddress(double_signflip()));
18084 %}
18085 ins_pipe(pipe_slow);
18086 %}
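
// Illustrative Java shapes (an assumption): the scalar Abs/Neg rules above are
// typically reached from code such as
//
//   double a = Math.abs(x);   // AbsD: clears the sign bit (andpd with 0x7fff...)
//   double n = -x;            // NegD: flips the sign bit (xorpd with 0x8000...)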
18087
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18090 instruct sqrtF_reg(regF dst) %{
18091 match(Set dst (SqrtF dst));
18092 format %{ "sqrtss $dst, $dst" %}
18093 ins_encode %{
18094 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18095 %}
18096 ins_pipe(pipe_slow);
18097 %}
18098
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18101 instruct sqrtD_reg(regD dst) %{
18102 match(Set dst (SqrtD dst));
18103 format %{ "sqrtsd $dst, $dst" %}
18104 ins_encode %{
18105 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18106 %}
18107 ins_pipe(pipe_slow);
18108 %}
18109
18110 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18111 effect(TEMP tmp);
18112 match(Set dst (ConvF2HF src));
18113 ins_cost(125);
18114 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18115 ins_encode %{
18116 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18117 %}
18118 ins_pipe( pipe_slow );
18119 %}
18120
18121 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18122 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18123 effect(TEMP ktmp, TEMP rtmp);
18124 match(Set mem (StoreC mem (ConvF2HF src)));
18125 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18126 ins_encode %{
18127 __ movl($rtmp$$Register, 0x1);
18128 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18129 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18130 %}
18131 ins_pipe( pipe_slow );
18132 %}
18133
18134 instruct vconvF2HF(vec dst, vec src) %{
18135 match(Set dst (VectorCastF2HF src));
18136 format %{ "vector_conv_F2HF $dst $src" %}
18137 ins_encode %{
18138 int vlen_enc = vector_length_encoding(this, $src);
18139 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18140 %}
18141 ins_pipe( pipe_slow );
18142 %}
18143
18144 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18145 predicate(n->as_StoreVector()->memory_size() >= 16);
18146 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18147 format %{ "vcvtps2ph $mem,$src" %}
18148 ins_encode %{
18149 int vlen_enc = vector_length_encoding(this, $src);
18150 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18151 %}
18152 ins_pipe( pipe_slow );
18153 %}
18154
18155 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18156 match(Set dst (ConvHF2F src));
18157 format %{ "vcvtph2ps $dst,$src" %}
18158 ins_encode %{
18159 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18160 %}
18161 ins_pipe( pipe_slow );
18162 %}
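
// Illustrative Java shape (an assumption): the scalar ConvF2HF/ConvHF2F rules
// above typically back the Float.floatToFloat16(float) and
// Float.float16ToFloat(short) intrinsics, e.g.
//
//   short h = Float.floatToFloat16(f);   // ConvF2HF
//   float g = Float.float16ToFloat(h);   // ConvHF2F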
18163
18164 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18165 match(Set dst (VectorCastHF2F (LoadVector mem)));
18166 format %{ "vcvtph2ps $dst,$mem" %}
18167 ins_encode %{
18168 int vlen_enc = vector_length_encoding(this);
18169 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18170 %}
18171 ins_pipe( pipe_slow );
18172 %}
18173
18174 instruct vconvHF2F(vec dst, vec src) %{
18175 match(Set dst (VectorCastHF2F src));
18176 ins_cost(125);
18177 format %{ "vector_conv_HF2F $dst,$src" %}
18178 ins_encode %{
18179 int vlen_enc = vector_length_encoding(this);
18180 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18181 %}
18182 ins_pipe( pipe_slow );
18183 %}
18184
18185 // ---------------------------------------- VectorReinterpret ------------------------------------
18186 instruct reinterpret_mask(kReg dst) %{
18187 predicate(n->bottom_type()->isa_vectmask() &&
18188 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18189 match(Set dst (VectorReinterpret dst));
18190 ins_cost(125);
18191 format %{ "vector_reinterpret $dst\t!" %}
18192 ins_encode %{
18193 // empty
18194 %}
18195 ins_pipe( pipe_slow );
18196 %}
18197
18198 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18199 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18200 n->bottom_type()->isa_vectmask() &&
18201 n->in(1)->bottom_type()->isa_vectmask() &&
18202 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size (in bytes)
18204 match(Set dst (VectorReinterpret src));
18205 effect(TEMP xtmp);
18206 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18207 ins_encode %{
18208 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18209 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18210 assert(src_sz == dst_sz , "src and dst size mismatch");
18211 int vlen_enc = vector_length_encoding(src_sz);
18212 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18213 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18214 %}
18215 ins_pipe( pipe_slow );
18216 %}
18217
18218 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18219 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18220 n->bottom_type()->isa_vectmask() &&
18221 n->in(1)->bottom_type()->isa_vectmask() &&
18222 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18223 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size (in bytes)
18225 match(Set dst (VectorReinterpret src));
18226 effect(TEMP xtmp);
18227 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18228 ins_encode %{
18229 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18230 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18231 assert(src_sz == dst_sz , "src and dst size mismatch");
18232 int vlen_enc = vector_length_encoding(src_sz);
18233 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18234 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18235 %}
18236 ins_pipe( pipe_slow );
18237 %}
18238
18239 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18240 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18241 n->bottom_type()->isa_vectmask() &&
18242 n->in(1)->bottom_type()->isa_vectmask() &&
18243 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18244 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size (in bytes)
18246 match(Set dst (VectorReinterpret src));
18247 effect(TEMP xtmp);
18248 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18249 ins_encode %{
18250 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18251 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18252 assert(src_sz == dst_sz , "src and dst size mismatch");
18253 int vlen_enc = vector_length_encoding(src_sz);
18254 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18255 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18256 %}
18257 ins_pipe( pipe_slow );
18258 %}
18259
18260 instruct reinterpret(vec dst) %{
18261 predicate(!n->bottom_type()->isa_vectmask() &&
18262 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18263 match(Set dst (VectorReinterpret dst));
18264 ins_cost(125);
18265 format %{ "vector_reinterpret $dst\t!" %}
18266 ins_encode %{
18267 // empty
18268 %}
18269 ins_pipe( pipe_slow );
18270 %}
18271
18272 instruct reinterpret_expand(vec dst, vec src) %{
18273 predicate(UseAVX == 0 &&
18274 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18275 match(Set dst (VectorReinterpret src));
18276 ins_cost(125);
18277 effect(TEMP dst);
18278 format %{ "vector_reinterpret_expand $dst,$src" %}
18279 ins_encode %{
18280 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18281 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18282
18283 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18284 if (src_vlen_in_bytes == 4) {
18285 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18286 } else {
18287 assert(src_vlen_in_bytes == 8, "");
18288 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18289 }
18290 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18291 %}
18292 ins_pipe( pipe_slow );
18293 %}
18294
18295 instruct vreinterpret_expand4(legVec dst, vec src) %{
18296 predicate(UseAVX > 0 &&
18297 !n->bottom_type()->isa_vectmask() &&
18298 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18299 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18300 match(Set dst (VectorReinterpret src));
18301 ins_cost(125);
18302 format %{ "vector_reinterpret_expand $dst,$src" %}
18303 ins_encode %{
18304 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18305 %}
18306 ins_pipe( pipe_slow );
18307 %}
18308
18309
18310 instruct vreinterpret_expand(legVec dst, vec src) %{
18311 predicate(UseAVX > 0 &&
18312 !n->bottom_type()->isa_vectmask() &&
18313 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18314 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18315 match(Set dst (VectorReinterpret src));
18316 ins_cost(125);
18317 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18318 ins_encode %{
18319 switch (Matcher::vector_length_in_bytes(this, $src)) {
18320 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18321 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18322 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18323 default: ShouldNotReachHere();
18324 }
18325 %}
18326 ins_pipe( pipe_slow );
18327 %}
18328
18329 instruct reinterpret_shrink(vec dst, legVec src) %{
18330 predicate(!n->bottom_type()->isa_vectmask() &&
18331 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18332 match(Set dst (VectorReinterpret src));
18333 ins_cost(125);
18334 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18335 ins_encode %{
18336 switch (Matcher::vector_length_in_bytes(this)) {
18337 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18338 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18339 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18340 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18341 default: ShouldNotReachHere();
18342 }
18343 %}
18344 ins_pipe( pipe_slow );
18345 %}
18346
18347 // ----------------------------------------------------------------------------------------------------
18348
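// Illustrative Java shapes (an assumption): the RoundDoubleMode rules below
// typically back the Math.rint/Math.floor/Math.ceil intrinsics, with the
// rounding mode supplied as the $rmode immediate:
//
//   double r = Math.rint(x);    // round to nearest even
//   double f = Math.floor(x);   // round toward negative infinity
//   double c = Math.ceil(x);    // round toward positive infinity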
18349 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18350 match(Set dst (RoundDoubleMode src rmode));
18351 format %{ "roundsd $dst,$src" %}
18352 ins_cost(150);
18353 ins_encode %{
18354 assert(UseSSE >= 4, "required");
18355 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18356 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18357 }
18358 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18359 %}
18360 ins_pipe(pipe_slow);
18361 %}
18362
18363 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18364 match(Set dst (RoundDoubleMode con rmode));
18365 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18366 ins_cost(150);
18367 ins_encode %{
18368 assert(UseSSE >= 4, "required");
18369 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18370 %}
18371 ins_pipe(pipe_slow);
18372 %}
18373
18374 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18375 predicate(Matcher::vector_length(n) < 8);
18376 match(Set dst (RoundDoubleModeV src rmode));
18377 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18378 ins_encode %{
18379 assert(UseAVX > 0, "required");
18380 int vlen_enc = vector_length_encoding(this);
18381 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18382 %}
18383 ins_pipe( pipe_slow );
18384 %}
18385
18386 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18387 predicate(Matcher::vector_length(n) == 8);
18388 match(Set dst (RoundDoubleModeV src rmode));
18389 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18390 ins_encode %{
18391 assert(UseAVX > 2, "required");
18392 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18393 %}
18394 ins_pipe( pipe_slow );
18395 %}
18396
18397 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18398 predicate(Matcher::vector_length(n) < 8);
18399 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18400 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18401 ins_encode %{
18402 assert(UseAVX > 0, "required");
18403 int vlen_enc = vector_length_encoding(this);
18404 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18405 %}
18406 ins_pipe( pipe_slow );
18407 %}
18408
18409 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18410 predicate(Matcher::vector_length(n) == 8);
18411 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18412 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18413 ins_encode %{
18414 assert(UseAVX > 2, "required");
18415 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18416 %}
18417 ins_pipe( pipe_slow );
18418 %}
18419
18420 instruct onspinwait() %{
18421 match(OnSpinWait);
18422 ins_cost(200);
18423
18424 format %{
18425 $$template
18426 $$emit$$"pause\t! membar_onspinwait"
18427 %}
18428 ins_encode %{
18429 __ pause();
18430 %}
18431 ins_pipe(pipe_slow);
18432 %}
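
// Illustrative Java shape: OnSpinWait backs the Thread.onSpinWait() intrinsic,
// e.g. in a spin loop:
//
//   while (!ready) {
//     Thread.onSpinWait();   // emits the pause instruction above
//   }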
18433
18434 // a * b + c
18435 instruct fmaD_reg(regD a, regD b, regD c) %{
18436 match(Set c (FmaD c (Binary a b)));
18437 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18438 ins_cost(150);
18439 ins_encode %{
18440 assert(UseFMA, "Needs FMA instructions support.");
18441 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18442 %}
18443 ins_pipe( pipe_slow );
18444 %}
18445
18446 // a * b + c
18447 instruct fmaF_reg(regF a, regF b, regF c) %{
18448 match(Set c (FmaF c (Binary a b)));
18449 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18450 ins_cost(150);
18451 ins_encode %{
18452 assert(UseFMA, "Needs FMA instructions support.");
18453 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18454 %}
18455 ins_pipe( pipe_slow );
18456 %}
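
// Illustrative Java shapes (an assumption): the FmaD/FmaF rules above back the
// Math.fma intrinsics when UseFMA is enabled, e.g.
//
//   double d = Math.fma(a, b, c);      // d = a * b + c with a single rounding
//   float  g = Math.fma(af, bf, cf);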
18457
18458 // ====================VECTOR INSTRUCTIONS=====================================
18459
18460 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18461 instruct MoveVec2Leg(legVec dst, vec src) %{
18462 match(Set dst src);
18463 format %{ "" %}
18464 ins_encode %{
18465 ShouldNotReachHere();
18466 %}
18467 ins_pipe( fpu_reg_reg );
18468 %}
18469
18470 instruct MoveLeg2Vec(vec dst, legVec src) %{
18471 match(Set dst src);
18472 format %{ "" %}
18473 ins_encode %{
18474 ShouldNotReachHere();
18475 %}
18476 ins_pipe( fpu_reg_reg );
18477 %}
18478
18479 // ============================================================================
18480
18481 // Load vectors generic operand pattern
18482 instruct loadV(vec dst, memory mem) %{
18483 match(Set dst (LoadVector mem));
18484 ins_cost(125);
18485 format %{ "load_vector $dst,$mem" %}
18486 ins_encode %{
18487 BasicType bt = Matcher::vector_element_basic_type(this);
18488 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18489 %}
18490 ins_pipe( pipe_slow );
18491 %}
18492
18493 // Store vectors generic operand pattern.
18494 instruct storeV(memory mem, vec src) %{
18495 match(Set mem (StoreVector mem src));
18496 ins_cost(145);
18497 format %{ "store_vector $mem,$src\n\t" %}
18498 ins_encode %{
18499 switch (Matcher::vector_length_in_bytes(this, $src)) {
18500 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18501 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18502 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18503 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18504 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18505 default: ShouldNotReachHere();
18506 }
18507 %}
18508 ins_pipe( pipe_slow );
18509 %}
18510
18511 // ---------------------------------------- Gather ------------------------------------
18512
18513 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
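//
// Illustrative Java shape (an assumption; requires the incubating
// jdk.incubator.vector API): gathers are typically reached from Vector API
// loads that take an index map, e.g.
//
//   IntVector v = IntVector.fromArray(IntVector.SPECIES_256, a, 0, indexMap, 0);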
18514
18515 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18516 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18517 Matcher::vector_length_in_bytes(n) <= 32);
18518 match(Set dst (LoadVectorGather mem idx));
18519 effect(TEMP dst, TEMP tmp, TEMP mask);
18520 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18521 ins_encode %{
18522 int vlen_enc = vector_length_encoding(this);
18523 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18524 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18525 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18526 __ lea($tmp$$Register, $mem$$Address);
18527 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18528 %}
18529 ins_pipe( pipe_slow );
18530 %}
18531
18532
18533 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18534 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18535 !is_subword_type(Matcher::vector_element_basic_type(n)));
18536 match(Set dst (LoadVectorGather mem idx));
18537 effect(TEMP dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18539 ins_encode %{
18540 int vlen_enc = vector_length_encoding(this);
18541 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18542 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18543 __ lea($tmp$$Register, $mem$$Address);
18544 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18545 %}
18546 ins_pipe( pipe_slow );
18547 %}
18548
18549 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18550 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18551 !is_subword_type(Matcher::vector_element_basic_type(n)));
18552 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18553 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18555 ins_encode %{
18556 assert(UseAVX > 2, "sanity");
18557 int vlen_enc = vector_length_encoding(this);
18558 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18559 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
// Note: since the gather instruction partially updates the opmask register used
// for predication, the mask operand is moved to a temporary first.
18562 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18563 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18564 __ lea($tmp$$Register, $mem$$Address);
18565 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18566 %}
18567 ins_pipe( pipe_slow );
18568 %}
18569
18570 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18571 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18572 match(Set dst (LoadVectorGather mem idx_base));
18573 effect(TEMP tmp, TEMP rtmp);
18574 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18575 ins_encode %{
18576 int vlen_enc = vector_length_encoding(this);
18577 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18578 __ lea($tmp$$Register, $mem$$Address);
18579 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18580 %}
18581 ins_pipe( pipe_slow );
18582 %}
18583
18584 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18585 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18586 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18587 match(Set dst (LoadVectorGather mem idx_base));
18588 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18589 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18590 ins_encode %{
18591 int vlen_enc = vector_length_encoding(this);
18592 int vector_len = Matcher::vector_length(this);
18593 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18594 __ lea($tmp$$Register, $mem$$Address);
18595 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18596 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18597 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18598 %}
18599 ins_pipe( pipe_slow );
18600 %}
18601
18602 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18603 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18604 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18605 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18606 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18607 ins_encode %{
18608 int vlen_enc = vector_length_encoding(this);
18609 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18610 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18611 __ lea($tmp$$Register, $mem$$Address);
18612 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18613 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18614 %}
18615 ins_pipe( pipe_slow );
18616 %}
18617
18618 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18619 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18620 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18621 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18622 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18623 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18624 ins_encode %{
18625 int vlen_enc = vector_length_encoding(this);
18626 int vector_len = Matcher::vector_length(this);
18627 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18628 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18629 __ lea($tmp$$Register, $mem$$Address);
18630 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18631 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18632 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18633 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18634 %}
18635 ins_pipe( pipe_slow );
18636 %}
18637
18638 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18639 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18640 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18641 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18642 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18643 ins_encode %{
18644 int vlen_enc = vector_length_encoding(this);
18645 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18646 __ lea($tmp$$Register, $mem$$Address);
18647 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18648 if (elem_bt == T_SHORT) {
18649 __ movl($mask_idx$$Register, 0x55555555);
18650 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18651 }
18652 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18653 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18654 %}
18655 ins_pipe( pipe_slow );
18656 %}
18657
18658 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18659 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18660 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18661 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18662 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18663 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18664 ins_encode %{
18665 int vlen_enc = vector_length_encoding(this);
18666 int vector_len = Matcher::vector_length(this);
18667 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18668 __ lea($tmp$$Register, $mem$$Address);
18669 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18670 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18671 if (elem_bt == T_SHORT) {
18672 __ movl($mask_idx$$Register, 0x55555555);
18673 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18674 }
18675 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18676 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18677 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18678 %}
18679 ins_pipe( pipe_slow );
18680 %}
18681
18682 // ====================Scatter=======================================
18683
18684 // Scatter INT, LONG, FLOAT, DOUBLE
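// A sketch of the lowering shared by the scatter rules below: the destination
// base address is materialized into a GPR with LEA, an opmask is prepared (an
// all-ones mask loaded from the constant table for the unmasked form, a copy
// of the supplied mask for the masked form), and evscatter() then emits the
// EVEX scatter for the element type (e.g. a dword scatter for T_INT).
// Scatter is only generated for >= 16-byte vectors of non-subword element
// types, as the asserts in the encodings below check.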
18685
18686 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18687 predicate(UseAVX > 2);
18688 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18689 effect(TEMP tmp, TEMP ktmp);
18690   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18691 ins_encode %{
18692 int vlen_enc = vector_length_encoding(this, $src);
18693 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18694
18695 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18696 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18697
18698 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18699 __ lea($tmp$$Register, $mem$$Address);
18700 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18701 %}
18702 ins_pipe( pipe_slow );
18703 %}
18704
18705 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18706 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18707 effect(TEMP tmp, TEMP ktmp);
18708   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
18709 ins_encode %{
18710 int vlen_enc = vector_length_encoding(this, $src);
18711 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18712 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18713 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18714     // Note: The scatter instruction partially updates the opmask register used
18715     // for predication, hence the mask operand is first copied into a temporary.
18716 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18717 __ lea($tmp$$Register, $mem$$Address);
18718 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18719 %}
18720 ins_pipe( pipe_slow );
18721 %}
18722
18723 // ====================REPLICATE=======================================
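// The replicate (broadcast) rules below share a common selection ladder:
// when the scalar can be broadcast straight from a general-purpose register
// (EVEX GPR-broadcast forms, gated on AVX512BW for byte/short elements and
// AVX512VL for sub-512-bit operands), evpbroadcast* is used; with AVX2 the
// value is first moved into an XMM register and broadcast with vpbroadcast*;
// on plain SSE it is spread with shuffle sequences (punpck/pshuf). Memory and
// immediate operands have dedicated rules that broadcast directly from memory
// or from the constant table.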
18724
18725 // Replicate byte scalar to be vector
18726 instruct vReplB_reg(vec dst, rRegI src) %{
18727 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18728 match(Set dst (Replicate src));
18729 format %{ "replicateB $dst,$src" %}
18730 ins_encode %{
18731 uint vlen = Matcher::vector_length(this);
18732 if (UseAVX >= 2) {
18733 int vlen_enc = vector_length_encoding(this);
18734 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18735 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18736 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18737 } else {
18738 __ movdl($dst$$XMMRegister, $src$$Register);
18739 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18740 }
18741 } else {
18742 assert(UseAVX < 2, "");
18743 __ movdl($dst$$XMMRegister, $src$$Register);
18744 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18745 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18746 if (vlen >= 16) {
18747 assert(vlen == 16, "");
18748 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18749 }
18750 }
18751 %}
18752 ins_pipe( pipe_slow );
18753 %}
18754
18755 instruct ReplB_mem(vec dst, memory mem) %{
18756 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18757 match(Set dst (Replicate (LoadB mem)));
18758 format %{ "replicateB $dst,$mem" %}
18759 ins_encode %{
18760 int vlen_enc = vector_length_encoding(this);
18761 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18762 %}
18763 ins_pipe( pipe_slow );
18764 %}
18765
18766 // ====================ReplicateS=======================================
18767
18768 instruct vReplS_reg(vec dst, rRegI src) %{
18769 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18770 match(Set dst (Replicate src));
18771 format %{ "replicateS $dst,$src" %}
18772 ins_encode %{
18773 uint vlen = Matcher::vector_length(this);
18774 int vlen_enc = vector_length_encoding(this);
18775 if (UseAVX >= 2) {
18776 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18777 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18778 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18779 } else {
18780 __ movdl($dst$$XMMRegister, $src$$Register);
18781 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18782 }
18783 } else {
18784 assert(UseAVX < 2, "");
18785 __ movdl($dst$$XMMRegister, $src$$Register);
18786 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18787 if (vlen >= 8) {
18788 assert(vlen == 8, "");
18789 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18790 }
18791 }
18792 %}
18793 ins_pipe( pipe_slow );
18794 %}
18795
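// Half-float (Float16) replication: the FP16 value is handled as its raw
// 16-bit pattern (vector element type T_SHORT). The immediate or XMM source
// is first moved into a GPR and then broadcast with evpbroadcastw; both rules
// require AVX512-FP16 support.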
18796 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18797 match(Set dst (Replicate con));
18798 effect(TEMP rtmp);
18799 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18800 ins_encode %{
18801 int vlen_enc = vector_length_encoding(this);
18802 BasicType bt = Matcher::vector_element_basic_type(this);
18803 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18804 __ movl($rtmp$$Register, $con$$constant);
18805 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18806 %}
18807 ins_pipe( pipe_slow );
18808 %}
18809
18810 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18811 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18812 match(Set dst (Replicate src));
18813 effect(TEMP rtmp);
18814 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18815 ins_encode %{
18816 int vlen_enc = vector_length_encoding(this);
18817 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18818 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18819 %}
18820 ins_pipe( pipe_slow );
18821 %}
18822
18823 instruct ReplS_mem(vec dst, memory mem) %{
18824 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18825 match(Set dst (Replicate (LoadS mem)));
18826 format %{ "replicateS $dst,$mem" %}
18827 ins_encode %{
18828 int vlen_enc = vector_length_encoding(this);
18829 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18830 %}
18831 ins_pipe( pipe_slow );
18832 %}
18833
18834 // ====================ReplicateI=======================================
18835
18836 instruct ReplI_reg(vec dst, rRegI src) %{
18837 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18838 match(Set dst (Replicate src));
18839 format %{ "replicateI $dst,$src" %}
18840 ins_encode %{
18841 uint vlen = Matcher::vector_length(this);
18842 int vlen_enc = vector_length_encoding(this);
18843 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18844 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18845 } else if (VM_Version::supports_avx2()) {
18846 __ movdl($dst$$XMMRegister, $src$$Register);
18847 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18848 } else {
18849 __ movdl($dst$$XMMRegister, $src$$Register);
18850 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18851 }
18852 %}
18853 ins_pipe( pipe_slow );
18854 %}
18855
18856 instruct ReplI_mem(vec dst, memory mem) %{
18857 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18858 match(Set dst (Replicate (LoadI mem)));
18859 format %{ "replicateI $dst,$mem" %}
18860 ins_encode %{
18861 int vlen_enc = vector_length_encoding(this);
18862 if (VM_Version::supports_avx2()) {
18863 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18864 } else if (VM_Version::supports_avx()) {
18865 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18866 } else {
18867 __ movdl($dst$$XMMRegister, $mem$$Address);
18868 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18869 }
18870 %}
18871 ins_pipe( pipe_slow );
18872 %}
18873
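// For immediate replication the constant is pre-replicated into the constant
// table just wide enough for the cheapest available broadcast: 4 bytes when a
// reg/mem broadcast exists (AVX), 8 bytes for movddup (SSE3), and a full
// 16 bytes otherwise; load_constant_vector() then loads or broadcasts that
// entry to the requested vector width. For example, a T_SHORT immediate with
// AVX enabled is stored as 4/2 = 2 copies and broadcast from there.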
18874 instruct ReplI_imm(vec dst, immI con) %{
18875 predicate(Matcher::is_non_long_integral_vector(n));
18876 match(Set dst (Replicate con));
18877 format %{ "replicateI $dst,$con" %}
18878 ins_encode %{
18879 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18880 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18881 type2aelembytes(Matcher::vector_element_basic_type(this))));
18882 BasicType bt = Matcher::vector_element_basic_type(this);
18883 int vlen = Matcher::vector_length_in_bytes(this);
18884 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18885 %}
18886 ins_pipe( pipe_slow );
18887 %}
18888
18889 // Replicate scalar zero to be vector
18890 instruct ReplI_zero(vec dst, immI_0 zero) %{
18891 predicate(Matcher::is_non_long_integral_vector(n));
18892 match(Set dst (Replicate zero));
18893 format %{ "replicateI $dst,$zero" %}
18894 ins_encode %{
18895 int vlen_enc = vector_length_encoding(this);
18896 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18897 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18898 } else {
18899 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18900 }
18901 %}
18902 ins_pipe( fpu_reg_reg );
18903 %}
18904
18905 instruct ReplI_M1(vec dst, immI_M1 con) %{
18906 predicate(Matcher::is_non_long_integral_vector(n));
18907 match(Set dst (Replicate con));
18908 format %{ "vallones $dst" %}
18909 ins_encode %{
18910 int vector_len = vector_length_encoding(this);
18911 __ vallones($dst$$XMMRegister, vector_len);
18912 %}
18913 ins_pipe( pipe_slow );
18914 %}
18915
18916 // ====================ReplicateL=======================================
18917
18918 // Replicate long (8 byte) scalar to be vector
18919 instruct ReplL_reg(vec dst, rRegL src) %{
18920 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18921 match(Set dst (Replicate src));
18922 format %{ "replicateL $dst,$src" %}
18923 ins_encode %{
18924 int vlen = Matcher::vector_length(this);
18925 int vlen_enc = vector_length_encoding(this);
18926 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18927 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18928 } else if (VM_Version::supports_avx2()) {
18929 __ movdq($dst$$XMMRegister, $src$$Register);
18930 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18931 } else {
18932 __ movdq($dst$$XMMRegister, $src$$Register);
18933 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18934 }
18935 %}
18936 ins_pipe( pipe_slow );
18937 %}
18938
18939 instruct ReplL_mem(vec dst, memory mem) %{
18940 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18941 match(Set dst (Replicate (LoadL mem)));
18942 format %{ "replicateL $dst,$mem" %}
18943 ins_encode %{
18944 int vlen_enc = vector_length_encoding(this);
18945 if (VM_Version::supports_avx2()) {
18946 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18947 } else if (VM_Version::supports_sse3()) {
18948 __ movddup($dst$$XMMRegister, $mem$$Address);
18949 } else {
18950 __ movq($dst$$XMMRegister, $mem$$Address);
18951 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18952 }
18953 %}
18954 ins_pipe( pipe_slow );
18955 %}
18956
18957 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18958 instruct ReplL_imm(vec dst, immL con) %{
18959 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18960 match(Set dst (Replicate con));
18961 format %{ "replicateL $dst,$con" %}
18962 ins_encode %{
18963 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18964 int vlen = Matcher::vector_length_in_bytes(this);
18965 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18966 %}
18967 ins_pipe( pipe_slow );
18968 %}
18969
18970 instruct ReplL_zero(vec dst, immL0 zero) %{
18971 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18972 match(Set dst (Replicate zero));
18973 format %{ "replicateL $dst,$zero" %}
18974 ins_encode %{
18975 int vlen_enc = vector_length_encoding(this);
18976 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18977 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18978 } else {
18979 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18980 }
18981 %}
18982 ins_pipe( fpu_reg_reg );
18983 %}
18984
18985 instruct ReplL_M1(vec dst, immL_M1 con) %{
18986 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18987 match(Set dst (Replicate con));
18988 format %{ "vallones $dst" %}
18989 ins_encode %{
18990 int vector_len = vector_length_encoding(this);
18991 __ vallones($dst$$XMMRegister, vector_len);
18992 %}
18993 ins_pipe( pipe_slow );
18994 %}
18995
18996 // ====================ReplicateF=======================================
18997
18998 instruct vReplF_reg(vec dst, vlRegF src) %{
18999 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19000 match(Set dst (Replicate src));
19001 format %{ "replicateF $dst,$src" %}
19002 ins_encode %{
19003 uint vlen = Matcher::vector_length(this);
19004 int vlen_enc = vector_length_encoding(this);
19005 if (vlen <= 4) {
19006 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19007 } else if (VM_Version::supports_avx2()) {
19008 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19009 } else {
19010 assert(vlen == 8, "sanity");
19011 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19012 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19013 }
19014 %}
19015 ins_pipe( pipe_slow );
19016 %}
19017
19018 instruct ReplF_reg(vec dst, vlRegF src) %{
19019 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19020 match(Set dst (Replicate src));
19021 format %{ "replicateF $dst,$src" %}
19022 ins_encode %{
19023 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19024 %}
19025 ins_pipe( pipe_slow );
19026 %}
19027
19028 instruct ReplF_mem(vec dst, memory mem) %{
19029 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19030 match(Set dst (Replicate (LoadF mem)));
19031 format %{ "replicateF $dst,$mem" %}
19032 ins_encode %{
19033 int vlen_enc = vector_length_encoding(this);
19034 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19035 %}
19036 ins_pipe( pipe_slow );
19037 %}
19038
19039 // Replicate float scalar immediate to be vector by loading from const table.
19040 instruct ReplF_imm(vec dst, immF con) %{
19041 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19042 match(Set dst (Replicate con));
19043 format %{ "replicateF $dst,$con" %}
19044 ins_encode %{
19045 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19046 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19047 int vlen = Matcher::vector_length_in_bytes(this);
19048 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19049 %}
19050 ins_pipe( pipe_slow );
19051 %}
19052
19053 instruct ReplF_zero(vec dst, immF0 zero) %{
19054 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19055 match(Set dst (Replicate zero));
19056 format %{ "replicateF $dst,$zero" %}
19057 ins_encode %{
19058 int vlen_enc = vector_length_encoding(this);
19059 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19060 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19061 } else {
19062 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19063 }
19064 %}
19065 ins_pipe( fpu_reg_reg );
19066 %}
19067
19068 // ====================ReplicateD=======================================
19069
19070 // Replicate double (8 bytes) scalar to be vector
19071 instruct vReplD_reg(vec dst, vlRegD src) %{
19072 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19073 match(Set dst (Replicate src));
19074 format %{ "replicateD $dst,$src" %}
19075 ins_encode %{
19076 uint vlen = Matcher::vector_length(this);
19077 int vlen_enc = vector_length_encoding(this);
19078 if (vlen <= 2) {
19079 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19080 } else if (VM_Version::supports_avx2()) {
19081 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19082 } else {
19083 assert(vlen == 4, "sanity");
19084 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19085 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19086 }
19087 %}
19088 ins_pipe( pipe_slow );
19089 %}
19090
19091 instruct ReplD_reg(vec dst, vlRegD src) %{
19092 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19093 match(Set dst (Replicate src));
19094 format %{ "replicateD $dst,$src" %}
19095 ins_encode %{
19096 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19097 %}
19098 ins_pipe( pipe_slow );
19099 %}
19100
19101 instruct ReplD_mem(vec dst, memory mem) %{
19102 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19103 match(Set dst (Replicate (LoadD mem)));
19104 format %{ "replicateD $dst,$mem" %}
19105 ins_encode %{
19106 if (Matcher::vector_length(this) >= 4) {
19107 int vlen_enc = vector_length_encoding(this);
19108 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19109 } else {
19110 __ movddup($dst$$XMMRegister, $mem$$Address);
19111 }
19112 %}
19113 ins_pipe( pipe_slow );
19114 %}
19115
19116 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19117 instruct ReplD_imm(vec dst, immD con) %{
19118 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19119 match(Set dst (Replicate con));
19120 format %{ "replicateD $dst,$con" %}
19121 ins_encode %{
19122 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19123 int vlen = Matcher::vector_length_in_bytes(this);
19124 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19125 %}
19126 ins_pipe( pipe_slow );
19127 %}
19128
19129 instruct ReplD_zero(vec dst, immD0 zero) %{
19130 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19131 match(Set dst (Replicate zero));
19132 format %{ "replicateD $dst,$zero" %}
19133 ins_encode %{
19134 int vlen_enc = vector_length_encoding(this);
19135 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19136 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19137 } else {
19138 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19139 }
19140 %}
19141 ins_pipe( fpu_reg_reg );
19142 %}
19143
19144 // ====================VECTOR INSERT=======================================
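// For vectors wider than 128 bits the insert rules below decompose the element
// index into an in-lane index and a 128-bit-lane index:
//   x_idx = idx & (elems_per_128bit_lane - 1)   (position inside the lane)
//   y_idx = idx >> log2(elems_per_128bit_lane)  (which 128-bit lane)
// The selected lane is extracted into a temp, the scalar is inserted with a
// pinsr/insertps form, and the lane is written back. A worked example: a
// 256-bit T_SHORT vector has 8 shorts per lane (log2epr = 3), so idx = 11
// gives x_idx = 3 and y_idx = 1. For floats, insertps is used with the
// destination lane encoded in imm8 bits 5:4, hence the x_idx << 4 below.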
19145
19146 instruct insert(vec dst, rRegI val, immU8 idx) %{
19147 predicate(Matcher::vector_length_in_bytes(n) < 32);
19148 match(Set dst (VectorInsert (Binary dst val) idx));
19149 format %{ "vector_insert $dst,$val,$idx" %}
19150 ins_encode %{
19151 assert(UseSSE >= 4, "required");
19152 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19153
19154 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19155
19156 assert(is_integral_type(elem_bt), "");
19157 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19158
19159 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19160 %}
19161 ins_pipe( pipe_slow );
19162 %}
19163
19164 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19165 predicate(Matcher::vector_length_in_bytes(n) == 32);
19166 match(Set dst (VectorInsert (Binary src val) idx));
19167 effect(TEMP vtmp);
19168 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19169 ins_encode %{
19170 int vlen_enc = Assembler::AVX_256bit;
19171 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19172 int elem_per_lane = 16/type2aelembytes(elem_bt);
19173 int log2epr = log2(elem_per_lane);
19174
19175 assert(is_integral_type(elem_bt), "sanity");
19176 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19177
19178 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19179 uint y_idx = ($idx$$constant >> log2epr) & 1;
19180 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19181 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19182 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19183 %}
19184 ins_pipe( pipe_slow );
19185 %}
19186
19187 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19188 predicate(Matcher::vector_length_in_bytes(n) == 64);
19189 match(Set dst (VectorInsert (Binary src val) idx));
19190 effect(TEMP vtmp);
19191 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19192 ins_encode %{
19193 assert(UseAVX > 2, "sanity");
19194
19195 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19196 int elem_per_lane = 16/type2aelembytes(elem_bt);
19197 int log2epr = log2(elem_per_lane);
19198
19199 assert(is_integral_type(elem_bt), "");
19200 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19201
19202 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19203 uint y_idx = ($idx$$constant >> log2epr) & 3;
19204 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19205 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19206 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19207 %}
19208 ins_pipe( pipe_slow );
19209 %}
19210
19211 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19212 predicate(Matcher::vector_length(n) == 2);
19213 match(Set dst (VectorInsert (Binary dst val) idx));
19214 format %{ "vector_insert $dst,$val,$idx" %}
19215 ins_encode %{
19216 assert(UseSSE >= 4, "required");
19217 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19218 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19219
19220 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19221 %}
19222 ins_pipe( pipe_slow );
19223 %}
19224
19225 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19226 predicate(Matcher::vector_length(n) == 4);
19227 match(Set dst (VectorInsert (Binary src val) idx));
19228 effect(TEMP vtmp);
19229 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19230 ins_encode %{
19231 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19232 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19233
19234 uint x_idx = $idx$$constant & right_n_bits(1);
19235 uint y_idx = ($idx$$constant >> 1) & 1;
19236 int vlen_enc = Assembler::AVX_256bit;
19237 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19238 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19239 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19240 %}
19241 ins_pipe( pipe_slow );
19242 %}
19243
19244 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19245 predicate(Matcher::vector_length(n) == 8);
19246 match(Set dst (VectorInsert (Binary src val) idx));
19247 effect(TEMP vtmp);
19248 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19249 ins_encode %{
19250 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19251 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19252
19253 uint x_idx = $idx$$constant & right_n_bits(1);
19254 uint y_idx = ($idx$$constant >> 1) & 3;
19255 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19256 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19257 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19258 %}
19259 ins_pipe( pipe_slow );
19260 %}
19261
19262 instruct insertF(vec dst, regF val, immU8 idx) %{
19263 predicate(Matcher::vector_length(n) < 8);
19264 match(Set dst (VectorInsert (Binary dst val) idx));
19265 format %{ "vector_insert $dst,$val,$idx" %}
19266 ins_encode %{
19267 assert(UseSSE >= 4, "sanity");
19268
19269 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19270 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19271
19272 uint x_idx = $idx$$constant & right_n_bits(2);
19273 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19274 %}
19275 ins_pipe( pipe_slow );
19276 %}
19277
19278 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19279 predicate(Matcher::vector_length(n) >= 8);
19280 match(Set dst (VectorInsert (Binary src val) idx));
19281 effect(TEMP vtmp);
19282 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19283 ins_encode %{
19284 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19285 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19286
19287 int vlen = Matcher::vector_length(this);
19288 uint x_idx = $idx$$constant & right_n_bits(2);
19289 if (vlen == 8) {
19290 uint y_idx = ($idx$$constant >> 2) & 1;
19291 int vlen_enc = Assembler::AVX_256bit;
19292 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19293 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19294 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19295 } else {
19296 assert(vlen == 16, "sanity");
19297 uint y_idx = ($idx$$constant >> 2) & 3;
19298 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19299 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19300 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19301 }
19302 %}
19303 ins_pipe( pipe_slow );
19304 %}
19305
19306 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19307 predicate(Matcher::vector_length(n) == 2);
19308 match(Set dst (VectorInsert (Binary dst val) idx));
19309 effect(TEMP tmp);
19310 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19311 ins_encode %{
19312 assert(UseSSE >= 4, "sanity");
19313 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19314 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19315
19316 __ movq($tmp$$Register, $val$$XMMRegister);
19317 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19318 %}
19319 ins_pipe( pipe_slow );
19320 %}
19321
19322 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19323 predicate(Matcher::vector_length(n) == 4);
19324 match(Set dst (VectorInsert (Binary src val) idx));
19325 effect(TEMP vtmp, TEMP tmp);
19326 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19327 ins_encode %{
19328 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19329 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19330
19331 uint x_idx = $idx$$constant & right_n_bits(1);
19332 uint y_idx = ($idx$$constant >> 1) & 1;
19333 int vlen_enc = Assembler::AVX_256bit;
19334 __ movq($tmp$$Register, $val$$XMMRegister);
19335 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19336 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19337 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19338 %}
19339 ins_pipe( pipe_slow );
19340 %}
19341
19342 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19343 predicate(Matcher::vector_length(n) == 8);
19344 match(Set dst (VectorInsert (Binary src val) idx));
19345 effect(TEMP tmp, TEMP vtmp);
19346   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19347 ins_encode %{
19348 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19349 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19350
19351 uint x_idx = $idx$$constant & right_n_bits(1);
19352 uint y_idx = ($idx$$constant >> 1) & 3;
19353 __ movq($tmp$$Register, $val$$XMMRegister);
19354 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19355 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19356 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19357 %}
19358 ins_pipe( pipe_slow );
19359 %}
19360
19361 // ====================REDUCTION ARITHMETIC=======================================
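// Two reduction shapes are used below. Integral reductions take a scalar
// accumulator in src1 and the vector in src2, and dispatch on ideal_Opcode()
// inside a single reduceI/reduceL/reduceB/reduceS helper. Floating-point
// add/mul reductions come in a strictly-ordered form, which folds the vector
// into dst lane by lane (dst doubles as the incoming accumulator), and a
// non-strict form for the Vector API, where src1 carries the reduction
// identity and lanes may be combined in any order.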
19362
19363 // =======================Int Reduction==========================================
19364
19365 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19366 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19367 match(Set dst (AddReductionVI src1 src2));
19368 match(Set dst (MulReductionVI src1 src2));
19369 match(Set dst (AndReductionV src1 src2));
19370 match(Set dst ( OrReductionV src1 src2));
19371 match(Set dst (XorReductionV src1 src2));
19372 match(Set dst (MinReductionV src1 src2));
19373 match(Set dst (MaxReductionV src1 src2));
19374 effect(TEMP vtmp1, TEMP vtmp2);
19375 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19376 ins_encode %{
19377 int opcode = this->ideal_Opcode();
19378 int vlen = Matcher::vector_length(this, $src2);
19379 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19380 %}
19381 ins_pipe( pipe_slow );
19382 %}
19383
19384 // =======================Long Reduction==========================================
19385
19386 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19387 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19388 match(Set dst (AddReductionVL src1 src2));
19389 match(Set dst (MulReductionVL src1 src2));
19390 match(Set dst (AndReductionV src1 src2));
19391 match(Set dst ( OrReductionV src1 src2));
19392 match(Set dst (XorReductionV src1 src2));
19393 match(Set dst (MinReductionV src1 src2));
19394 match(Set dst (MaxReductionV src1 src2));
19395 effect(TEMP vtmp1, TEMP vtmp2);
19396 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19397 ins_encode %{
19398 int opcode = this->ideal_Opcode();
19399 int vlen = Matcher::vector_length(this, $src2);
19400 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19401 %}
19402 ins_pipe( pipe_slow );
19403 %}
19404
19405 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19406 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19407 match(Set dst (AddReductionVL src1 src2));
19408 match(Set dst (MulReductionVL src1 src2));
19409 match(Set dst (AndReductionV src1 src2));
19410 match(Set dst ( OrReductionV src1 src2));
19411 match(Set dst (XorReductionV src1 src2));
19412 match(Set dst (MinReductionV src1 src2));
19413 match(Set dst (MaxReductionV src1 src2));
19414 effect(TEMP vtmp1, TEMP vtmp2);
19415 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19416 ins_encode %{
19417 int opcode = this->ideal_Opcode();
19418 int vlen = Matcher::vector_length(this, $src2);
19419 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19420 %}
19421 ins_pipe( pipe_slow );
19422 %}
19423
19424 // =======================Float Reduction==========================================
19425
19426 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19427 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19428 match(Set dst (AddReductionVF dst src));
19429 match(Set dst (MulReductionVF dst src));
19430 effect(TEMP dst, TEMP vtmp);
19431 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19432 ins_encode %{
19433 int opcode = this->ideal_Opcode();
19434 int vlen = Matcher::vector_length(this, $src);
19435 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19436 %}
19437 ins_pipe( pipe_slow );
19438 %}
19439
19440 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19441 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19442 match(Set dst (AddReductionVF dst src));
19443 match(Set dst (MulReductionVF dst src));
19444 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19445 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19446 ins_encode %{
19447 int opcode = this->ideal_Opcode();
19448 int vlen = Matcher::vector_length(this, $src);
19449 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19450 %}
19451 ins_pipe( pipe_slow );
19452 %}
19453
19454 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19455 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19456 match(Set dst (AddReductionVF dst src));
19457 match(Set dst (MulReductionVF dst src));
19458 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19459 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19460 ins_encode %{
19461 int opcode = this->ideal_Opcode();
19462 int vlen = Matcher::vector_length(this, $src);
19463 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19464 %}
19465 ins_pipe( pipe_slow );
19466 %}
19467
19468
19469 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19470 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19471 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19472 // src1 contains reduction identity
19473 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19474 match(Set dst (AddReductionVF src1 src2));
19475 match(Set dst (MulReductionVF src1 src2));
19476 effect(TEMP dst);
19477 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19478 ins_encode %{
19479 int opcode = this->ideal_Opcode();
19480 int vlen = Matcher::vector_length(this, $src2);
19481 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19482 %}
19483 ins_pipe( pipe_slow );
19484 %}
19485
19486 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19487 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19488 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19489 // src1 contains reduction identity
19490 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19491 match(Set dst (AddReductionVF src1 src2));
19492 match(Set dst (MulReductionVF src1 src2));
19493 effect(TEMP dst, TEMP vtmp);
19494 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19495 ins_encode %{
19496 int opcode = this->ideal_Opcode();
19497 int vlen = Matcher::vector_length(this, $src2);
19498 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19499 %}
19500 ins_pipe( pipe_slow );
19501 %}
19502
19503 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19504 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19505 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19506 // src1 contains reduction identity
19507 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19508 match(Set dst (AddReductionVF src1 src2));
19509 match(Set dst (MulReductionVF src1 src2));
19510 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19511 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19512 ins_encode %{
19513 int opcode = this->ideal_Opcode();
19514 int vlen = Matcher::vector_length(this, $src2);
19515 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19516 %}
19517 ins_pipe( pipe_slow );
19518 %}
19519
19520 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19521 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19522 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19523 // src1 contains reduction identity
19524 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19525 match(Set dst (AddReductionVF src1 src2));
19526 match(Set dst (MulReductionVF src1 src2));
19527 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19528 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19529 ins_encode %{
19530 int opcode = this->ideal_Opcode();
19531 int vlen = Matcher::vector_length(this, $src2);
19532 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19533 %}
19534 ins_pipe( pipe_slow );
19535 %}
19536
19537 // =======================Double Reduction==========================================
19538
19539 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19540 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19541 match(Set dst (AddReductionVD dst src));
19542 match(Set dst (MulReductionVD dst src));
19543 effect(TEMP dst, TEMP vtmp);
19544 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19545 ins_encode %{
19546 int opcode = this->ideal_Opcode();
19547 int vlen = Matcher::vector_length(this, $src);
19548 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19549 %}
19550 ins_pipe( pipe_slow );
19551 %}
19552
19553 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19554 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19555 match(Set dst (AddReductionVD dst src));
19556 match(Set dst (MulReductionVD dst src));
19557 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19558 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19559 ins_encode %{
19560 int opcode = this->ideal_Opcode();
19561 int vlen = Matcher::vector_length(this, $src);
19562 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19563 %}
19564 ins_pipe( pipe_slow );
19565 %}
19566
19567 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19568 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19569 match(Set dst (AddReductionVD dst src));
19570 match(Set dst (MulReductionVD dst src));
19571 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19572 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19573 ins_encode %{
19574 int opcode = this->ideal_Opcode();
19575 int vlen = Matcher::vector_length(this, $src);
19576 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19577 %}
19578 ins_pipe( pipe_slow );
19579 %}
19580
19581 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19582 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19583 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19584 // src1 contains reduction identity
19585 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19586 match(Set dst (AddReductionVD src1 src2));
19587 match(Set dst (MulReductionVD src1 src2));
19588 effect(TEMP dst);
19589 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19590 ins_encode %{
19591 int opcode = this->ideal_Opcode();
19592 int vlen = Matcher::vector_length(this, $src2);
19593 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19594 %}
19595 ins_pipe( pipe_slow );
19596 %}
19597
19598 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19599 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19600 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19601 // src1 contains reduction identity
19602 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19603 match(Set dst (AddReductionVD src1 src2));
19604 match(Set dst (MulReductionVD src1 src2));
19605 effect(TEMP dst, TEMP vtmp);
19606 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19607 ins_encode %{
19608 int opcode = this->ideal_Opcode();
19609 int vlen = Matcher::vector_length(this, $src2);
19610 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19611 %}
19612 ins_pipe( pipe_slow );
19613 %}
19614
19615 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19616 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19617 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19618 // src1 contains reduction identity
19619 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19620 match(Set dst (AddReductionVD src1 src2));
19621 match(Set dst (MulReductionVD src1 src2));
19622 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19623 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19624 ins_encode %{
19625 int opcode = this->ideal_Opcode();
19626 int vlen = Matcher::vector_length(this, $src2);
19627 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19628 %}
19629 ins_pipe( pipe_slow );
19630 %}
19631
19632 // =======================Byte Reduction==========================================
19633
19634 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19635 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19636 match(Set dst (AddReductionVI src1 src2));
19637 match(Set dst (AndReductionV src1 src2));
19638 match(Set dst ( OrReductionV src1 src2));
19639 match(Set dst (XorReductionV src1 src2));
19640 match(Set dst (MinReductionV src1 src2));
19641 match(Set dst (MaxReductionV src1 src2));
19642 effect(TEMP vtmp1, TEMP vtmp2);
19643 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19644 ins_encode %{
19645 int opcode = this->ideal_Opcode();
19646 int vlen = Matcher::vector_length(this, $src2);
19647 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19648 %}
19649 ins_pipe( pipe_slow );
19650 %}
19651
19652 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19653 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19654 match(Set dst (AddReductionVI src1 src2));
19655 match(Set dst (AndReductionV src1 src2));
19656 match(Set dst ( OrReductionV src1 src2));
19657 match(Set dst (XorReductionV src1 src2));
19658 match(Set dst (MinReductionV src1 src2));
19659 match(Set dst (MaxReductionV src1 src2));
19660 effect(TEMP vtmp1, TEMP vtmp2);
19661 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19662 ins_encode %{
19663 int opcode = this->ideal_Opcode();
19664 int vlen = Matcher::vector_length(this, $src2);
19665 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19666 %}
19667 ins_pipe( pipe_slow );
19668 %}
19669
19670 // =======================Short Reduction==========================================
19671
19672 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19673 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19674 match(Set dst (AddReductionVI src1 src2));
19675 match(Set dst (MulReductionVI src1 src2));
19676 match(Set dst (AndReductionV src1 src2));
19677 match(Set dst ( OrReductionV src1 src2));
19678 match(Set dst (XorReductionV src1 src2));
19679 match(Set dst (MinReductionV src1 src2));
19680 match(Set dst (MaxReductionV src1 src2));
19681 effect(TEMP vtmp1, TEMP vtmp2);
19682 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19683 ins_encode %{
19684 int opcode = this->ideal_Opcode();
19685 int vlen = Matcher::vector_length(this, $src2);
19686 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19687 %}
19688 ins_pipe( pipe_slow );
19689 %}
19690
19691 // =======================Mul Reduction==========================================
19692
19693 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19694 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19695 Matcher::vector_length(n->in(2)) <= 32); // src2
19696 match(Set dst (MulReductionVI src1 src2));
19697 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19698 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19699 ins_encode %{
19700 int opcode = this->ideal_Opcode();
19701 int vlen = Matcher::vector_length(this, $src2);
19702 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19703 %}
19704 ins_pipe( pipe_slow );
19705 %}
19706
19707 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19708 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19709 Matcher::vector_length(n->in(2)) == 64); // src2
19710 match(Set dst (MulReductionVI src1 src2));
19711 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19712 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19713 ins_encode %{
19714 int opcode = this->ideal_Opcode();
19715 int vlen = Matcher::vector_length(this, $src2);
19716 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19717 %}
19718 ins_pipe( pipe_slow );
19719 %}
19720
19721 //--------------------Min/Max Float Reduction --------------------
19722 // Float Min/Max Reduction
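// The float/double min/max reductions below are split on AVX10.2 support: the
// legacy forms call reduceFloatMinMax()/reduceDoubleMinMax() with several
// vector temporaries and clobber the flags, while the AVX10.2 forms need only
// one or two temporaries and no flag kill. The immF/immD src1 variants require
// src1 to be the reduction identity (+Inf for Min, -Inf for Max), which the
// predicates verify via the input's bottom_type(); the *_av variants instead
// accumulate into dst, signalled by the boolean argument passed to the helper.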
19723 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19724 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19725 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19726 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19727 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19728 Matcher::vector_length(n->in(2)) == 2);
19729 match(Set dst (MinReductionV src1 src2));
19730 match(Set dst (MaxReductionV src1 src2));
19731 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19732 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19733 ins_encode %{
19734 assert(UseAVX > 0, "sanity");
19735
19736 int opcode = this->ideal_Opcode();
19737 int vlen = Matcher::vector_length(this, $src2);
19738 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19739 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19740 %}
19741 ins_pipe( pipe_slow );
19742 %}
19743
19744 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19745 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19746 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19747 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19748 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19749 Matcher::vector_length(n->in(2)) >= 4);
19750 match(Set dst (MinReductionV src1 src2));
19751 match(Set dst (MaxReductionV src1 src2));
19752 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19753 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19754 ins_encode %{
19755 assert(UseAVX > 0, "sanity");
19756
19757 int opcode = this->ideal_Opcode();
19758 int vlen = Matcher::vector_length(this, $src2);
19759 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19760 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19761 %}
19762 ins_pipe( pipe_slow );
19763 %}
19764
19765 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19766 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19767 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19768 Matcher::vector_length(n->in(2)) == 2);
19769 match(Set dst (MinReductionV dst src));
19770 match(Set dst (MaxReductionV dst src));
19771 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19772 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19773 ins_encode %{
19774 assert(UseAVX > 0, "sanity");
19775
19776 int opcode = this->ideal_Opcode();
19777 int vlen = Matcher::vector_length(this, $src);
19778 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19779 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19780 %}
19781 ins_pipe( pipe_slow );
19782 %}
19783
19784
19785 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19786 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19787 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19788 Matcher::vector_length(n->in(2)) >= 4);
19789 match(Set dst (MinReductionV dst src));
19790 match(Set dst (MaxReductionV dst src));
19791 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19792 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19793 ins_encode %{
19794 assert(UseAVX > 0, "sanity");
19795
19796 int opcode = this->ideal_Opcode();
19797 int vlen = Matcher::vector_length(this, $src);
19798 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19799 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19800 %}
19801 ins_pipe( pipe_slow );
19802 %}
19803
19804 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19805 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19806 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19807 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19808 Matcher::vector_length(n->in(2)) == 2);
19809 match(Set dst (MinReductionV src1 src2));
19810 match(Set dst (MaxReductionV src1 src2));
19811 effect(TEMP dst, TEMP xtmp1);
19812 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19813 ins_encode %{
19814 int opcode = this->ideal_Opcode();
19815 int vlen = Matcher::vector_length(this, $src2);
19816 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19817 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19818 %}
19819 ins_pipe( pipe_slow );
19820 %}
19821
19822 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19823 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19824 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19825 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19826 Matcher::vector_length(n->in(2)) >= 4);
19827 match(Set dst (MinReductionV src1 src2));
19828 match(Set dst (MaxReductionV src1 src2));
19829 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19830 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19831 ins_encode %{
19832 int opcode = this->ideal_Opcode();
19833 int vlen = Matcher::vector_length(this, $src2);
19834 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19835 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19836 %}
19837 ins_pipe( pipe_slow );
19838 %}
19839
19840 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19841 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19842 Matcher::vector_length(n->in(2)) == 2);
19843 match(Set dst (MinReductionV dst src));
19844 match(Set dst (MaxReductionV dst src));
19845 effect(TEMP dst, TEMP xtmp1);
19846 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19847 ins_encode %{
19848 int opcode = this->ideal_Opcode();
19849 int vlen = Matcher::vector_length(this, $src);
19850 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19851 $xtmp1$$XMMRegister);
19852 %}
19853 ins_pipe( pipe_slow );
19854 %}
19855
19856 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19857 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19858 Matcher::vector_length(n->in(2)) >= 4);
19859 match(Set dst (MinReductionV dst src));
19860 match(Set dst (MaxReductionV dst src));
19861 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19862   format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19863 ins_encode %{
19864 int opcode = this->ideal_Opcode();
19865 int vlen = Matcher::vector_length(this, $src);
19866 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19867 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19868 %}
19869 ins_pipe( pipe_slow );
19870 %}
19871
19872 //--------------------Min/Max Double Reduction --------------------
19873 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19874 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19875 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19876 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19877 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19878 Matcher::vector_length(n->in(2)) == 2);
19879 match(Set dst (MinReductionV src1 src2));
19880 match(Set dst (MaxReductionV src1 src2));
19881 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19882 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19883 ins_encode %{
19884 assert(UseAVX > 0, "sanity");
19885
19886 int opcode = this->ideal_Opcode();
19887 int vlen = Matcher::vector_length(this, $src2);
19888 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19889 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19890 %}
19891 ins_pipe( pipe_slow );
19892 %}
19893
19894 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19895 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19896 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19897 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19898 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19899 Matcher::vector_length(n->in(2)) >= 4);
19900 match(Set dst (MinReductionV src1 src2));
19901 match(Set dst (MaxReductionV src1 src2));
19902 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19903 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19904 ins_encode %{
19905 assert(UseAVX > 0, "sanity");
19906
19907 int opcode = this->ideal_Opcode();
19908 int vlen = Matcher::vector_length(this, $src2);
19909 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19910 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19911 %}
19912 ins_pipe( pipe_slow );
19913 %}
19914
19915
19916 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19917 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19918 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19919 Matcher::vector_length(n->in(2)) == 2);
19920 match(Set dst (MinReductionV dst src));
19921 match(Set dst (MaxReductionV dst src));
19922 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19923 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19924 ins_encode %{
19925 assert(UseAVX > 0, "sanity");
19926
19927 int opcode = this->ideal_Opcode();
19928 int vlen = Matcher::vector_length(this, $src);
19929 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19930 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19931 %}
19932 ins_pipe( pipe_slow );
19933 %}
19934
19935 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19936 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19937 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19938 Matcher::vector_length(n->in(2)) >= 4);
19939 match(Set dst (MinReductionV dst src));
19940 match(Set dst (MaxReductionV dst src));
19941 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19942 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19943 ins_encode %{
19944 assert(UseAVX > 0, "sanity");
19945
19946 int opcode = this->ideal_Opcode();
19947 int vlen = Matcher::vector_length(this, $src);
19948 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19949 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19950 %}
19951 ins_pipe( pipe_slow );
19952 %}
19953
19954 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19955 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19956 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19957 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19958 Matcher::vector_length(n->in(2)) == 2);
19959 match(Set dst (MinReductionV src1 src2));
19960 match(Set dst (MaxReductionV src1 src2));
19961 effect(TEMP dst, TEMP xtmp1);
19962 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19963 ins_encode %{
19964 int opcode = this->ideal_Opcode();
19965 int vlen = Matcher::vector_length(this, $src2);
19966 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19967 xnoreg, xnoreg, $xtmp1$$XMMRegister);
19968 %}
19969 ins_pipe( pipe_slow );
19970 %}
19971
19972 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19973 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19974 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19975 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19976 Matcher::vector_length(n->in(2)) >= 4);
19977 match(Set dst (MinReductionV src1 src2));
19978 match(Set dst (MaxReductionV src1 src2));
19979 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19980 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19981 ins_encode %{
19982 int opcode = this->ideal_Opcode();
19983 int vlen = Matcher::vector_length(this, $src2);
19984 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19985 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19986 %}
19987 ins_pipe( pipe_slow );
19988 %}
19989
19990
19991 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
19992 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19993 Matcher::vector_length(n->in(2)) == 2);
19994 match(Set dst (MinReductionV dst src));
19995 match(Set dst (MaxReductionV dst src));
19996 effect(TEMP dst, TEMP xtmp1);
19997 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19998 ins_encode %{
19999 int opcode = this->ideal_Opcode();
20000 int vlen = Matcher::vector_length(this, $src);
20001 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20002 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20003 %}
20004 ins_pipe( pipe_slow );
20005 %}
20006
20007 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20008 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20009 Matcher::vector_length(n->in(2)) >= 4);
20010 match(Set dst (MinReductionV dst src));
20011 match(Set dst (MaxReductionV dst src));
20012 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20013 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20014 ins_encode %{
20015 int opcode = this->ideal_Opcode();
20016 int vlen = Matcher::vector_length(this, $src);
20017 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20018 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20019 %}
20020 ins_pipe( pipe_slow );
20021 %}
20022
20023 // ====================VECTOR ARITHMETIC=======================================
20024
20025 // --------------------------------- ADD --------------------------------------
20026
20027 // Bytes vector add
20028 instruct vaddB(vec dst, vec src) %{
20029 predicate(UseAVX == 0);
20030 match(Set dst (AddVB dst src));
20031 format %{ "paddb $dst,$src\t! add packedB" %}
20032 ins_encode %{
20033 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20034 %}
20035 ins_pipe( pipe_slow );
20036 %}
20037
20038 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20039 predicate(UseAVX > 0);
20040 match(Set dst (AddVB src1 src2));
20041 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20042 ins_encode %{
20043 int vlen_enc = vector_length_encoding(this);
20044 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20045 %}
20046 ins_pipe( pipe_slow );
20047 %}
20048
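// Note: the memory-operand forms below are limited to vectors larger than 8 bytes;
// a folded load always reads a full 16/32/64-byte operand, which could read past
// the end of the source for the 4- and 8-byte subword vector cases.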
20049 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20050 predicate((UseAVX > 0) &&
20051 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20052 match(Set dst (AddVB src (LoadVector mem)));
20053 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20054 ins_encode %{
20055 int vlen_enc = vector_length_encoding(this);
20056 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20057 %}
20058 ins_pipe( pipe_slow );
20059 %}
20060
20061 // Shorts/Chars vector add
20062 instruct vaddS(vec dst, vec src) %{
20063 predicate(UseAVX == 0);
20064 match(Set dst (AddVS dst src));
20065 format %{ "paddw $dst,$src\t! add packedS" %}
20066 ins_encode %{
20067 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20068 %}
20069 ins_pipe( pipe_slow );
20070 %}
20071
20072 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20073 predicate(UseAVX > 0);
20074 match(Set dst (AddVS src1 src2));
20075 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20076 ins_encode %{
20077 int vlen_enc = vector_length_encoding(this);
20078 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20079 %}
20080 ins_pipe( pipe_slow );
20081 %}
20082
20083 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20084 predicate((UseAVX > 0) &&
20085 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20086 match(Set dst (AddVS src (LoadVector mem)));
20087 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20088 ins_encode %{
20089 int vlen_enc = vector_length_encoding(this);
20090 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20091 %}
20092 ins_pipe( pipe_slow );
20093 %}
20094
20095 // Integers vector add
20096 instruct vaddI(vec dst, vec src) %{
20097 predicate(UseAVX == 0);
20098 match(Set dst (AddVI dst src));
20099 format %{ "paddd $dst,$src\t! add packedI" %}
20100 ins_encode %{
20101 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20102 %}
20103 ins_pipe( pipe_slow );
20104 %}
20105
20106 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20107 predicate(UseAVX > 0);
20108 match(Set dst (AddVI src1 src2));
20109 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20110 ins_encode %{
20111 int vlen_enc = vector_length_encoding(this);
20112 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20113 %}
20114 ins_pipe( pipe_slow );
20115 %}
20116
20117
20118 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20119 predicate((UseAVX > 0) &&
20120 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20121 match(Set dst (AddVI src (LoadVector mem)));
20122 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20123 ins_encode %{
20124 int vlen_enc = vector_length_encoding(this);
20125 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20126 %}
20127 ins_pipe( pipe_slow );
20128 %}
20129
20130 // Longs vector add
20131 instruct vaddL(vec dst, vec src) %{
20132 predicate(UseAVX == 0);
20133 match(Set dst (AddVL dst src));
20134 format %{ "paddq $dst,$src\t! add packedL" %}
20135 ins_encode %{
20136 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20137 %}
20138 ins_pipe( pipe_slow );
20139 %}
20140
20141 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20142 predicate(UseAVX > 0);
20143 match(Set dst (AddVL src1 src2));
20144 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20145 ins_encode %{
20146 int vlen_enc = vector_length_encoding(this);
20147 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20148 %}
20149 ins_pipe( pipe_slow );
20150 %}
20151
20152 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20153 predicate((UseAVX > 0) &&
20154 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20155 match(Set dst (AddVL src (LoadVector mem)));
20156 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20157 ins_encode %{
20158 int vlen_enc = vector_length_encoding(this);
20159 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20160 %}
20161 ins_pipe( pipe_slow );
20162 %}
20163
20164 // Floats vector add
20165 instruct vaddF(vec dst, vec src) %{
20166 predicate(UseAVX == 0);
20167 match(Set dst (AddVF dst src));
20168 format %{ "addps $dst,$src\t! add packedF" %}
20169 ins_encode %{
20170 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20171 %}
20172 ins_pipe( pipe_slow );
20173 %}
20174
20175 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20176 predicate(UseAVX > 0);
20177 match(Set dst (AddVF src1 src2));
20178 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20179 ins_encode %{
20180 int vlen_enc = vector_length_encoding(this);
20181 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20182 %}
20183 ins_pipe( pipe_slow );
20184 %}
20185
20186 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20187 predicate((UseAVX > 0) &&
20188 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20189 match(Set dst (AddVF src (LoadVector mem)));
20190 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20191 ins_encode %{
20192 int vlen_enc = vector_length_encoding(this);
20193 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20194 %}
20195 ins_pipe( pipe_slow );
20196 %}
20197
20198 // Doubles vector add
20199 instruct vaddD(vec dst, vec src) %{
20200 predicate(UseAVX == 0);
20201 match(Set dst (AddVD dst src));
20202 format %{ "addpd $dst,$src\t! add packedD" %}
20203 ins_encode %{
20204 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20205 %}
20206 ins_pipe( pipe_slow );
20207 %}
20208
20209 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20210 predicate(UseAVX > 0);
20211 match(Set dst (AddVD src1 src2));
20212 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20213 ins_encode %{
20214 int vlen_enc = vector_length_encoding(this);
20215 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20216 %}
20217 ins_pipe( pipe_slow );
20218 %}
20219
20220 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20221 predicate((UseAVX > 0) &&
20222 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20223 match(Set dst (AddVD src (LoadVector mem)));
20224 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20225 ins_encode %{
20226 int vlen_enc = vector_length_encoding(this);
20227 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20228 %}
20229 ins_pipe( pipe_slow );
20230 %}
20231
20232 // --------------------------------- SUB --------------------------------------
20233
20234 // Bytes vector sub
20235 instruct vsubB(vec dst, vec src) %{
20236 predicate(UseAVX == 0);
20237 match(Set dst (SubVB dst src));
20238 format %{ "psubb $dst,$src\t! sub packedB" %}
20239 ins_encode %{
20240 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20241 %}
20242 ins_pipe( pipe_slow );
20243 %}
20244
20245 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20246 predicate(UseAVX > 0);
20247 match(Set dst (SubVB src1 src2));
20248 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20249 ins_encode %{
20250 int vlen_enc = vector_length_encoding(this);
20251 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20252 %}
20253 ins_pipe( pipe_slow );
20254 %}
20255
20256 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20257 predicate((UseAVX > 0) &&
20258 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20259 match(Set dst (SubVB src (LoadVector mem)));
20260 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20261 ins_encode %{
20262 int vlen_enc = vector_length_encoding(this);
20263 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20264 %}
20265 ins_pipe( pipe_slow );
20266 %}
20267
20268 // Shorts/Chars vector sub
20269 instruct vsubS(vec dst, vec src) %{
20270 predicate(UseAVX == 0);
20271 match(Set dst (SubVS dst src));
20272 format %{ "psubw $dst,$src\t! sub packedS" %}
20273 ins_encode %{
20274 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20275 %}
20276 ins_pipe( pipe_slow );
20277 %}
20278
20279
20280 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20281 predicate(UseAVX > 0);
20282 match(Set dst (SubVS src1 src2));
20283 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20284 ins_encode %{
20285 int vlen_enc = vector_length_encoding(this);
20286 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20287 %}
20288 ins_pipe( pipe_slow );
20289 %}
20290
20291 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20292 predicate((UseAVX > 0) &&
20293 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20294 match(Set dst (SubVS src (LoadVector mem)));
20295 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20296 ins_encode %{
20297 int vlen_enc = vector_length_encoding(this);
20298 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20299 %}
20300 ins_pipe( pipe_slow );
20301 %}
20302
20303 // Integers vector sub
20304 instruct vsubI(vec dst, vec src) %{
20305 predicate(UseAVX == 0);
20306 match(Set dst (SubVI dst src));
20307 format %{ "psubd $dst,$src\t! sub packedI" %}
20308 ins_encode %{
20309 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20310 %}
20311 ins_pipe( pipe_slow );
20312 %}
20313
20314 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20315 predicate(UseAVX > 0);
20316 match(Set dst (SubVI src1 src2));
20317 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20318 ins_encode %{
20319 int vlen_enc = vector_length_encoding(this);
20320 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20321 %}
20322 ins_pipe( pipe_slow );
20323 %}
20324
20325 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20326 predicate((UseAVX > 0) &&
20327 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20328 match(Set dst (SubVI src (LoadVector mem)));
20329 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20330 ins_encode %{
20331 int vlen_enc = vector_length_encoding(this);
20332 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20333 %}
20334 ins_pipe( pipe_slow );
20335 %}
20336
20337 // Longs vector sub
20338 instruct vsubL(vec dst, vec src) %{
20339 predicate(UseAVX == 0);
20340 match(Set dst (SubVL dst src));
20341 format %{ "psubq $dst,$src\t! sub packedL" %}
20342 ins_encode %{
20343 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20344 %}
20345 ins_pipe( pipe_slow );
20346 %}
20347
20348 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20349 predicate(UseAVX > 0);
20350 match(Set dst (SubVL src1 src2));
20351 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20352 ins_encode %{
20353 int vlen_enc = vector_length_encoding(this);
20354 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20355 %}
20356 ins_pipe( pipe_slow );
20357 %}
20358
20359
20360 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20361 predicate((UseAVX > 0) &&
20362 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20363 match(Set dst (SubVL src (LoadVector mem)));
20364 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20365 ins_encode %{
20366 int vlen_enc = vector_length_encoding(this);
20367 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20368 %}
20369 ins_pipe( pipe_slow );
20370 %}
20371
20372 // Floats vector sub
20373 instruct vsubF(vec dst, vec src) %{
20374 predicate(UseAVX == 0);
20375 match(Set dst (SubVF dst src));
20376 format %{ "subps $dst,$src\t! sub packedF" %}
20377 ins_encode %{
20378 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20379 %}
20380 ins_pipe( pipe_slow );
20381 %}
20382
20383 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20384 predicate(UseAVX > 0);
20385 match(Set dst (SubVF src1 src2));
20386 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20387 ins_encode %{
20388 int vlen_enc = vector_length_encoding(this);
20389 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20390 %}
20391 ins_pipe( pipe_slow );
20392 %}
20393
20394 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20395 predicate((UseAVX > 0) &&
20396 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20397 match(Set dst (SubVF src (LoadVector mem)));
20398 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20399 ins_encode %{
20400 int vlen_enc = vector_length_encoding(this);
20401 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20402 %}
20403 ins_pipe( pipe_slow );
20404 %}
20405
20406 // Doubles vector sub
20407 instruct vsubD(vec dst, vec src) %{
20408 predicate(UseAVX == 0);
20409 match(Set dst (SubVD dst src));
20410 format %{ "subpd $dst,$src\t! sub packedD" %}
20411 ins_encode %{
20412 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20413 %}
20414 ins_pipe( pipe_slow );
20415 %}
20416
20417 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20418 predicate(UseAVX > 0);
20419 match(Set dst (SubVD src1 src2));
20420 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20421 ins_encode %{
20422 int vlen_enc = vector_length_encoding(this);
20423 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20424 %}
20425 ins_pipe( pipe_slow );
20426 %}
20427
20428 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20429 predicate((UseAVX > 0) &&
20430 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20431 match(Set dst (SubVD src (LoadVector mem)));
20432 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20433 ins_encode %{
20434 int vlen_enc = vector_length_encoding(this);
20435 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20436 %}
20437 ins_pipe( pipe_slow );
20438 %}
20439
20440 // --------------------------------- MUL --------------------------------------
20441
20442 // Byte vector mul
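// There is no byte multiply instruction on x86, so the rules below widen bytes to
// 16-bit lanes, multiply with pmullw/vpmullw, and repack the low byte of each
// 16-bit product. The 16-byte-and-wider variants handle odd- and even-indexed
// bytes separately and recombine the two halves with por/vpor.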
20443 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20444 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20445 match(Set dst (MulVB src1 src2));
20446 effect(TEMP dst, TEMP xtmp);
20447 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20448 ins_encode %{
20449 assert(UseSSE > 3, "required");
20450 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20451 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20452 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20453 __ psllw($dst$$XMMRegister, 8);
20454 __ psrlw($dst$$XMMRegister, 8);
20455 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20456 %}
20457 ins_pipe( pipe_slow );
20458 %}
20459
20460 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20461 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20462 match(Set dst (MulVB src1 src2));
20463 effect(TEMP dst, TEMP xtmp);
20464 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20465 ins_encode %{
20466 assert(UseSSE > 3, "required");
20467 // Odd-index elements
20468 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20469 __ psrlw($dst$$XMMRegister, 8);
20470 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20471 __ psrlw($xtmp$$XMMRegister, 8);
20472 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20473 __ psllw($dst$$XMMRegister, 8);
20474 // Even-index elements
20475 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20476 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20477 __ psllw($xtmp$$XMMRegister, 8);
20478 __ psrlw($xtmp$$XMMRegister, 8);
20479 // Combine
20480 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20481 %}
20482 ins_pipe( pipe_slow );
20483 %}
20484
20485 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20486 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20487 match(Set dst (MulVB src1 src2));
20488 effect(TEMP xtmp1, TEMP xtmp2);
20489 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20490 ins_encode %{
20491 int vlen_enc = vector_length_encoding(this);
20492 // Odd-index elements
20493 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20494 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20495 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20496 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20497 // Even-index elements
20498 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20499 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20500 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20501 // Combine
20502 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20503 %}
20504 ins_pipe( pipe_slow );
20505 %}
20506
20507 // Shorts/Chars vector mul
20508 instruct vmulS(vec dst, vec src) %{
20509 predicate(UseAVX == 0);
20510 match(Set dst (MulVS dst src));
20511 format %{ "pmullw $dst,$src\t! mul packedS" %}
20512 ins_encode %{
20513 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20514 %}
20515 ins_pipe( pipe_slow );
20516 %}
20517
20518 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20519 predicate(UseAVX > 0);
20520 match(Set dst (MulVS src1 src2));
20521 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20522 ins_encode %{
20523 int vlen_enc = vector_length_encoding(this);
20524 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20525 %}
20526 ins_pipe( pipe_slow );
20527 %}
20528
20529 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20530 predicate((UseAVX > 0) &&
20531 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20532 match(Set dst (MulVS src (LoadVector mem)));
20533 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20534 ins_encode %{
20535 int vlen_enc = vector_length_encoding(this);
20536 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20537 %}
20538 ins_pipe( pipe_slow );
20539 %}
20540
20541 // Integers vector mul
20542 instruct vmulI(vec dst, vec src) %{
20543 predicate(UseAVX == 0);
20544 match(Set dst (MulVI dst src));
20545 format %{ "pmulld $dst,$src\t! mul packedI" %}
20546 ins_encode %{
20547 assert(UseSSE > 3, "required");
20548 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20549 %}
20550 ins_pipe( pipe_slow );
20551 %}
20552
20553 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20554 predicate(UseAVX > 0);
20555 match(Set dst (MulVI src1 src2));
20556 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20557 ins_encode %{
20558 int vlen_enc = vector_length_encoding(this);
20559 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20560 %}
20561 ins_pipe( pipe_slow );
20562 %}
20563
20564 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20565 predicate((UseAVX > 0) &&
20566 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20567 match(Set dst (MulVI src (LoadVector mem)));
20568 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20569 ins_encode %{
20570 int vlen_enc = vector_length_encoding(this);
20571 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20572 %}
20573 ins_pipe( pipe_slow );
20574 %}
20575
20576 // Longs vector mul
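// VPMULLQ requires AVX512DQ (plus AVX512VL for sub-512-bit vectors), hence the
// predicates on the evmulL rules. Their higher ins_cost(500) lets the matcher
// prefer the single-instruction vpmuludq/vpmuldq rules further down whenever the
// 64-bit inputs are known to be zero- or sign-extended 32-bit values.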
20577 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20578 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20579 VM_Version::supports_avx512dq()) ||
20580 VM_Version::supports_avx512vldq());
20581 match(Set dst (MulVL src1 src2));
20582 ins_cost(500);
20583 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20584 ins_encode %{
20585 assert(UseAVX > 2, "required");
20586 int vlen_enc = vector_length_encoding(this);
20587 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20588 %}
20589 ins_pipe( pipe_slow );
20590 %}
20591
20592 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20593 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20594 VM_Version::supports_avx512dq()) ||
20595 (Matcher::vector_length_in_bytes(n) > 8 &&
20596 VM_Version::supports_avx512vldq()));
20597 match(Set dst (MulVL src (LoadVector mem)));
20598 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20599 ins_cost(500);
20600 ins_encode %{
20601 assert(UseAVX > 2, "required");
20602 int vlen_enc = vector_length_encoding(this);
20603 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20604 %}
20605 ins_pipe( pipe_slow );
20606 %}
20607
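// Without a 64x64->64 multiply instruction, the generic long multiply is composed
// from 32-bit halves:
//   a * b mod 2^64 = ((a_hi * b_lo + a_lo * b_hi) << 32) + a_lo * b_lo
// The cross terms are produced with pmulld on a half-swapped copy of one operand,
// summed, shifted into the upper dword of each lane, and added to the unsigned
// lo * lo product produced by pmuludq.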
20608 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20609 predicate(UseAVX == 0);
20610 match(Set dst (MulVL src1 src2));
20611 ins_cost(500);
20612 effect(TEMP dst, TEMP xtmp);
20613 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20614 ins_encode %{
20615 assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only the lower 32 bits of each product matter
20617 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20618 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20619 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20620 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20621 __ psllq($dst$$XMMRegister, 32);
20622 // Get the lo-lo products
20623 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20624 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20625 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20626 %}
20627 ins_pipe( pipe_slow );
20628 %}
20629
20630 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20631 predicate(UseAVX > 0 &&
20632 ((Matcher::vector_length_in_bytes(n) == 64 &&
20633 !VM_Version::supports_avx512dq()) ||
20634 (Matcher::vector_length_in_bytes(n) < 64 &&
20635 !VM_Version::supports_avx512vldq())));
20636 match(Set dst (MulVL src1 src2));
20637 effect(TEMP xtmp1, TEMP xtmp2);
20638 ins_cost(500);
20639 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20640 ins_encode %{
20641 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only the lower 32 bits of each product matter
20643 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20644 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20645 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20646 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20647 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20648 // Get the lo-lo products
20649 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20650 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20651 %}
20652 ins_pipe( pipe_slow );
20653 %}
20654
20655 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20656 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20657 match(Set dst (MulVL src1 src2));
20658 ins_cost(100);
20659 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20660 ins_encode %{
20661 int vlen_enc = vector_length_encoding(this);
20662 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20663 %}
20664 ins_pipe( pipe_slow );
20665 %}
20666
20667 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20668 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20669 match(Set dst (MulVL src1 src2));
20670 ins_cost(100);
20671 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20672 ins_encode %{
20673 int vlen_enc = vector_length_encoding(this);
20674 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20675 %}
20676 ins_pipe( pipe_slow );
20677 %}
20678
20679 // Floats vector mul
20680 instruct vmulF(vec dst, vec src) %{
20681 predicate(UseAVX == 0);
20682 match(Set dst (MulVF dst src));
20683 format %{ "mulps $dst,$src\t! mul packedF" %}
20684 ins_encode %{
20685 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20686 %}
20687 ins_pipe( pipe_slow );
20688 %}
20689
20690 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20691 predicate(UseAVX > 0);
20692 match(Set dst (MulVF src1 src2));
20693 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20694 ins_encode %{
20695 int vlen_enc = vector_length_encoding(this);
20696 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20697 %}
20698 ins_pipe( pipe_slow );
20699 %}
20700
20701 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20702 predicate((UseAVX > 0) &&
20703 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20704 match(Set dst (MulVF src (LoadVector mem)));
20705 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20706 ins_encode %{
20707 int vlen_enc = vector_length_encoding(this);
20708 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20709 %}
20710 ins_pipe( pipe_slow );
20711 %}
20712
20713 // Doubles vector mul
20714 instruct vmulD(vec dst, vec src) %{
20715 predicate(UseAVX == 0);
20716 match(Set dst (MulVD dst src));
20717 format %{ "mulpd $dst,$src\t! mul packedD" %}
20718 ins_encode %{
20719 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20720 %}
20721 ins_pipe( pipe_slow );
20722 %}
20723
20724 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20725 predicate(UseAVX > 0);
20726 match(Set dst (MulVD src1 src2));
20727 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20728 ins_encode %{
20729 int vlen_enc = vector_length_encoding(this);
20730 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20731 %}
20732 ins_pipe( pipe_slow );
20733 %}
20734
20735 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20736 predicate((UseAVX > 0) &&
20737 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20738 match(Set dst (MulVD src (LoadVector mem)));
20739 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20740 ins_encode %{
20741 int vlen_enc = vector_length_encoding(this);
20742 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20743 %}
20744 ins_pipe( pipe_slow );
20745 %}
20746
20747 // --------------------------------- DIV --------------------------------------
20748
20749 // Floats vector div
20750 instruct vdivF(vec dst, vec src) %{
20751 predicate(UseAVX == 0);
20752 match(Set dst (DivVF dst src));
20753 format %{ "divps $dst,$src\t! div packedF" %}
20754 ins_encode %{
20755 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20756 %}
20757 ins_pipe( pipe_slow );
20758 %}
20759
20760 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20761 predicate(UseAVX > 0);
20762 match(Set dst (DivVF src1 src2));
20763 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20764 ins_encode %{
20765 int vlen_enc = vector_length_encoding(this);
20766 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20767 %}
20768 ins_pipe( pipe_slow );
20769 %}
20770
20771 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20772 predicate((UseAVX > 0) &&
20773 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20774 match(Set dst (DivVF src (LoadVector mem)));
20775 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20776 ins_encode %{
20777 int vlen_enc = vector_length_encoding(this);
20778 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20779 %}
20780 ins_pipe( pipe_slow );
20781 %}
20782
20783 // Doubles vector div
20784 instruct vdivD(vec dst, vec src) %{
20785 predicate(UseAVX == 0);
20786 match(Set dst (DivVD dst src));
20787 format %{ "divpd $dst,$src\t! div packedD" %}
20788 ins_encode %{
20789 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20790 %}
20791 ins_pipe( pipe_slow );
20792 %}
20793
20794 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20795 predicate(UseAVX > 0);
20796 match(Set dst (DivVD src1 src2));
20797 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20798 ins_encode %{
20799 int vlen_enc = vector_length_encoding(this);
20800 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20801 %}
20802 ins_pipe( pipe_slow );
20803 %}
20804
20805 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20806 predicate((UseAVX > 0) &&
20807 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20808 match(Set dst (DivVD src (LoadVector mem)));
20809 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20810 ins_encode %{
20811 int vlen_enc = vector_length_encoding(this);
20812 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20813 %}
20814 ins_pipe( pipe_slow );
20815 %}
20816
20817 // ------------------------------ MinMax ---------------------------------------
20818
20819 // Byte, Short, Int vector Min/Max
20820 instruct minmax_reg_sse(vec dst, vec src) %{
20821 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20822 UseAVX == 0);
20823 match(Set dst (MinV dst src));
20824 match(Set dst (MaxV dst src));
20825 format %{ "vector_minmax $dst,$src\t! " %}
20826 ins_encode %{
20827 assert(UseSSE >= 4, "required");
20828
20829 int opcode = this->ideal_Opcode();
20830 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20831 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20832 %}
20833 ins_pipe( pipe_slow );
20834 %}
20835
20836 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20837 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20838 UseAVX > 0);
20839 match(Set dst (MinV src1 src2));
20840 match(Set dst (MaxV src1 src2));
20841 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20842 ins_encode %{
20843 int opcode = this->ideal_Opcode();
20844 int vlen_enc = vector_length_encoding(this);
20845 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20846
20847 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20848 %}
20849 ins_pipe( pipe_slow );
20850 %}
20851
20852 // Long vector Min/Max
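// There is no packed 64-bit min/max before AVX-512 (vpminsq/vpmaxsq need AVX512F,
// plus VL for sub-512-bit vectors), so the pre-EVEX rules compare and blend.
// The SSE rule pins its TEMP to xmm0 because the SSE4.1 blend instructions take
// their mask implicitly in xmm0.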
20853 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20854 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20855 UseAVX == 0);
20856 match(Set dst (MinV dst src));
20857 match(Set dst (MaxV src dst));
20858 effect(TEMP dst, TEMP tmp);
20859 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20860 ins_encode %{
20861 assert(UseSSE >= 4, "required");
20862
20863 int opcode = this->ideal_Opcode();
20864 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20865 assert(elem_bt == T_LONG, "sanity");
20866
20867 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20868 %}
20869 ins_pipe( pipe_slow );
20870 %}
20871
20872 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20873 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20874 UseAVX > 0 && !VM_Version::supports_avx512vl());
20875 match(Set dst (MinV src1 src2));
20876 match(Set dst (MaxV src1 src2));
20877 effect(TEMP dst);
20878 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20879 ins_encode %{
20880 int vlen_enc = vector_length_encoding(this);
20881 int opcode = this->ideal_Opcode();
20882 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20883 assert(elem_bt == T_LONG, "sanity");
20884
20885 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20886 %}
20887 ins_pipe( pipe_slow );
20888 %}
20889
20890 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20891 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20892 Matcher::vector_element_basic_type(n) == T_LONG);
20893 match(Set dst (MinV src1 src2));
20894 match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20896 ins_encode %{
20897 assert(UseAVX > 2, "required");
20898
20899 int vlen_enc = vector_length_encoding(this);
20900 int opcode = this->ideal_Opcode();
20901 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20902 assert(elem_bt == T_LONG, "sanity");
20903
20904 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20905 %}
20906 ins_pipe( pipe_slow );
20907 %}
20908
20909 // Float/Double vector Min/Max
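// Java Math.min/max semantics differ from raw minps/maxps behaviour: NaN must
// propagate and -0.0 is strictly less than +0.0, whereas the x86 instructions
// simply return the second operand in those cases. The AVX10.2 rule uses the new
// min/max instructions directly; older targets blend through temporaries
// (vminmax_fp / evminmax_fp) to produce the Java result.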
20910 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20911 predicate(VM_Version::supports_avx10_2() &&
20912 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20913 match(Set dst (MinV a b));
20914 match(Set dst (MaxV a b));
20915 format %{ "vector_minmaxFP $dst, $a, $b" %}
20916 ins_encode %{
20917 int vlen_enc = vector_length_encoding(this);
20918 int opcode = this->ideal_Opcode();
20919 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20920 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20921 %}
20922 ins_pipe( pipe_slow );
20923 %}
20924
20925 // Float/Double vector Min/Max
20926 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20927 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20928 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20929 UseAVX > 0);
20930 match(Set dst (MinV a b));
20931 match(Set dst (MaxV a b));
20932 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20933 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20934 ins_encode %{
20935 assert(UseAVX > 0, "required");
20936
20937 int opcode = this->ideal_Opcode();
20938 int vlen_enc = vector_length_encoding(this);
20939 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20940
20941 __ vminmax_fp(opcode, elem_bt,
20942 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20943 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20944 %}
20945 ins_pipe( pipe_slow );
20946 %}
20947
20948 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20949 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20950 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20951 match(Set dst (MinV a b));
20952 match(Set dst (MaxV a b));
20953 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20954 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20955 ins_encode %{
20956 assert(UseAVX > 2, "required");
20957
20958 int opcode = this->ideal_Opcode();
20959 int vlen_enc = vector_length_encoding(this);
20960 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20961
20962 __ evminmax_fp(opcode, elem_bt,
20963 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20964 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20965 %}
20966 ins_pipe( pipe_slow );
20967 %}
20968
20969 // ------------------------------ Unsigned vector Min/Max ----------------------
20970
20971 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20972 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20973 match(Set dst (UMinV a b));
20974 match(Set dst (UMaxV a b));
20975 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20976 ins_encode %{
20977 int opcode = this->ideal_Opcode();
20978 int vlen_enc = vector_length_encoding(this);
20979 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20980 assert(is_integral_type(elem_bt), "");
20981 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20982 %}
20983 ins_pipe( pipe_slow );
20984 %}
20985
20986 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20987 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20988 match(Set dst (UMinV a (LoadVector b)));
20989 match(Set dst (UMaxV a (LoadVector b)));
20990 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20991 ins_encode %{
20992 int opcode = this->ideal_Opcode();
20993 int vlen_enc = vector_length_encoding(this);
20994 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20995 assert(is_integral_type(elem_bt), "");
20996 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20997 %}
20998 ins_pipe( pipe_slow );
20999 %}
21000
21001 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21002 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21003 match(Set dst (UMinV a b));
21004 match(Set dst (UMaxV a b));
21005 effect(TEMP xtmp1, TEMP xtmp2);
21006 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21007 ins_encode %{
21008 int opcode = this->ideal_Opcode();
21009 int vlen_enc = vector_length_encoding(this);
21010 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21011 %}
21012 ins_pipe( pipe_slow );
21013 %}
21014
21015 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21016 match(Set dst (UMinV (Binary dst src2) mask));
21017 match(Set dst (UMaxV (Binary dst src2) mask));
21018 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21019 ins_encode %{
21020 int vlen_enc = vector_length_encoding(this);
21021 BasicType bt = Matcher::vector_element_basic_type(this);
21022 int opc = this->ideal_Opcode();
21023 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21024 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21025 %}
21026 ins_pipe( pipe_slow );
21027 %}
21028
21029 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21030 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21031 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21032 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21033 ins_encode %{
21034 int vlen_enc = vector_length_encoding(this);
21035 BasicType bt = Matcher::vector_element_basic_type(this);
21036 int opc = this->ideal_Opcode();
21037 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21038 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21039 %}
21040 ins_pipe( pipe_slow );
21041 %}
21042
21043 // --------------------------------- Signum/CopySign ---------------------------
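// Math.signum semantics: NaN and +/-0.0 are returned unchanged, every other value
// maps to +/-1.0 carrying the sign of the input. The $zero and $one operands
// supply those constants to the signum helpers below.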
21044
21045 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21046 match(Set dst (SignumF dst (Binary zero one)));
21047 effect(KILL cr);
21048 format %{ "signumF $dst, $dst" %}
21049 ins_encode %{
21050 int opcode = this->ideal_Opcode();
21051 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21052 %}
21053 ins_pipe( pipe_slow );
21054 %}
21055
21056 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21057 match(Set dst (SignumD dst (Binary zero one)));
21058 effect(KILL cr);
21059 format %{ "signumD $dst, $dst" %}
21060 ins_encode %{
21061 int opcode = this->ideal_Opcode();
21062 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21063 %}
21064 ins_pipe( pipe_slow );
21065 %}
21066
21067 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21068 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21069 match(Set dst (SignumVF src (Binary zero one)));
21070 match(Set dst (SignumVD src (Binary zero one)));
21071 effect(TEMP dst, TEMP xtmp1);
21072 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21073 ins_encode %{
21074 int opcode = this->ideal_Opcode();
21075 int vec_enc = vector_length_encoding(this);
21076 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21077 $xtmp1$$XMMRegister, vec_enc);
21078 %}
21079 ins_pipe( pipe_slow );
21080 %}
21081
21082 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21083 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21084 match(Set dst (SignumVF src (Binary zero one)));
21085 match(Set dst (SignumVD src (Binary zero one)));
21086 effect(TEMP dst, TEMP ktmp1);
21087 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21088 ins_encode %{
21089 int opcode = this->ideal_Opcode();
21090 int vec_enc = vector_length_encoding(this);
21091 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21092 $ktmp1$$KRegister, vec_enc);
21093 %}
21094 ins_pipe( pipe_slow );
21095 %}
21096
21097 // ---------------------------------------
21098 // For copySign use 0xE4 as writemask for vpternlog
21099 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21100 // C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0 (the sign bit), we want to pick the bit from B ($src, the sign)
// Wherever xmm2 is 1 (the magnitude bits), we want to pick the bit from A ($dst, the magnitude)
21103 //
21104 // A B C Result
21105 // 0 0 0 0
21106 // 0 0 1 0
21107 // 0 1 0 1
21108 // 0 1 1 0
21109 // 1 0 0 0
21110 // 1 0 1 1
21111 // 1 1 0 1
21112 // 1 1 1 1
21113 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
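//
// Example (one float lane), $dst supplies the magnitude and $src the sign:
//   A = $dst  = 0x40490FDB ( 3.1415927f)
//   B = $src  = 0xC0000000 (-2.0f)
//   C = mask  = 0x7FFFFFFF
//   vpternlogd with imm 0xE4 -> 0xC0490FDB (-3.1415927f)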
21115 // ---------------------------------------
21116
21117 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21118 match(Set dst (CopySignF dst src));
21119 effect(TEMP tmp1, TEMP tmp2);
21120 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21121 ins_encode %{
21122 __ movl($tmp2$$Register, 0x7FFFFFFF);
21123 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21124 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21125 %}
21126 ins_pipe( pipe_slow );
21127 %}
21128
21129 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21130 match(Set dst (CopySignD dst (Binary src zero)));
21131 ins_cost(100);
21132 effect(TEMP tmp1, TEMP tmp2);
21133 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21134 ins_encode %{
21135 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21136 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21137 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21138 %}
21139 ins_pipe( pipe_slow );
21140 %}
21141
21142 //----------------------------- CompressBits/ExpandBits ------------------------
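// pext gathers the src bits selected by mask into the low-order bits of dst, and
// pdep scatters the low-order src bits into the bit positions selected by mask.
// For example:
//   pext(src=0b10110010, mask=0b11110000) = 0b1011
//   pdep(src=0b1011,     mask=0b11110000) = 0b10110000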
21143
21144 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21145 predicate(n->bottom_type()->isa_int());
21146 match(Set dst (CompressBits src mask));
21147 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21148 ins_encode %{
21149 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21150 %}
21151 ins_pipe( pipe_slow );
21152 %}
21153
21154 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21155 predicate(n->bottom_type()->isa_int());
21156 match(Set dst (ExpandBits src mask));
21157 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21158 ins_encode %{
21159 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21160 %}
21161 ins_pipe( pipe_slow );
21162 %}
21163
21164 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21165 predicate(n->bottom_type()->isa_int());
21166 match(Set dst (CompressBits src (LoadI mask)));
21167 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21168 ins_encode %{
21169 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21170 %}
21171 ins_pipe( pipe_slow );
21172 %}
21173
21174 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21175 predicate(n->bottom_type()->isa_int());
21176 match(Set dst (ExpandBits src (LoadI mask)));
21177 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21178 ins_encode %{
21179 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21180 %}
21181 ins_pipe( pipe_slow );
21182 %}
21183
21184 // --------------------------------- Sqrt --------------------------------------
21185
21186 instruct vsqrtF_reg(vec dst, vec src) %{
21187 match(Set dst (SqrtVF src));
21188 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21189 ins_encode %{
21190 assert(UseAVX > 0, "required");
21191 int vlen_enc = vector_length_encoding(this);
21192 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21193 %}
21194 ins_pipe( pipe_slow );
21195 %}
21196
21197 instruct vsqrtF_mem(vec dst, memory mem) %{
21198 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21199 match(Set dst (SqrtVF (LoadVector mem)));
21200 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21201 ins_encode %{
21202 assert(UseAVX > 0, "required");
21203 int vlen_enc = vector_length_encoding(this);
21204 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21205 %}
21206 ins_pipe( pipe_slow );
21207 %}
21208
21209 // Floating point vector sqrt
21210 instruct vsqrtD_reg(vec dst, vec src) %{
21211 match(Set dst (SqrtVD src));
21212 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21213 ins_encode %{
21214 assert(UseAVX > 0, "required");
21215 int vlen_enc = vector_length_encoding(this);
21216 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21217 %}
21218 ins_pipe( pipe_slow );
21219 %}
21220
21221 instruct vsqrtD_mem(vec dst, memory mem) %{
21222 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21223 match(Set dst (SqrtVD (LoadVector mem)));
21224 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21225 ins_encode %{
21226 assert(UseAVX > 0, "required");
21227 int vlen_enc = vector_length_encoding(this);
21228 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21229 %}
21230 ins_pipe( pipe_slow );
21231 %}
21232
21233 // ------------------------------ Shift ---------------------------------------
21234
21235 // Left and right shift count vectors are the same on x86
21236 // (only lowest bits of xmm reg are used for count).
21237 instruct vshiftcnt(vec dst, rRegI cnt) %{
21238 match(Set dst (LShiftCntV cnt));
21239 match(Set dst (RShiftCntV cnt));
21240 format %{ "movdl $dst,$cnt\t! load shift count" %}
21241 ins_encode %{
21242 __ movdl($dst$$XMMRegister, $cnt$$Register);
21243 %}
21244 ins_pipe( pipe_slow );
21245 %}
21246
21247 // Byte vector shift
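// x86 has no byte shift instruction, so the rules below widen bytes to 16-bit
// lanes (sign- or zero-extending depending on the shift kind), shift with
// vshiftw, mask the results back to byte range and repack with packuswb.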
21248 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21249 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21250 match(Set dst ( LShiftVB src shift));
21251 match(Set dst ( RShiftVB src shift));
21252 match(Set dst (URShiftVB src shift));
21253 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21254 format %{"vector_byte_shift $dst,$src,$shift" %}
21255 ins_encode %{
21256 assert(UseSSE > 3, "required");
21257 int opcode = this->ideal_Opcode();
21258 bool sign = (opcode != Op_URShiftVB);
21259 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21260 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21261 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21262 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21263 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21264 %}
21265 ins_pipe( pipe_slow );
21266 %}
21267
21268 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21269 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21270 UseAVX <= 1);
21271 match(Set dst ( LShiftVB src shift));
21272 match(Set dst ( RShiftVB src shift));
21273 match(Set dst (URShiftVB src shift));
21274 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21275 format %{"vector_byte_shift $dst,$src,$shift" %}
21276 ins_encode %{
21277 assert(UseSSE > 3, "required");
21278 int opcode = this->ideal_Opcode();
21279 bool sign = (opcode != Op_URShiftVB);
21280 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21281 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21282 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21283 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21284 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21285 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21286 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21287 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21288 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21289 %}
21290 ins_pipe( pipe_slow );
21291 %}
21292
21293 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21294 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21295 UseAVX > 1);
21296 match(Set dst ( LShiftVB src shift));
21297 match(Set dst ( RShiftVB src shift));
21298 match(Set dst (URShiftVB src shift));
21299 effect(TEMP dst, TEMP tmp);
21300 format %{"vector_byte_shift $dst,$src,$shift" %}
21301 ins_encode %{
21302 int opcode = this->ideal_Opcode();
21303 bool sign = (opcode != Op_URShiftVB);
21304 int vlen_enc = Assembler::AVX_256bit;
21305 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21306 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21307 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21308 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21309 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21310 %}
21311 ins_pipe( pipe_slow );
21312 %}
21313
21314 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21315 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21316 match(Set dst ( LShiftVB src shift));
21317 match(Set dst ( RShiftVB src shift));
21318 match(Set dst (URShiftVB src shift));
21319 effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21321 ins_encode %{
21322 assert(UseAVX > 1, "required");
21323 int opcode = this->ideal_Opcode();
21324 bool sign = (opcode != Op_URShiftVB);
21325 int vlen_enc = Assembler::AVX_256bit;
21326 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21327 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21328 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21329 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21330 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21331 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21332 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21333 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
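    // vpackuswb packs within each 128-bit lane, so permute the quadwords to
    // restore the original element order.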
21334 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21335 %}
21336 ins_pipe( pipe_slow );
21337 %}
21338
21339 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21340 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21341 match(Set dst ( LShiftVB src shift));
21342 match(Set dst (RShiftVB src shift));
21343 match(Set dst (URShiftVB src shift));
21344 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21346 ins_encode %{
21347 assert(UseAVX > 2, "required");
21348 int opcode = this->ideal_Opcode();
21349 bool sign = (opcode != Op_URShiftVB);
21350 int vlen_enc = Assembler::AVX_512bit;
21351 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21352 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21353 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21354 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21355 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21356 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21357 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21358 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21359 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21360 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21361 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21362 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21363 %}
21364 ins_pipe( pipe_slow );
21365 %}
21366
// A shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts the short value to an int
// with sign extension before shifting. Char vectors are fine since chars
// are unsigned values.
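// For example, with short s = (short)0x8000, the Java expression s >>> 2 is
// evaluated as 0xFFFF8000 >>> 2 == 0x3FFFE000 (low 16 bits 0xE000), whereas a
// pure 16-bit logical shift of 0x8000 by 2 would give 0x2000.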
// Shorts/Chars vector shift
21372 instruct vshiftS(vec dst, vec src, vec shift) %{
21373 predicate(!n->as_ShiftV()->is_var_shift());
21374 match(Set dst ( LShiftVS src shift));
21375 match(Set dst ( RShiftVS src shift));
21376 match(Set dst (URShiftVS src shift));
21377 effect(TEMP dst, USE src, USE shift);
21378 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21379 ins_encode %{
21380 int opcode = this->ideal_Opcode();
21381 if (UseAVX > 0) {
21382 int vlen_enc = vector_length_encoding(this);
21383 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21384 } else {
21385 int vlen = Matcher::vector_length(this);
21386 if (vlen == 2) {
21387 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21388 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21389 } else if (vlen == 4) {
21390 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21391 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21392 } else {
        assert(vlen == 8, "sanity");
21394 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21395 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21396 }
21397 }
21398 %}
21399 ins_pipe( pipe_slow );
21400 %}
21401
// Integers vector shift
21403 instruct vshiftI(vec dst, vec src, vec shift) %{
21404 predicate(!n->as_ShiftV()->is_var_shift());
21405 match(Set dst ( LShiftVI src shift));
21406 match(Set dst ( RShiftVI src shift));
21407 match(Set dst (URShiftVI src shift));
21408 effect(TEMP dst, USE src, USE shift);
21409 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21410 ins_encode %{
21411 int opcode = this->ideal_Opcode();
21412 if (UseAVX > 0) {
21413 int vlen_enc = vector_length_encoding(this);
21414 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21415 } else {
21416 int vlen = Matcher::vector_length(this);
21417 if (vlen == 2) {
21418 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21419 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21420 } else {
21421 assert(vlen == 4, "sanity");
21422 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21423 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21424 }
21425 }
21426 %}
21427 ins_pipe( pipe_slow );
21428 %}
21429
// Integers vector constant shift
21431 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21432 match(Set dst (LShiftVI src (LShiftCntV shift)));
21433 match(Set dst (RShiftVI src (RShiftCntV shift)));
21434 match(Set dst (URShiftVI src (RShiftCntV shift)));
21435 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21436 ins_encode %{
21437 int opcode = this->ideal_Opcode();
21438 if (UseAVX > 0) {
21439 int vector_len = vector_length_encoding(this);
21440 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21441 } else {
21442 int vlen = Matcher::vector_length(this);
21443 if (vlen == 2) {
21444 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21445 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21446 } else {
21447 assert(vlen == 4, "sanity");
21448 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21449 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21450 }
21451 }
21452 %}
21453 ins_pipe( pipe_slow );
21454 %}
21455
21456 // Longs vector shift
21457 instruct vshiftL(vec dst, vec src, vec shift) %{
21458 predicate(!n->as_ShiftV()->is_var_shift());
21459 match(Set dst ( LShiftVL src shift));
21460 match(Set dst (URShiftVL src shift));
21461 effect(TEMP dst, USE src, USE shift);
21462 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21463 ins_encode %{
21464 int opcode = this->ideal_Opcode();
21465 if (UseAVX > 0) {
21466 int vlen_enc = vector_length_encoding(this);
21467 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21468 } else {
21469 assert(Matcher::vector_length(this) == 2, "");
21470 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21471 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21472 }
21473 %}
21474 ins_pipe( pipe_slow );
21475 %}
21476
21477 // Longs vector constant shift
21478 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21479 match(Set dst (LShiftVL src (LShiftCntV shift)));
21480 match(Set dst (URShiftVL src (RShiftCntV shift)));
21481 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21482 ins_encode %{
21483 int opcode = this->ideal_Opcode();
21484 if (UseAVX > 0) {
21485 int vector_len = vector_length_encoding(this);
21486 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21487 } else {
21488 assert(Matcher::vector_length(this) == 2, "");
21489 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21490 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21491 }
21492 %}
21493 ins_pipe( pipe_slow );
21494 %}
21495
21496 // -------------------ArithmeticRightShift -----------------------------------
21497 // Long vector arithmetic right shift
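// SSE2/AVX2 have no packed 64-bit arithmetic right shift (vpsraq is AVX-512
// only), so it is emulated via the logical shift using the identity
//   x >>s n == ((x >>u n) ^ m) - m, where m = 0x8000000000000000 >>u n.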
21498 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21499 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21500 match(Set dst (RShiftVL src shift));
21501 effect(TEMP dst, TEMP tmp);
21502 format %{ "vshiftq $dst,$src,$shift" %}
21503 ins_encode %{
21504 uint vlen = Matcher::vector_length(this);
21505 if (vlen == 2) {
21506 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21507 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21508 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21509 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21510 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21511 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21512 } else {
21513 assert(vlen == 4, "sanity");
21514 assert(UseAVX > 1, "required");
21515 int vlen_enc = Assembler::AVX_256bit;
21516 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21517 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21518 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21519 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21520 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21521 }
21522 %}
21523 ins_pipe( pipe_slow );
21524 %}
21525
21526 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21527 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21528 match(Set dst (RShiftVL src shift));
21529 format %{ "vshiftq $dst,$src,$shift" %}
21530 ins_encode %{
21531 int vlen_enc = vector_length_encoding(this);
21532 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21533 %}
21534 ins_pipe( pipe_slow );
21535 %}
21536
21537 // ------------------- Variable Shift -----------------------------
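// Variable shifts take a per-element vector of shift counts rather than a
// single broadcast count (AVX2 vpsllvd/vpsravd/vpsrlvd and friends). Shorts
// get per-element shifts only with AVX-512BW and bytes never do, so those are
// widened, shifted at a wider element size, and narrowed back down.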
21538 // Byte variable shift
21539 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21540 predicate(Matcher::vector_length(n) <= 8 &&
21541 n->as_ShiftV()->is_var_shift() &&
21542 !VM_Version::supports_avx512bw());
21543 match(Set dst ( LShiftVB src shift));
21544 match(Set dst ( RShiftVB src shift));
21545 match(Set dst (URShiftVB src shift));
21546 effect(TEMP dst, TEMP vtmp);
21547 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21548 ins_encode %{
21549 assert(UseAVX >= 2, "required");
21550
21551 int opcode = this->ideal_Opcode();
21552 int vlen_enc = Assembler::AVX_128bit;
21553 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21554 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21555 %}
21556 ins_pipe( pipe_slow );
21557 %}
21558
21559 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21560 predicate(Matcher::vector_length(n) == 16 &&
21561 n->as_ShiftV()->is_var_shift() &&
21562 !VM_Version::supports_avx512bw());
21563 match(Set dst ( LShiftVB src shift));
21564 match(Set dst ( RShiftVB src shift));
21565 match(Set dst (URShiftVB src shift));
21566 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21567 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21568 ins_encode %{
21569 assert(UseAVX >= 2, "required");
21570
21571 int opcode = this->ideal_Opcode();
21572 int vlen_enc = Assembler::AVX_128bit;
21573 // Shift lower half and get word result in dst
21574 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21575
21576 // Shift upper half and get word result in vtmp1
21577 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21578 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21579 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21580
21581 // Merge and down convert the two word results to byte in dst
21582 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21583 %}
21584 ins_pipe( pipe_slow );
21585 %}
21586
21587 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21588 predicate(Matcher::vector_length(n) == 32 &&
21589 n->as_ShiftV()->is_var_shift() &&
21590 !VM_Version::supports_avx512bw());
21591 match(Set dst ( LShiftVB src shift));
21592 match(Set dst ( RShiftVB src shift));
21593 match(Set dst (URShiftVB src shift));
21594 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21596 ins_encode %{
21597 assert(UseAVX >= 2, "required");
21598
21599 int opcode = this->ideal_Opcode();
21600 int vlen_enc = Assembler::AVX_128bit;
21601 // Process lower 128 bits and get result in dst
21602 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21603 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21604 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21605 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21606 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21607
21608 // Process higher 128 bits and get result in vtmp3
21609 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21610 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21611 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21612 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21613 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21614 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21615 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21616
21617 // Merge the two results in dst
21618 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21619 %}
21620 ins_pipe( pipe_slow );
21621 %}
21622
21623 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21624 predicate(Matcher::vector_length(n) <= 32 &&
21625 n->as_ShiftV()->is_var_shift() &&
21626 VM_Version::supports_avx512bw());
21627 match(Set dst ( LShiftVB src shift));
21628 match(Set dst ( RShiftVB src shift));
21629 match(Set dst (URShiftVB src shift));
21630 effect(TEMP dst, TEMP vtmp);
21631 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21632 ins_encode %{
21633 assert(UseAVX > 2, "required");
21634
21635 int opcode = this->ideal_Opcode();
21636 int vlen_enc = vector_length_encoding(this);
21637 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21638 %}
21639 ins_pipe( pipe_slow );
21640 %}
21641
21642 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21643 predicate(Matcher::vector_length(n) == 64 &&
21644 n->as_ShiftV()->is_var_shift() &&
21645 VM_Version::supports_avx512bw());
21646 match(Set dst ( LShiftVB src shift));
21647 match(Set dst ( RShiftVB src shift));
21648 match(Set dst (URShiftVB src shift));
21649 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21650 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21651 ins_encode %{
21652 assert(UseAVX > 2, "required");
21653
21654 int opcode = this->ideal_Opcode();
21655 int vlen_enc = Assembler::AVX_256bit;
21656 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21657 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21658 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21659 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21660 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21661 %}
21662 ins_pipe( pipe_slow );
21663 %}
21664
21665 // Short variable shift
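// Below AVX-512BW there is no per-element word shift, so shorts are widened
// to dwords, shifted with the variable dword shift, masked back to 16 bits,
// and packed down again.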
21666 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21667 predicate(Matcher::vector_length(n) <= 8 &&
21668 n->as_ShiftV()->is_var_shift() &&
21669 !VM_Version::supports_avx512bw());
21670 match(Set dst ( LShiftVS src shift));
21671 match(Set dst ( RShiftVS src shift));
21672 match(Set dst (URShiftVS src shift));
21673 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21675 ins_encode %{
21676 assert(UseAVX >= 2, "required");
21677
21678 int opcode = this->ideal_Opcode();
21679 bool sign = (opcode != Op_URShiftVS);
21680 int vlen_enc = Assembler::AVX_256bit;
21681 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21682 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21683 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21684 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21685 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21686 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21687 %}
21688 ins_pipe( pipe_slow );
21689 %}
21690
21691 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21692 predicate(Matcher::vector_length(n) == 16 &&
21693 n->as_ShiftV()->is_var_shift() &&
21694 !VM_Version::supports_avx512bw());
21695 match(Set dst ( LShiftVS src shift));
21696 match(Set dst ( RShiftVS src shift));
21697 match(Set dst (URShiftVS src shift));
21698 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21700 ins_encode %{
21701 assert(UseAVX >= 2, "required");
21702
21703 int opcode = this->ideal_Opcode();
21704 bool sign = (opcode != Op_URShiftVS);
21705 int vlen_enc = Assembler::AVX_256bit;
21706 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21707 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21708 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21709 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21710 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21711
21712 // Shift upper half, with result in dst using vtmp1 as TEMP
21713 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21714 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21715 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21716 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21717 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21718 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21719
21720 // Merge lower and upper half result into dst
21721 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21722 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21723 %}
21724 ins_pipe( pipe_slow );
21725 %}
21726
21727 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21728 predicate(n->as_ShiftV()->is_var_shift() &&
21729 VM_Version::supports_avx512bw());
21730 match(Set dst ( LShiftVS src shift));
21731 match(Set dst ( RShiftVS src shift));
21732 match(Set dst (URShiftVS src shift));
21733 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21734 ins_encode %{
21735 assert(UseAVX > 2, "required");
21736
21737 int opcode = this->ideal_Opcode();
21738 int vlen_enc = vector_length_encoding(this);
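    // Without AVX-512VL, EVEX instructions can only be encoded at 512-bit
    // width, so widen the encoding for the AVX-512BW variable word shift.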
21739 if (!VM_Version::supports_avx512vl()) {
21740 vlen_enc = Assembler::AVX_512bit;
21741 }
21742 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21743 %}
21744 ins_pipe( pipe_slow );
21745 %}
21746
// Integer variable shift
21748 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21749 predicate(n->as_ShiftV()->is_var_shift());
21750 match(Set dst ( LShiftVI src shift));
21751 match(Set dst ( RShiftVI src shift));
21752 match(Set dst (URShiftVI src shift));
21753 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21754 ins_encode %{
21755 assert(UseAVX >= 2, "required");
21756
21757 int opcode = this->ideal_Opcode();
21758 int vlen_enc = vector_length_encoding(this);
21759 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21760 %}
21761 ins_pipe( pipe_slow );
21762 %}
21763
// Long variable shift
21765 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21766 predicate(n->as_ShiftV()->is_var_shift());
21767 match(Set dst ( LShiftVL src shift));
21768 match(Set dst (URShiftVL src shift));
21769 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21770 ins_encode %{
21771 assert(UseAVX >= 2, "required");
21772
21773 int opcode = this->ideal_Opcode();
21774 int vlen_enc = vector_length_encoding(this);
21775 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21776 %}
21777 ins_pipe( pipe_slow );
21778 %}
21779
// Long variable arithmetic right shift
21781 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21782 predicate(Matcher::vector_length(n) <= 4 &&
21783 n->as_ShiftV()->is_var_shift() &&
21784 UseAVX == 2);
21785 match(Set dst (RShiftVL src shift));
21786 effect(TEMP dst, TEMP vtmp);
21787 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21788 ins_encode %{
21789 int opcode = this->ideal_Opcode();
21790 int vlen_enc = vector_length_encoding(this);
21791 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21792 $vtmp$$XMMRegister);
21793 %}
21794 ins_pipe( pipe_slow );
21795 %}
21796
21797 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21798 predicate(n->as_ShiftV()->is_var_shift() &&
21799 UseAVX > 2);
21800 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21802 ins_encode %{
21803 int opcode = this->ideal_Opcode();
21804 int vlen_enc = vector_length_encoding(this);
21805 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21806 %}
21807 ins_pipe( pipe_slow );
21808 %}
21809
21810 // --------------------------------- AND --------------------------------------
21811
21812 instruct vand(vec dst, vec src) %{
21813 predicate(UseAVX == 0);
21814 match(Set dst (AndV dst src));
21815 format %{ "pand $dst,$src\t! and vectors" %}
21816 ins_encode %{
21817 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21818 %}
21819 ins_pipe( pipe_slow );
21820 %}
21821
21822 instruct vand_reg(vec dst, vec src1, vec src2) %{
21823 predicate(UseAVX > 0);
21824 match(Set dst (AndV src1 src2));
21825 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21826 ins_encode %{
21827 int vlen_enc = vector_length_encoding(this);
21828 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21829 %}
21830 ins_pipe( pipe_slow );
21831 %}
21832
21833 instruct vand_mem(vec dst, vec src, memory mem) %{
21834 predicate((UseAVX > 0) &&
21835 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21836 match(Set dst (AndV src (LoadVector mem)));
21837 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21838 ins_encode %{
21839 int vlen_enc = vector_length_encoding(this);
21840 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21841 %}
21842 ins_pipe( pipe_slow );
21843 %}
21844
21845 // --------------------------------- OR ---------------------------------------
21846
21847 instruct vor(vec dst, vec src) %{
21848 predicate(UseAVX == 0);
21849 match(Set dst (OrV dst src));
21850 format %{ "por $dst,$src\t! or vectors" %}
21851 ins_encode %{
21852 __ por($dst$$XMMRegister, $src$$XMMRegister);
21853 %}
21854 ins_pipe( pipe_slow );
21855 %}
21856
21857 instruct vor_reg(vec dst, vec src1, vec src2) %{
21858 predicate(UseAVX > 0);
21859 match(Set dst (OrV src1 src2));
21860 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21861 ins_encode %{
21862 int vlen_enc = vector_length_encoding(this);
21863 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21864 %}
21865 ins_pipe( pipe_slow );
21866 %}
21867
21868 instruct vor_mem(vec dst, vec src, memory mem) %{
21869 predicate((UseAVX > 0) &&
21870 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21871 match(Set dst (OrV src (LoadVector mem)));
21872 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21873 ins_encode %{
21874 int vlen_enc = vector_length_encoding(this);
21875 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21876 %}
21877 ins_pipe( pipe_slow );
21878 %}
21879
21880 // --------------------------------- XOR --------------------------------------
21881
21882 instruct vxor(vec dst, vec src) %{
21883 predicate(UseAVX == 0);
21884 match(Set dst (XorV dst src));
21885 format %{ "pxor $dst,$src\t! xor vectors" %}
21886 ins_encode %{
21887 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21888 %}
21889 ins_pipe( pipe_slow );
21890 %}
21891
21892 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21893 predicate(UseAVX > 0);
21894 match(Set dst (XorV src1 src2));
21895 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21896 ins_encode %{
21897 int vlen_enc = vector_length_encoding(this);
21898 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21899 %}
21900 ins_pipe( pipe_slow );
21901 %}
21902
21903 instruct vxor_mem(vec dst, vec src, memory mem) %{
21904 predicate((UseAVX > 0) &&
21905 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21906 match(Set dst (XorV src (LoadVector mem)));
21907 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21908 ins_encode %{
21909 int vlen_enc = vector_length_encoding(this);
21910 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21911 %}
21912 ins_pipe( pipe_slow );
21913 %}
21914
21915 // --------------------------------- VectorCast --------------------------------------
21916
21917 instruct vcastBtoX(vec dst, vec src) %{
21918 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21919 match(Set dst (VectorCastB2X src));
21920 format %{ "vector_cast_b2x $dst,$src\t!" %}
21921 ins_encode %{
21922 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21923 int vlen_enc = vector_length_encoding(this);
21924 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21925 %}
21926 ins_pipe( pipe_slow );
21927 %}
21928
21929 instruct vcastBtoD(legVec dst, legVec src) %{
21930 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21931 match(Set dst (VectorCastB2X src));
21932 format %{ "vector_cast_b2x $dst,$src\t!" %}
21933 ins_encode %{
21934 int vlen_enc = vector_length_encoding(this);
21935 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21936 %}
21937 ins_pipe( pipe_slow );
21938 %}
21939
21940 instruct castStoX(vec dst, vec src) %{
21941 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21942 Matcher::vector_length(n->in(1)) <= 8 && // src
21943 Matcher::vector_element_basic_type(n) == T_BYTE);
21944 match(Set dst (VectorCastS2X src));
21945 format %{ "vector_cast_s2x $dst,$src" %}
21946 ins_encode %{
21947 assert(UseAVX > 0, "required");
21948
21949 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21950 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21951 %}
21952 ins_pipe( pipe_slow );
21953 %}
21954
21955 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21956 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21957 Matcher::vector_length(n->in(1)) == 16 && // src
21958 Matcher::vector_element_basic_type(n) == T_BYTE);
21959 effect(TEMP dst, TEMP vtmp);
21960 match(Set dst (VectorCastS2X src));
21961 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21962 ins_encode %{
21963 assert(UseAVX > 0, "required");
21964
21965 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21966 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21967 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21968 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21969 %}
21970 ins_pipe( pipe_slow );
21971 %}
21972
21973 instruct vcastStoX_evex(vec dst, vec src) %{
21974 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21975 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21976 match(Set dst (VectorCastS2X src));
21977 format %{ "vector_cast_s2x $dst,$src\t!" %}
21978 ins_encode %{
21979 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21980 int src_vlen_enc = vector_length_encoding(this, $src);
21981 int vlen_enc = vector_length_encoding(this);
21982 switch (to_elem_bt) {
21983 case T_BYTE:
21984 if (!VM_Version::supports_avx512vl()) {
21985 vlen_enc = Assembler::AVX_512bit;
21986 }
21987 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21988 break;
21989 case T_INT:
21990 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21991 break;
21992 case T_FLOAT:
21993 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21994 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21995 break;
21996 case T_LONG:
21997 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21998 break;
21999 case T_DOUBLE: {
22000 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22001 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22002 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22003 break;
22004 }
22005 default:
22006 ShouldNotReachHere();
22007 }
22008 %}
22009 ins_pipe( pipe_slow );
22010 %}
22011
22012 instruct castItoX(vec dst, vec src) %{
22013 predicate(UseAVX <= 2 &&
22014 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22015 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22016 match(Set dst (VectorCastI2X src));
22017 format %{ "vector_cast_i2x $dst,$src" %}
22018 ins_encode %{
22019 assert(UseAVX > 0, "required");
22020
22021 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22022 int vlen_enc = vector_length_encoding(this, $src);
22023
22024 if (to_elem_bt == T_BYTE) {
22025 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22026 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22027 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22028 } else {
22029 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22030 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22031 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22032 }
22033 %}
22034 ins_pipe( pipe_slow );
22035 %}
22036
22037 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22038 predicate(UseAVX <= 2 &&
22039 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22040 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22041 match(Set dst (VectorCastI2X src));
22042 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22043 effect(TEMP dst, TEMP vtmp);
22044 ins_encode %{
22045 assert(UseAVX > 0, "required");
22046
22047 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22048 int vlen_enc = vector_length_encoding(this, $src);
22049
22050 if (to_elem_bt == T_BYTE) {
22051 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22052 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22053 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22054 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22055 } else {
22056 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22057 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22058 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22059 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22060 }
22061 %}
22062 ins_pipe( pipe_slow );
22063 %}
22064
22065 instruct vcastItoX_evex(vec dst, vec src) %{
22066 predicate(UseAVX > 2 ||
22067 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22068 match(Set dst (VectorCastI2X src));
22069 format %{ "vector_cast_i2x $dst,$src\t!" %}
22070 ins_encode %{
22071 assert(UseAVX > 0, "required");
22072
22073 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22074 int src_vlen_enc = vector_length_encoding(this, $src);
22075 int dst_vlen_enc = vector_length_encoding(this);
22076 switch (dst_elem_bt) {
22077 case T_BYTE:
22078 if (!VM_Version::supports_avx512vl()) {
22079 src_vlen_enc = Assembler::AVX_512bit;
22080 }
22081 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22082 break;
22083 case T_SHORT:
22084 if (!VM_Version::supports_avx512vl()) {
22085 src_vlen_enc = Assembler::AVX_512bit;
22086 }
22087 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22088 break;
22089 case T_FLOAT:
22090 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22091 break;
22092 case T_LONG:
22093 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22094 break;
22095 case T_DOUBLE:
22096 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22097 break;
22098 default:
22099 ShouldNotReachHere();
22100 }
22101 %}
22102 ins_pipe( pipe_slow );
22103 %}
22104
22105 instruct vcastLtoBS(vec dst, vec src) %{
22106 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22107 UseAVX <= 2);
22108 match(Set dst (VectorCastL2X src));
22109 format %{ "vector_cast_l2x $dst,$src" %}
22110 ins_encode %{
22111 assert(UseAVX > 0, "required");
22112
22113 int vlen = Matcher::vector_length_in_bytes(this, $src);
22114 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22115 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22116 : ExternalAddress(vector_int_to_short_mask());
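    // Gather the low 32 bits of each long into the low dwords, then mask and
    // pack down to the requested element size.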
22117 if (vlen <= 16) {
22118 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22119 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22120 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22121 } else {
22122 assert(vlen <= 32, "required");
22123 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22124 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22125 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22126 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22127 }
22128 if (to_elem_bt == T_BYTE) {
22129 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22130 }
22131 %}
22132 ins_pipe( pipe_slow );
22133 %}
22134
22135 instruct vcastLtoX_evex(vec dst, vec src) %{
22136 predicate(UseAVX > 2 ||
22137 (Matcher::vector_element_basic_type(n) == T_INT ||
22138 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22139 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22140 match(Set dst (VectorCastL2X src));
22141 format %{ "vector_cast_l2x $dst,$src\t!" %}
22142 ins_encode %{
22143 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22144 int vlen = Matcher::vector_length_in_bytes(this, $src);
22145 int vlen_enc = vector_length_encoding(this, $src);
22146 switch (to_elem_bt) {
22147 case T_BYTE:
22148 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22149 vlen_enc = Assembler::AVX_512bit;
22150 }
22151 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22152 break;
22153 case T_SHORT:
22154 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22155 vlen_enc = Assembler::AVX_512bit;
22156 }
22157 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22158 break;
22159 case T_INT:
22160 if (vlen == 8) {
22161 if ($dst$$XMMRegister != $src$$XMMRegister) {
22162 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22163 }
22164 } else if (vlen == 16) {
22165 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22166 } else if (vlen == 32) {
22167 if (UseAVX > 2) {
22168 if (!VM_Version::supports_avx512vl()) {
22169 vlen_enc = Assembler::AVX_512bit;
22170 }
22171 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22172 } else {
22173 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22174 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22175 }
22176 } else { // vlen == 64
22177 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22178 }
22179 break;
22180 case T_FLOAT:
22181 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22182 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22183 break;
22184 case T_DOUBLE:
22185 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22186 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22187 break;
22188
22189 default: assert(false, "%s", type2name(to_elem_bt));
22190 }
22191 %}
22192 ins_pipe( pipe_slow );
22193 %}
22194
22195 instruct vcastFtoD_reg(vec dst, vec src) %{
22196 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22197 match(Set dst (VectorCastF2X src));
22198 format %{ "vector_cast_f2d $dst,$src\t!" %}
22199 ins_encode %{
22200 int vlen_enc = vector_length_encoding(this);
22201 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22202 %}
22203 ins_pipe( pipe_slow );
22204 %}
22205
22206
22207 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22208 predicate(!VM_Version::supports_avx10_2() &&
22209 !VM_Version::supports_avx512vl() &&
22210 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22211 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22212 is_integral_type(Matcher::vector_element_basic_type(n)));
22213 match(Set dst (VectorCastF2X src));
22214 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22215 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
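  // cvttps2dq returns the "integer indefinite" value 0x80000000 for NaN and
  // out-of-range inputs; the expanded sequence below fixes those lanes up to
  // match Java's cast semantics (NaN -> 0, saturation to MIN_VALUE/MAX_VALUE).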
22216 ins_encode %{
22217 int vlen_enc = vector_length_encoding(this, $src);
22218 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load addresses wider
    // than 32 bits for register-indirect addressing, since stub constants are part of the
    // code cache and ReservedCodeCacheSize is currently capped at 2G. Targets are free to
    // raise that limit, but a code cache larger than 2G looks unreasonable in practice.
    // On the flip side, with the given cap we save a temporary register allocation, which
    // in the limiting case can prevent spilling in blocks with high register pressure.
22226 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22227 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22228 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22229 %}
22230 ins_pipe( pipe_slow );
22231 %}
22232
22233 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22234 predicate(!VM_Version::supports_avx10_2() &&
22235 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22236 is_integral_type(Matcher::vector_element_basic_type(n)));
22237 match(Set dst (VectorCastF2X src));
22238 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22239 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22240 ins_encode %{
22241 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22242 if (to_elem_bt == T_LONG) {
22243 int vlen_enc = vector_length_encoding(this);
22244 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22245 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22246 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22247 } else {
22248 int vlen_enc = vector_length_encoding(this, $src);
22249 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22250 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22251 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22252 }
22253 %}
22254 ins_pipe( pipe_slow );
22255 %}
22256
22257 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22258 predicate(VM_Version::supports_avx10_2() &&
22259 is_integral_type(Matcher::vector_element_basic_type(n)));
22260 match(Set dst (VectorCastF2X src));
22261 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22262 ins_encode %{
22263 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22264 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22265 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22266 %}
22267 ins_pipe( pipe_slow );
22268 %}
22269
22270 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22271 predicate(VM_Version::supports_avx10_2() &&
22272 is_integral_type(Matcher::vector_element_basic_type(n)));
22273 match(Set dst (VectorCastF2X (LoadVector src)));
22274 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22275 ins_encode %{
22276 int vlen = Matcher::vector_length(this);
22277 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22278 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22279 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22280 %}
22281 ins_pipe( pipe_slow );
22282 %}
22283
22284 instruct vcastDtoF_reg(vec dst, vec src) %{
22285 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22286 match(Set dst (VectorCastD2X src));
22287 format %{ "vector_cast_d2x $dst,$src\t!" %}
22288 ins_encode %{
22289 int vlen_enc = vector_length_encoding(this, $src);
22290 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22291 %}
22292 ins_pipe( pipe_slow );
22293 %}
22294
22295 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22296 predicate(!VM_Version::supports_avx10_2() &&
22297 !VM_Version::supports_avx512vl() &&
22298 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22299 is_integral_type(Matcher::vector_element_basic_type(n)));
22300 match(Set dst (VectorCastD2X src));
22301 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22302 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22303 ins_encode %{
22304 int vlen_enc = vector_length_encoding(this, $src);
22305 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22306 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22307 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22308 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22309 %}
22310 ins_pipe( pipe_slow );
22311 %}
22312
22313 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22314 predicate(!VM_Version::supports_avx10_2() &&
22315 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22316 is_integral_type(Matcher::vector_element_basic_type(n)));
22317 match(Set dst (VectorCastD2X src));
22318 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22319 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22320 ins_encode %{
22321 int vlen_enc = vector_length_encoding(this, $src);
22322 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22323 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22324 ExternalAddress(vector_float_signflip());
22325 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22326 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22327 %}
22328 ins_pipe( pipe_slow );
22329 %}
22330
22331 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22332 predicate(VM_Version::supports_avx10_2() &&
22333 is_integral_type(Matcher::vector_element_basic_type(n)));
22334 match(Set dst (VectorCastD2X src));
22335 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22336 ins_encode %{
22337 int vlen_enc = vector_length_encoding(this, $src);
22338 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22339 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22340 %}
22341 ins_pipe( pipe_slow );
22342 %}
22343
22344 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22345 predicate(VM_Version::supports_avx10_2() &&
22346 is_integral_type(Matcher::vector_element_basic_type(n)));
22347 match(Set dst (VectorCastD2X (LoadVector src)));
22348 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22349 ins_encode %{
22350 int vlen = Matcher::vector_length(this);
22351 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22352 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22353 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22354 %}
22355 ins_pipe( pipe_slow );
22356 %}
22357
22358 instruct vucast(vec dst, vec src) %{
22359 match(Set dst (VectorUCastB2X src));
22360 match(Set dst (VectorUCastS2X src));
22361 match(Set dst (VectorUCastI2X src));
22362 format %{ "vector_ucast $dst,$src\t!" %}
22363 ins_encode %{
22364 assert(UseAVX > 0, "required");
22365
22366 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22367 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22368 int vlen_enc = vector_length_encoding(this);
22369 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22370 %}
22371 ins_pipe( pipe_slow );
22372 %}
22373
22374 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22375 predicate(!VM_Version::supports_avx512vl() &&
22376 Matcher::vector_length_in_bytes(n) < 64 &&
22377 Matcher::vector_element_basic_type(n) == T_INT);
22378 match(Set dst (RoundVF src));
22379 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22380 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22381 ins_encode %{
22382 int vlen_enc = vector_length_encoding(this);
22383 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22384 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22385 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22386 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22387 %}
22388 ins_pipe( pipe_slow );
22389 %}
22390
22391 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22392 predicate((VM_Version::supports_avx512vl() ||
22393 Matcher::vector_length_in_bytes(n) == 64) &&
22394 Matcher::vector_element_basic_type(n) == T_INT);
22395 match(Set dst (RoundVF src));
22396 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22397 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22398 ins_encode %{
22399 int vlen_enc = vector_length_encoding(this);
22400 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22401 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22402 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22403 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22404 %}
22405 ins_pipe( pipe_slow );
22406 %}
22407
22408 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22409 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22410 match(Set dst (RoundVD src));
22411 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22412 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22413 ins_encode %{
22414 int vlen_enc = vector_length_encoding(this);
22415 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22416 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22417 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22418 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22419 %}
22420 ins_pipe( pipe_slow );
22421 %}
22422
22423 // --------------------------------- VectorMaskCmp --------------------------------------
22424
22425 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22426 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22427 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22428 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22429 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22430 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22431 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22432 ins_encode %{
22433 int vlen_enc = vector_length_encoding(this, $src1);
22434 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22435 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22436 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22437 } else {
22438 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22439 }
22440 %}
22441 ins_pipe( pipe_slow );
22442 %}
22443
22444 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22445 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22446 n->bottom_type()->isa_vectmask() == nullptr &&
22447 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22448 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22449 effect(TEMP ktmp);
22450 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
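  // The 512-bit compare produces a mask register; the boolean vector is then
  // materialized by a masked load of all-ones (unselected lanes are zeroed
  // because merge is false).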
22451 ins_encode %{
22452 int vlen_enc = Assembler::AVX_512bit;
22453 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22454 KRegister mask = k0; // The comparison itself is not being masked.
22455 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22456 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22457 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22458 } else {
22459 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22460 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22461 }
22462 %}
22463 ins_pipe( pipe_slow );
22464 %}
22465
22466 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22467 predicate(n->bottom_type()->isa_vectmask() &&
22468 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22469 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22470 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22471 ins_encode %{
22472 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22473 int vlen_enc = vector_length_encoding(this, $src1);
22474 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22475 KRegister mask = k0; // The comparison itself is not being masked.
22476 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22477 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22478 } else {
22479 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22480 }
22481 %}
22482 ins_pipe( pipe_slow );
22483 %}
22484
22485 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22486 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22487 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22488 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22489 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22490 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22491 (n->in(2)->get_int() == BoolTest::eq ||
22492 n->in(2)->get_int() == BoolTest::lt ||
22493 n->in(2)->get_int() == BoolTest::gt)); // cond
22494 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22495 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22496 ins_encode %{
22497 int vlen_enc = vector_length_encoding(this, $src1);
22498 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22499 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22500 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22501 %}
22502 ins_pipe( pipe_slow );
22503 %}
22504
22505 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22506 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22507 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22508 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22509 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22510 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22511 (n->in(2)->get_int() == BoolTest::ne ||
22512 n->in(2)->get_int() == BoolTest::le ||
22513 n->in(2)->get_int() == BoolTest::ge)); // cond
22514 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22515 effect(TEMP dst, TEMP xtmp);
22516 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22517 ins_encode %{
22518 int vlen_enc = vector_length_encoding(this, $src1);
22519 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22520 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22521 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22522 %}
22523 ins_pipe( pipe_slow );
22524 %}
22525
22526 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22527 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22528 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22529 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22530 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22531 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22532 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22533 effect(TEMP dst, TEMP xtmp);
22534 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22535 ins_encode %{
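    // There is no unsigned packed compare before AVX-512, so flip the sign bit
    // of both operands and do a signed compare: a <u b iff (a ^ MIN) <s (b ^ MIN).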
22536 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22537 int vlen_enc = vector_length_encoding(this, $src1);
22538 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22539 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22540
22541 if (vlen_enc == Assembler::AVX_128bit) {
22542 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22543 } else {
22544 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22545 }
22546 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22547 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22548 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22549 %}
22550 ins_pipe( pipe_slow );
22551 %}
22552
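// Note: for 64-byte integral vectors the compare result is first produced in an opmask register
// (evpcmpd/evpcmpq into $ktmp) and is then expanded into a vector of all-ones / all-zero lanes
// by a masked load of the vector_all_bits_set() constant.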
22553 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22554 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22555 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22556 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22557 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22558 effect(TEMP ktmp);
22559 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22560 ins_encode %{
22561 assert(UseAVX > 2, "required");
22562
22563 int vlen_enc = vector_length_encoding(this, $src1);
22564 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22565 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22566 KRegister mask = k0; // The comparison itself is not being masked.
22567 bool merge = false;
22568 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22569
22570 switch (src1_elem_bt) {
22571 case T_INT: {
22572 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22573 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22574 break;
22575 }
22576 case T_LONG: {
22577 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22578 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22579 break;
22580 }
22581 default: assert(false, "%s", type2name(src1_elem_bt));
22582 }
22583 %}
22584 ins_pipe( pipe_slow );
22585 %}
22586
22587
22588 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22589 predicate(n->bottom_type()->isa_vectmask() &&
22590 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22591 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22592 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22593 ins_encode %{
22594 assert(UseAVX > 2, "required");
22595 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22596
22597 int vlen_enc = vector_length_encoding(this, $src1);
22598 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22599 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22600 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22601
22602 // Comparison result is produced directly in the opmask register.
22603 switch (src1_elem_bt) {
22604 case T_BYTE: {
22605 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22606 break;
22607 }
22608 case T_SHORT: {
22609 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22610 break;
22611 }
22612 case T_INT: {
22613 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22614 break;
22615 }
22616 case T_LONG: {
22617 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22618 break;
22619 }
22620 default: assert(false, "%s", type2name(src1_elem_bt));
22621 }
22622 %}
22623 ins_pipe( pipe_slow );
22624 %}
22625
22626 // Extract
22627
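// Note: for vectors of at most 16 bytes the element is extracted straight from the XMM register
// (get_elem); for 256/512-bit vectors the 128-bit lane holding the element is first isolated
// into $vtmp (get_lane) and the element is then extracted from that lane.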
22628 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22629 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22630 match(Set dst (ExtractI src idx));
22631 match(Set dst (ExtractS src idx));
22632 match(Set dst (ExtractB src idx));
22633 format %{ "extractI $dst,$src,$idx\t!" %}
22634 ins_encode %{
22635 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22636
22637 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22638 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22639 %}
22640 ins_pipe( pipe_slow );
22641 %}
22642
22643 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22644 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22645 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22646 match(Set dst (ExtractI src idx));
22647 match(Set dst (ExtractS src idx));
22648 match(Set dst (ExtractB src idx));
22649 effect(TEMP vtmp);
22650 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22651 ins_encode %{
22652 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22653
22654 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22655 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22656 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22657 %}
22658 ins_pipe( pipe_slow );
22659 %}
22660
22661 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22662 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22663 match(Set dst (ExtractL src idx));
22664 format %{ "extractL $dst,$src,$idx\t!" %}
22665 ins_encode %{
22666 assert(UseSSE >= 4, "required");
22667 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22668
22669 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22670 %}
22671 ins_pipe( pipe_slow );
22672 %}
22673
22674 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22675 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22676 Matcher::vector_length(n->in(1)) == 8); // src
22677 match(Set dst (ExtractL src idx));
22678 effect(TEMP vtmp);
22679 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22680 ins_encode %{
22681 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22682
22683 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22684 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22685 %}
22686 ins_pipe( pipe_slow );
22687 %}
22688
22689 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22690 predicate(Matcher::vector_length(n->in(1)) <= 4);
22691 match(Set dst (ExtractF src idx));
22692 effect(TEMP dst, TEMP vtmp);
22693 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22694 ins_encode %{
22695 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22696
22697 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22698 %}
22699 ins_pipe( pipe_slow );
22700 %}
22701
22702 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22703 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22704 Matcher::vector_length(n->in(1)/*src*/) == 16);
22705 match(Set dst (ExtractF src idx));
22706 effect(TEMP vtmp);
22707 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22708 ins_encode %{
22709 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22710
22711 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22712 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22713 %}
22714 ins_pipe( pipe_slow );
22715 %}
22716
22717 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22718 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22719 match(Set dst (ExtractD src idx));
22720 format %{ "extractD $dst,$src,$idx\t!" %}
22721 ins_encode %{
22722 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22723
22724 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22725 %}
22726 ins_pipe( pipe_slow );
22727 %}
22728
22729 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22730 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22731 Matcher::vector_length(n->in(1)) == 8); // src
22732 match(Set dst (ExtractD src idx));
22733 effect(TEMP vtmp);
22734 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22735 ins_encode %{
22736 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22737
22738 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22739 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22740 %}
22741 ins_pipe( pipe_slow );
22742 %}
22743
22744 // --------------------------------- Vector Blend --------------------------------------
22745
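// Note: VectorBlend selects per lane, roughly dst[i] = mask[i] ? src2[i] : src1[i] (in the SSE
// rule below, $src replaces the $dst lanes selected by xmm0). The variants differ only in how
// the mask is consumed: vpblendvb for integral lanes, vblendvps for FP lanes, an
// andn/and/or sequence tuned for E-cores, and opmask-driven blends on AVX-512.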
22746 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22747 predicate(UseAVX == 0);
22748 match(Set dst (VectorBlend (Binary dst src) mask));
22749 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22750 effect(TEMP tmp);
22751 ins_encode %{
22752 assert(UseSSE >= 4, "required");
22753
22754 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22755 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22756 }
22757 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22758 %}
22759 ins_pipe( pipe_slow );
22760 %}
22761
22762 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22763 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22764 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22765 Matcher::vector_length_in_bytes(n) <= 32 &&
22766 is_integral_type(Matcher::vector_element_basic_type(n)));
22767 match(Set dst (VectorBlend (Binary src1 src2) mask));
22768 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22769 ins_encode %{
22770 int vlen_enc = vector_length_encoding(this);
22771 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22772 %}
22773 ins_pipe( pipe_slow );
22774 %}
22775
22776 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22777 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22778 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22779 Matcher::vector_length_in_bytes(n) <= 32 &&
22780 !is_integral_type(Matcher::vector_element_basic_type(n)));
22781 match(Set dst (VectorBlend (Binary src1 src2) mask));
22782 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22783 ins_encode %{
22784 int vlen_enc = vector_length_encoding(this);
22785 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22786 %}
22787 ins_pipe( pipe_slow );
22788 %}
22789
22790 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22791 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22792 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22793 Matcher::vector_length_in_bytes(n) <= 32);
22794 match(Set dst (VectorBlend (Binary src1 src2) mask));
22795 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22796 effect(TEMP vtmp, TEMP dst);
22797 ins_encode %{
22798 int vlen_enc = vector_length_encoding(this);
22799 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22800 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22801 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22802 %}
22803 ins_pipe( pipe_slow );
22804 %}
22805
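// Note: without a true vector mask the 64-byte blend below first compares the byte mask against
// the all-bits-set constant to derive an opmask in $ktmp, which then drives a merging evpblend.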
22806 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22807 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22808 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22809 match(Set dst (VectorBlend (Binary src1 src2) mask));
22810 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22811 effect(TEMP ktmp);
22812 ins_encode %{
22813 int vlen_enc = Assembler::AVX_512bit;
22814 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22815 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22816 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22817 %}
22818 ins_pipe( pipe_slow );
22819 %}
22820
22821
22822 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22823 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22824 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22825 VM_Version::supports_avx512bw()));
22826 match(Set dst (VectorBlend (Binary src1 src2) mask));
22827 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22828 ins_encode %{
22829 int vlen_enc = vector_length_encoding(this);
22830 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22831 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22832 %}
22833 ins_pipe( pipe_slow );
22834 %}
22835
22836 // --------------------------------- ABS --------------------------------------
22837 // a = |a|
22838 instruct vabsB_reg(vec dst, vec src) %{
22839 match(Set dst (AbsVB src));
22840 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22841 ins_encode %{
22842 uint vlen = Matcher::vector_length(this);
22843 if (vlen <= 16) {
22844 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22845 } else {
22846 int vlen_enc = vector_length_encoding(this);
22847 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22848 }
22849 %}
22850 ins_pipe( pipe_slow );
22851 %}
22852
22853 instruct vabsS_reg(vec dst, vec src) %{
22854 match(Set dst (AbsVS src));
22855 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22856 ins_encode %{
22857 uint vlen = Matcher::vector_length(this);
22858 if (vlen <= 8) {
22859 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22860 } else {
22861 int vlen_enc = vector_length_encoding(this);
22862 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22863 }
22864 %}
22865 ins_pipe( pipe_slow );
22866 %}
22867
22868 instruct vabsI_reg(vec dst, vec src) %{
22869 match(Set dst (AbsVI src));
22870 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22871 ins_encode %{
22872 uint vlen = Matcher::vector_length(this);
22873 if (vlen <= 4) {
22874 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22875 } else {
22876 int vlen_enc = vector_length_encoding(this);
22877 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22878 }
22879 %}
22880 ins_pipe( pipe_slow );
22881 %}
22882
22883 instruct vabsL_reg(vec dst, vec src) %{
22884 match(Set dst (AbsVL src));
22885 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22886 ins_encode %{
22887 assert(UseAVX > 2, "required");
22888 int vlen_enc = vector_length_encoding(this);
22889 if (!VM_Version::supports_avx512vl()) {
22890 vlen_enc = Assembler::AVX_512bit;
22891 }
22892 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22893 %}
22894 ins_pipe( pipe_slow );
22895 %}
22896
22897 // --------------------------------- ABSNEG --------------------------------------
22898
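// Note: abs/neg of packed floats and doubles are sign-bit tricks: abs clears the sign bit (AND
// with the inverted sign mask) and neg flips it (XOR with the sign mask), e.g. the float sign
// mask is 0x80000000. The "[mask]" in the formats refers to that constant; the vabsnegf/vabsnegd
// helpers pick it based on the ideal opcode.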
22899 instruct vabsnegF(vec dst, vec src) %{
22900 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22901 match(Set dst (AbsVF src));
22902 match(Set dst (NegVF src));
22903 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22904 ins_cost(150);
22905 ins_encode %{
22906 int opcode = this->ideal_Opcode();
22907 int vlen = Matcher::vector_length(this);
22908 if (vlen == 2) {
22909 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22910 } else {
22911 assert(vlen == 8 || vlen == 16, "required");
22912 int vlen_enc = vector_length_encoding(this);
22913 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22914 }
22915 %}
22916 ins_pipe( pipe_slow );
22917 %}
22918
22919 instruct vabsneg4F(vec dst) %{
22920 predicate(Matcher::vector_length(n) == 4);
22921 match(Set dst (AbsVF dst));
22922 match(Set dst (NegVF dst));
22923 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22924 ins_cost(150);
22925 ins_encode %{
22926 int opcode = this->ideal_Opcode();
22927 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22928 %}
22929 ins_pipe( pipe_slow );
22930 %}
22931
22932 instruct vabsnegD(vec dst, vec src) %{
22933 match(Set dst (AbsVD src));
22934 match(Set dst (NegVD src));
22935 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22936 ins_encode %{
22937 int opcode = this->ideal_Opcode();
22938 uint vlen = Matcher::vector_length(this);
22939 if (vlen == 2) {
22940 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22941 } else {
22942 int vlen_enc = vector_length_encoding(this);
22943 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22944 }
22945 %}
22946 ins_pipe( pipe_slow );
22947 %}
22948
22949 //------------------------------------- VectorTest --------------------------------------------
22950
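// Note: the vptest_* rules set the flags directly from the vector operands via the vectortest()
// helper, while the ktest_* rules test opmask registers. For mask lengths below 8 the mask is
// copied to a GPR and clipped to the live bits, e.g. with masklen == 4 only (1 << 4) - 1 == 0xF
// is kept; "all true" then means (k & 0xF) == 0xF and "any true" means (k & 0xF) != 0.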
22951 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22952 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22953 match(Set cr (VectorTest src1 src2));
22954 effect(TEMP vtmp);
22955 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22956 ins_encode %{
22957 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22958 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22959 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22960 %}
22961 ins_pipe( pipe_slow );
22962 %}
22963
22964 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22965 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22966 match(Set cr (VectorTest src1 src2));
22967 format %{ "vptest_ge16 $src1, $src2\n\t" %}
22968 ins_encode %{
22969 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22970 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22971 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22972 %}
22973 ins_pipe( pipe_slow );
22974 %}
22975
22976 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22977 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22978 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22979 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22980 match(Set cr (VectorTest src1 src2));
22981 effect(TEMP tmp);
22982 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22983 ins_encode %{
22984 uint masklen = Matcher::vector_length(this, $src1);
22985 __ kmovwl($tmp$$Register, $src1$$KRegister);
22986 __ andl($tmp$$Register, (1 << masklen) - 1);
22987 __ cmpl($tmp$$Register, (1 << masklen) - 1);
22988 %}
22989 ins_pipe( pipe_slow );
22990 %}
22991
22992 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22993 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22994 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22995 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22996 match(Set cr (VectorTest src1 src2));
22997 effect(TEMP tmp);
22998 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22999 ins_encode %{
23000 uint masklen = Matcher::vector_length(this, $src1);
23001 __ kmovwl($tmp$$Register, $src1$$KRegister);
23002 __ andl($tmp$$Register, (1 << masklen) - 1);
23003 %}
23004 ins_pipe( pipe_slow );
23005 %}
23006
23007 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23008 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23009 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23010 match(Set cr (VectorTest src1 src2));
23011 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23012 ins_encode %{
23013 uint masklen = Matcher::vector_length(this, $src1);
23014 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23015 %}
23016 ins_pipe( pipe_slow );
23017 %}
23018
23019 //------------------------------------- LoadMask --------------------------------------------
23020
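// Note: VectorLoadMask widens a vector of boolean bytes (0 or 1 per lane) into a mask: either a
// vector whose lanes are all-zero / all-ones, or, when a true vector mask type is in use, an
// opmask register with one bit per lane.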
23021 instruct loadMask(legVec dst, legVec src) %{
23022 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23023 match(Set dst (VectorLoadMask src));
23024 effect(TEMP dst);
23025 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23026 ins_encode %{
23027 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23028 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23029 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23030 %}
23031 ins_pipe( pipe_slow );
23032 %}
23033
23034 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23035 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23036 match(Set dst (VectorLoadMask src));
23037 effect(TEMP xtmp);
23038 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23039 ins_encode %{
23040 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23041 true, Assembler::AVX_512bit);
23042 %}
23043 ins_pipe( pipe_slow );
23044 %}
23045
23046 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23047 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23048 match(Set dst (VectorLoadMask src));
23049 effect(TEMP xtmp);
23050 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23051 ins_encode %{
23052 int vlen_enc = vector_length_encoding(in(1));
23053 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23054 false, vlen_enc);
23055 %}
23056 ins_pipe( pipe_slow );
23057 %}
23058
23059 //------------------------------------- StoreMask --------------------------------------------
23060
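// Note: VectorStoreMask is the inverse of VectorLoadMask: it narrows a lane mask back to one
// byte per lane holding 0 or 1. The trailing pabsb/vpabsb maps an all-ones lane (-1) to 1,
// e.g. 0xFF -> 0x01.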
23061 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23062 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23063 match(Set dst (VectorStoreMask src size));
23064 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23065 ins_encode %{
23066 int vlen = Matcher::vector_length(this);
23067 if (vlen <= 16 && UseAVX <= 2) {
23068 assert(UseSSE >= 3, "required");
23069 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23070 } else {
23071 assert(UseAVX > 0, "required");
23072 int src_vlen_enc = vector_length_encoding(this, $src);
23073 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23074 }
23075 %}
23076 ins_pipe( pipe_slow );
23077 %}
23078
23079 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23080 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23081 match(Set dst (VectorStoreMask src size));
23082 effect(TEMP_DEF dst, TEMP xtmp);
23083 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23084 ins_encode %{
23085 int vlen_enc = Assembler::AVX_128bit;
23086 int vlen = Matcher::vector_length(this);
23087 if (vlen <= 8) {
23088 assert(UseSSE >= 3, "required");
23089 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23090 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23091 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23092 } else {
23093 assert(UseAVX > 0, "required");
23094 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23095 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23096 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23097 }
23098 %}
23099 ins_pipe( pipe_slow );
23100 %}
23101
23102 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23103 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23104 match(Set dst (VectorStoreMask src size));
23105 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23106 effect(TEMP_DEF dst, TEMP xtmp);
23107 ins_encode %{
23108 int vlen_enc = Assembler::AVX_128bit;
23109 int vlen = Matcher::vector_length(this);
23110 if (vlen <= 4) {
23111 assert(UseSSE >= 3, "required");
23112 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23113 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23114 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23115 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23116 } else {
23117 assert(UseAVX > 0, "required");
23118 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23119 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23120 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23121 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23122 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23123 }
23124 %}
23125 ins_pipe( pipe_slow );
23126 %}
23127
23128 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23129 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23130 match(Set dst (VectorStoreMask src size));
23131 effect(TEMP_DEF dst, TEMP xtmp);
23132 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23133 ins_encode %{
23134 assert(UseSSE >= 3, "required");
23135 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23136 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23137 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23138 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23139 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23140 %}
23141 ins_pipe( pipe_slow );
23142 %}
23143
23144 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23145 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23146 match(Set dst (VectorStoreMask src size));
23147 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23148 effect(TEMP_DEF dst, TEMP vtmp);
23149 ins_encode %{
23150 int vlen_enc = Assembler::AVX_128bit;
23151 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23152 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23153 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23154 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23155 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23156 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23157 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23158 %}
23159 ins_pipe( pipe_slow );
23160 %}
23161
23162 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23163 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23164 match(Set dst (VectorStoreMask src size));
23165 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23166 ins_encode %{
23167 int src_vlen_enc = vector_length_encoding(this, $src);
23168 int dst_vlen_enc = vector_length_encoding(this);
23169 if (!VM_Version::supports_avx512vl()) {
23170 src_vlen_enc = Assembler::AVX_512bit;
23171 }
23172 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23173 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23174 %}
23175 ins_pipe( pipe_slow );
23176 %}
23177
23178 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23179 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23180 match(Set dst (VectorStoreMask src size));
23181 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23182 ins_encode %{
23183 int src_vlen_enc = vector_length_encoding(this, $src);
23184 int dst_vlen_enc = vector_length_encoding(this);
23185 if (!VM_Version::supports_avx512vl()) {
23186 src_vlen_enc = Assembler::AVX_512bit;
23187 }
23188 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23189 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23190 %}
23191 ins_pipe( pipe_slow );
23192 %}
23193
23194 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23195 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23196 match(Set dst (VectorStoreMask mask size));
23197 effect(TEMP_DEF dst);
23198 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23199 ins_encode %{
23200 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23201 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23202 false, Assembler::AVX_512bit, noreg);
23203 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23204 %}
23205 ins_pipe( pipe_slow );
23206 %}
23207
23208 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23209 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23210 match(Set dst (VectorStoreMask mask size));
23211 effect(TEMP_DEF dst);
23212 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23213 ins_encode %{
23214 int dst_vlen_enc = vector_length_encoding(this);
23215 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23216 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23217 %}
23218 ins_pipe( pipe_slow );
23219 %}
23220
23221 instruct vmaskcast_evex(kReg dst) %{
23222 match(Set dst (VectorMaskCast dst));
23223 ins_cost(0);
23224 format %{ "vector_mask_cast $dst" %}
23225 ins_encode %{
23226 // empty
23227 %}
23228 ins_pipe(empty);
23229 %}
23230
23231 instruct vmaskcast(vec dst) %{
23232 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23233 match(Set dst (VectorMaskCast dst));
23234 ins_cost(0);
23235 format %{ "vector_mask_cast $dst" %}
23236 ins_encode %{
23237 // empty
23238 %}
23239 ins_pipe(empty);
23240 %}
23241
23242 instruct vmaskcast_avx(vec dst, vec src) %{
23243 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23244 match(Set dst (VectorMaskCast src));
23245 format %{ "vector_mask_cast $dst, $src" %}
23246 ins_encode %{
23247 int vlen = Matcher::vector_length(this);
23248 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23249 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23250 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23251 %}
23252 ins_pipe(pipe_slow);
23253 %}
23254
23255 //-------------------------------- Load Iota Indices ----------------------------------
23256
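// Note: VectorLoadConst materializes the index sequence 0, 1, 2, ... from a constant table.
// PopulateIndex below builds start + i for every lane i by broadcasting the start value and
// adding it to that iota sequence (the stride $src2 is restricted to 1 here, see the assert).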
23257 instruct loadIotaIndices(vec dst, immI_0 src) %{
23258 match(Set dst (VectorLoadConst src));
23259 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23260 ins_encode %{
23261 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23262 BasicType bt = Matcher::vector_element_basic_type(this);
23263 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23264 %}
23265 ins_pipe( pipe_slow );
23266 %}
23267
23268 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23269 match(Set dst (PopulateIndex src1 src2));
23270 effect(TEMP dst, TEMP vtmp);
23271 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23272 ins_encode %{
23273 assert($src2$$constant == 1, "required");
23274 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23275 int vlen_enc = vector_length_encoding(this);
23276 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23277 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23278 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23279 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23280 %}
23281 ins_pipe( pipe_slow );
23282 %}
23283
23284 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23285 match(Set dst (PopulateIndex src1 src2));
23286 effect(TEMP dst, TEMP vtmp);
23287 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23288 ins_encode %{
23289 assert($src2$$constant == 1, "required");
23290 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23291 int vlen_enc = vector_length_encoding(this);
23292 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23293 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23294 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23295 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23296 %}
23297 ins_pipe( pipe_slow );
23298 %}
23299
23300 //-------------------------------- Rearrange ----------------------------------
23301
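// Note: VectorRearrange computes dst[i] = src[shuffle[i]]. On AVX2, vpshufb only shuffles within
// each 128-bit lane, so the 256-bit byte/short rules below build the result from two shuffles
// (own lane and swapped lanes) and blend them with a mask derived from the shuffle indices;
// AVX-512 VBMI/BW provide true cross-lane vpermb/vpermw.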
23302 // LoadShuffle/Rearrange for Byte
23303 instruct rearrangeB(vec dst, vec shuffle) %{
23304 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23305 Matcher::vector_length(n) < 32);
23306 match(Set dst (VectorRearrange dst shuffle));
23307 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23308 ins_encode %{
23309 assert(UseSSE >= 4, "required");
23310 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23311 %}
23312 ins_pipe( pipe_slow );
23313 %}
23314
23315 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23316 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23317 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23318 match(Set dst (VectorRearrange src shuffle));
23319 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23320 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23321 ins_encode %{
23322 assert(UseAVX >= 2, "required");
23323 // Swap src into vtmp1
23324 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23325 // Shuffle the swapped src to get entries from the other 128-bit lane
23326 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23327 // Shuffle the original src to get entries from its own 128-bit lane
23328 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23329 // Create a blend mask by setting the high bit for entries coming from the other lane in the shuffle
23330 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23331 // Perform the blend
23332 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23333 %}
23334 ins_pipe( pipe_slow );
23335 %}
23336
23337
23338 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23339 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23340 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23341 match(Set dst (VectorRearrange src shuffle));
23342 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23343 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23344 ins_encode %{
23345 int vlen_enc = vector_length_encoding(this);
23346 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23347 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23348 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23349 %}
23350 ins_pipe( pipe_slow );
23351 %}
23352
23353 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23354 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23355 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23356 match(Set dst (VectorRearrange src shuffle));
23357 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23358 ins_encode %{
23359 int vlen_enc = vector_length_encoding(this);
23360 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23361 %}
23362 ins_pipe( pipe_slow );
23363 %}
23364
23365 // LoadShuffle/Rearrange for Short
23366
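// Note: without AVX-512BW there is no lane-crossing word shuffle, so a short shuffle mask is
// rewritten as a byte shuffle mask: each index k becomes the byte pair (2k, 2k + 1), e.g. short
// index 3 -> bytes (6, 7). That is what the multiply-by-two, duplicate and add-0/1 sequence in
// loadShuffleS implements.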
23367 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23368 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23369 !VM_Version::supports_avx512bw());
23370 match(Set dst (VectorLoadShuffle src));
23371 effect(TEMP dst, TEMP vtmp);
23372 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23373 ins_encode %{
23374 // Create a byte shuffle mask from the short shuffle mask;
23375 // only a byte shuffle instruction is available on these platforms
23376 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23377 if (UseAVX == 0) {
23378 assert(vlen_in_bytes <= 16, "required");
23379 // Multiply each shuffle by two to get byte index
23380 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23381 __ psllw($vtmp$$XMMRegister, 1);
23382
23383 // Duplicate to create 2 copies of byte index
23384 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23385 __ psllw($dst$$XMMRegister, 8);
23386 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23387
23388 // Add one to get alternate byte index
23389 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23390 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23391 } else {
23392 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23393 int vlen_enc = vector_length_encoding(this);
23394 // Multiply each shuffle by two to get byte index
23395 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23396
23397 // Duplicate to create 2 copies of byte index
23398 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23399 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23400
23401 // Add one to get alternate byte index
23402 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23403 }
23404 %}
23405 ins_pipe( pipe_slow );
23406 %}
23407
23408 instruct rearrangeS(vec dst, vec shuffle) %{
23409 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23410 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23411 match(Set dst (VectorRearrange dst shuffle));
23412 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23413 ins_encode %{
23414 assert(UseSSE >= 4, "required");
23415 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23416 %}
23417 ins_pipe( pipe_slow );
23418 %}
23419
23420 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23421 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23422 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23423 match(Set dst (VectorRearrange src shuffle));
23424 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23425 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23426 ins_encode %{
23427 assert(UseAVX >= 2, "required");
23428 // Swap src into vtmp1
23429 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23430 // Shuffle the swapped src to get entries from the other 128-bit lane
23431 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23432 // Shuffle the original src to get entries from its own 128-bit lane
23433 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23434 // Create a blend mask by setting the high bit for entries coming from the other lane in the shuffle
23435 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23436 // Perform the blend
23437 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23438 %}
23439 ins_pipe( pipe_slow );
23440 %}
23441
23442 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23443 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23444 VM_Version::supports_avx512bw());
23445 match(Set dst (VectorRearrange src shuffle));
23446 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23447 ins_encode %{
23448 int vlen_enc = vector_length_encoding(this);
23449 if (!VM_Version::supports_avx512vl()) {
23450 vlen_enc = Assembler::AVX_512bit;
23451 }
23452 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23453 %}
23454 ins_pipe( pipe_slow );
23455 %}
23456
23457 // LoadShuffle/Rearrange for Integer and Float
23458
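// Note: on pure SSE the int shuffle mask is likewise expanded to a byte shuffle mask: each index
// k becomes the four bytes (4k, 4k+1, 4k+2, 4k+3), e.g. int index 2 -> bytes (8, 9, 10, 11),
// which is what the shift-by-two, duplicate and add-per-byte-offset sequence in loadShuffleI builds.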
23459 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23460 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23461 Matcher::vector_length(n) == 4 && UseAVX == 0);
23462 match(Set dst (VectorLoadShuffle src));
23463 effect(TEMP dst, TEMP vtmp);
23464 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23465 ins_encode %{
23466 assert(UseSSE >= 4, "required");
23467
23468 // Create a byte shuffle mask from the int shuffle mask;
23469 // only a byte shuffle instruction is available on these platforms
23470
23471 // Duplicate and multiply each shuffle by 4
23472 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23473 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23474 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23475 __ psllw($vtmp$$XMMRegister, 2);
23476
23477 // Duplicate again to create 4 copies of byte index
23478 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23479 __ psllw($dst$$XMMRegister, 8);
23480 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23481
23482 // Add 3,2,1,0 to get alternate byte index
23483 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23484 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23485 %}
23486 ins_pipe( pipe_slow );
23487 %}
23488
23489 instruct rearrangeI(vec dst, vec shuffle) %{
23490 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23491 UseAVX == 0);
23492 match(Set dst (VectorRearrange dst shuffle));
23493 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23494 ins_encode %{
23495 assert(UseSSE >= 4, "required");
23496 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23497 %}
23498 ins_pipe( pipe_slow );
23499 %}
23500
23501 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23502 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23503 UseAVX > 0);
23504 match(Set dst (VectorRearrange src shuffle));
23505 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23506 ins_encode %{
23507 int vlen_enc = vector_length_encoding(this);
23508 BasicType bt = Matcher::vector_element_basic_type(this);
23509 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23510 %}
23511 ins_pipe( pipe_slow );
23512 %}
23513
23514 // LoadShuffle/Rearrange for Long and Double
23515
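// Note: without AVX-512VL there is no 256-bit cross-lane qword shuffle, so a long shuffle mask is
// rewritten as a double word shuffle mask: each index k becomes the pair (2k, 2k + 1), e.g. long
// index 3 -> dwords (6, 7), and the rearrange is then done with vpermd.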
23516 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23517 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23518 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23519 match(Set dst (VectorLoadShuffle src));
23520 effect(TEMP dst, TEMP vtmp);
23521 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23522 ins_encode %{
23523 assert(UseAVX >= 2, "required");
23524
23525 int vlen_enc = vector_length_encoding(this);
23526 // Create a double word shuffle mask from the long shuffle mask;
23527 // only a double word shuffle instruction is available on these platforms
23528
23529 // Multiply each shuffle by two to get double word index
23530 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23531
23532 // Duplicate each double word shuffle
23533 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23534 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23535
23536 // Add one to get alternate double word index
23537 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23538 %}
23539 ins_pipe( pipe_slow );
23540 %}
23541
23542 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23543 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23544 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23545 match(Set dst (VectorRearrange src shuffle));
23546 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23547 ins_encode %{
23548 assert(UseAVX >= 2, "required");
23549
23550 int vlen_enc = vector_length_encoding(this);
23551 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23552 %}
23553 ins_pipe( pipe_slow );
23554 %}
23555
23556 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23557 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23558 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23559 match(Set dst (VectorRearrange src shuffle));
23560 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23561 ins_encode %{
23562 assert(UseAVX > 2, "required");
23563
23564 int vlen_enc = vector_length_encoding(this);
23565 if (vlen_enc == Assembler::AVX_128bit) {
23566 vlen_enc = Assembler::AVX_256bit;
23567 }
23568 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23569 %}
23570 ins_pipe( pipe_slow );
23571 %}
23572
23573 // --------------------------------- FMA --------------------------------------
23574 // a * b + c
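// Note: the fused multiply-add rounds only once, so the result can differ in the last ulp from
// an unfused multiply followed by an add.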
23575
23576 instruct vfmaF_reg(vec a, vec b, vec c) %{
23577 match(Set c (FmaVF c (Binary a b)));
23578 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23579 ins_cost(150);
23580 ins_encode %{
23581 assert(UseFMA, "not enabled");
23582 int vlen_enc = vector_length_encoding(this);
23583 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23584 %}
23585 ins_pipe( pipe_slow );
23586 %}
23587
23588 instruct vfmaF_mem(vec a, memory b, vec c) %{
23589 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23590 match(Set c (FmaVF c (Binary a (LoadVector b))));
23591 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23592 ins_cost(150);
23593 ins_encode %{
23594 assert(UseFMA, "not enabled");
23595 int vlen_enc = vector_length_encoding(this);
23596 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23597 %}
23598 ins_pipe( pipe_slow );
23599 %}
23600
23601 instruct vfmaD_reg(vec a, vec b, vec c) %{
23602 match(Set c (FmaVD c (Binary a b)));
23603 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23604 ins_cost(150);
23605 ins_encode %{
23606 assert(UseFMA, "not enabled");
23607 int vlen_enc = vector_length_encoding(this);
23608 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23609 %}
23610 ins_pipe( pipe_slow );
23611 %}
23612
23613 instruct vfmaD_mem(vec a, memory b, vec c) %{
23614 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23615 match(Set c (FmaVD c (Binary a (LoadVector b))));
23616 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23617 ins_cost(150);
23618 ins_encode %{
23619 assert(UseFMA, "not enabled");
23620 int vlen_enc = vector_length_encoding(this);
23621 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23622 %}
23623 ins_pipe( pipe_slow );
23624 %}
23625
23626 // --------------------------------- Vector Multiply Add --------------------------------------
23627
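// Note: pmaddwd/vpmaddwd multiplies adjacent pairs of signed 16-bit lanes and sums each pair
// into a 32-bit lane: dst32[i] = a16[2i] * b16[2i] + a16[2i+1] * b16[2i+1].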
23628 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23629 predicate(UseAVX == 0);
23630 match(Set dst (MulAddVS2VI dst src1));
23631 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23632 ins_encode %{
23633 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23634 %}
23635 ins_pipe( pipe_slow );
23636 %}
23637
23638 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23639 predicate(UseAVX > 0);
23640 match(Set dst (MulAddVS2VI src1 src2));
23641 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23642 ins_encode %{
23643 int vlen_enc = vector_length_encoding(this);
23644 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23645 %}
23646 ins_pipe( pipe_slow );
23647 %}
23648
23649 // --------------------------------- Vector Multiply Add Add ----------------------------------
23650
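// Note: with AVX-512 VNNI, evpdpwssd fuses the pair-wise multiply-add above with the accumulate
// into $dst, i.e. dst32[i] += a16[2i] * b16[2i] + a16[2i+1] * b16[2i+1], which is why the
// (AddVI (MulAddVS2VI ...) dst) pattern matches a single instruction here.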
23651 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23652 predicate(VM_Version::supports_avx512_vnni());
23653 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23654 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23655 ins_encode %{
23656 assert(UseAVX > 2, "required");
23657 int vlen_enc = vector_length_encoding(this);
23658 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23659 %}
23660 ins_pipe( pipe_slow );
23661 ins_cost(10);
23662 %}
23663
23664 // --------------------------------- PopCount --------------------------------------
23665
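// Note: counts the set bits in every lane, e.g. popcount(0b10110010) == 4. With the AVX-512
// VPOPCNT extensions this is one instruction per lane type; the AVX fallback goes through the
// vector_popcount_integral helper (typically a pshufb nibble-table lookup on hardware without
// VPOPCNT).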
23666 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23667 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23668 match(Set dst (PopCountVI src));
23669 match(Set dst (PopCountVL src));
23670 format %{ "vector_popcount_integral $dst, $src" %}
23671 ins_encode %{
23672 int opcode = this->ideal_Opcode();
23673 int vlen_enc = vector_length_encoding(this, $src);
23674 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23675 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23676 %}
23677 ins_pipe( pipe_slow );
23678 %}
23679
23680 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23681 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23682 match(Set dst (PopCountVI src mask));
23683 match(Set dst (PopCountVL src mask));
23684 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23685 ins_encode %{
23686 int vlen_enc = vector_length_encoding(this, $src);
23687 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23688 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23689 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23690 %}
23691 ins_pipe( pipe_slow );
23692 %}
23693
23694 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23695 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23696 match(Set dst (PopCountVI src));
23697 match(Set dst (PopCountVL src));
23698 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23699 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23700 ins_encode %{
23701 int opcode = this->ideal_Opcode();
23702 int vlen_enc = vector_length_encoding(this, $src);
23703 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23704 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23705 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23706 %}
23707 ins_pipe( pipe_slow );
23708 %}
23709
23710 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23711
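// Note (scalar identity, for reference only): tzcnt(x) == popcnt((x & -x) - 1), e.g.
// x = 0b01100 -> x & -x = 0b00100 -> 0b00011 -> 2 trailing zeros. The actual lane-wise sequences
// live in the vector_count_trailing_zeros_evex/_avx helpers.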
23712 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23713 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23714 Matcher::vector_length_in_bytes(n->in(1))));
23715 match(Set dst (CountTrailingZerosV src));
23716 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23717 ins_cost(400);
23718 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23719 ins_encode %{
23720 int vlen_enc = vector_length_encoding(this, $src);
23721 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23722 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23723 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23724 %}
23725 ins_pipe( pipe_slow );
23726 %}
23727
23728 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23729 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23730 VM_Version::supports_avx512cd() &&
23731 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23732 match(Set dst (CountTrailingZerosV src));
23733 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23734 ins_cost(400);
23735 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23736 ins_encode %{
23737 int vlen_enc = vector_length_encoding(this, $src);
23738 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23739 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23740 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23741 %}
23742 ins_pipe( pipe_slow );
23743 %}
23744
23745 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23746 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23747 match(Set dst (CountTrailingZerosV src));
23748 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23749 ins_cost(400);
23750 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23751 ins_encode %{
23752 int vlen_enc = vector_length_encoding(this, $src);
23753 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23754 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23755 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23756 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23757 %}
23758 ins_pipe( pipe_slow );
23759 %}
23760
23761 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23762 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23763 match(Set dst (CountTrailingZerosV src));
23764 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23765 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23766 ins_encode %{
23767 int vlen_enc = vector_length_encoding(this, $src);
23768 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23769 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23770 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23771 %}
23772 ins_pipe( pipe_slow );
23773 %}
23774
23775
23776 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23777
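// Note: vpternlogd evaluates an arbitrary three-input boolean function per bit. For input bits
// (a, b, c) taken from $dst, $src2 and $src3, the result bit is bit (a*4 + b*2 + c) of the
// immediate $func; e.g. func == 0x96 computes a ^ b ^ c and func == 0xCA computes the select
// a ? b : c.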
23778 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23779 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23780 effect(TEMP dst);
23781 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23782 ins_encode %{
23783 int vector_len = vector_length_encoding(this);
23784 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23785 %}
23786 ins_pipe( pipe_slow );
23787 %}
23788
23789 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23790 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23791 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23792 effect(TEMP dst);
23793 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23794 ins_encode %{
23795 int vector_len = vector_length_encoding(this);
23796 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23797 %}
23798 ins_pipe( pipe_slow );
23799 %}
23800
23801 // --------------------------------- Rotation Operations ----------------------------------
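// Note: rotate left by s is (x << s) | (x >>> (lane_bits - s)), e.g.
// rotl32(0x80000001, 1) == 0x00000003; the vprotate_imm/vprotate_var helpers choose the
// instruction sequence for the element type and rotate amount.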
23802 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23803 match(Set dst (RotateLeftV src shift));
23804 match(Set dst (RotateRightV src shift));
23805 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23806 ins_encode %{
23807 int opcode = this->ideal_Opcode();
23808 int vector_len = vector_length_encoding(this);
23809 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23810 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23811 %}
23812 ins_pipe( pipe_slow );
23813 %}
23814
23815 instruct vprorate(vec dst, vec src, vec shift) %{
23816 match(Set dst (RotateLeftV src shift));
23817 match(Set dst (RotateRightV src shift));
23818 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23819 ins_encode %{
23820 int opcode = this->ideal_Opcode();
23821 int vector_len = vector_length_encoding(this);
23822 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23823 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23824 %}
23825 ins_pipe( pipe_slow );
23826 %}
23827
23828 // ---------------------------------- Masked Operations ------------------------------------
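// Masked loads and stores come in two flavors, selected by the predicate on isa_vectmask():
// the AVX forms take the mask as an ordinary vector register (vmaskmov-style), while the EVEX
// forms take a kReg opmask and use merge-masked moves.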
23829 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23830 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23831 match(Set dst (LoadVectorMasked mem mask));
23832 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23833 ins_encode %{
23834 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23835 int vlen_enc = vector_length_encoding(this);
23836 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23837 %}
23838 ins_pipe( pipe_slow );
23839 %}
23840
23841
23842 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23843 predicate(n->in(3)->bottom_type()->isa_vectmask());
23844 match(Set dst (LoadVectorMasked mem mask));
23845 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23846 ins_encode %{
23847 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23848 int vector_len = vector_length_encoding(this);
23849 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23850 %}
23851 ins_pipe( pipe_slow );
23852 %}
23853
23854 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23855 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23856 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23857 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23858 ins_encode %{
23859 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23860 int vlen_enc = vector_length_encoding(src_node);
23861 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23862 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23863 %}
23864 ins_pipe( pipe_slow );
23865 %}
23866
23867 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23868 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23869 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23870 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23871 ins_encode %{
23872 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23873 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23874 int vlen_enc = vector_length_encoding(src_node);
23875 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23876 %}
23877 ins_pipe( pipe_slow );
23878 %}
23879
23880 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23881 match(Set addr (VerifyVectorAlignment addr mask));
23882 effect(KILL cr);
23883 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23884 ins_encode %{
23885 Label Lskip;
23886 // check if masked bits of addr are zero
23887 __ testq($addr$$Register, $mask$$constant);
23888 __ jccb(Assembler::equal, Lskip);
23889 __ stop("verify_vector_alignment found a misaligned vector memory access");
23890 __ bind(Lskip);
23891 %}
23892 ins_pipe(pipe_slow);
23893 %}
23894
23895 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23896 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23897 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23898 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23899 ins_encode %{
23900 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23901 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23902
23903 Label DONE;
23904 int vlen_enc = vector_length_encoding(this, $src1);
23905 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23906
    __ knotql($ktmp2$$KRegister, $mask$$KRegister);      // ktmp2 = ~mask (lanes outside the mask)
    __ mov64($dst$$Register, -1L);                       // default result: every masked lane compared equal
    // ktmp1 = lanes within the mask where src1 == src2
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);  // CF is set iff ktmp2 | ktmp1 is all ones
    __ jccb(Assembler::carrySet, DONE);                  // no mismatch under the mask: keep -1
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);                             // 1-bits mark lanes not known equal (mismatching or unmasked)
    __ tzcntq($dst$$Register, $dst$$Register);           // index of the first such lane
    __ bind(DONE);
23916 %}
23917 ins_pipe( pipe_slow );
23918 %}
23919
23920
23921 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23922 match(Set dst (VectorMaskGen len));
23923 effect(TEMP temp, KILL cr);
23924 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23925 ins_encode %{
23926 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23927 %}
23928 ins_pipe( pipe_slow );
23929 %}
23930
23931 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23932 match(Set dst (VectorMaskGen len));
23933 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23934 effect(TEMP temp);
23935 ins_encode %{
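    // Materialize a mask with the low $len bits set by shifting all-ones right by (64 - len),
    // then move it into the opmask register.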
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23937 __ kmovql($dst$$KRegister, $temp$$Register);
23938 %}
23939 ins_pipe( pipe_slow );
23940 %}
23941
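// VectorMaskToLong / VectorMaskTrueCount / VectorMaskFirst(Last)True each have three variants:
// an EVEX form operating directly on a kReg, a form for masks held as boolean vectors, and an
// AVX form that peels off the VectorStoreMask and works on the raw mask vector.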
23942 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23943 predicate(n->in(1)->bottom_type()->isa_vectmask());
23944 match(Set dst (VectorMaskToLong mask));
23945 effect(TEMP dst, KILL cr);
23946 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23947 ins_encode %{
23948 int opcode = this->ideal_Opcode();
23949 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23950 int mask_len = Matcher::vector_length(this, $mask);
23951 int mask_size = mask_len * type2aelembytes(mbt);
23952 int vlen_enc = vector_length_encoding(this, $mask);
23953 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23954 $dst$$Register, mask_len, mask_size, vlen_enc);
23955 %}
23956 ins_pipe( pipe_slow );
23957 %}
23958
23959 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23960 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23961 match(Set dst (VectorMaskToLong mask));
23962 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23963 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23964 ins_encode %{
23965 int opcode = this->ideal_Opcode();
23966 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23967 int mask_len = Matcher::vector_length(this, $mask);
23968 int vlen_enc = vector_length_encoding(this, $mask);
23969 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23970 $dst$$Register, mask_len, mbt, vlen_enc);
23971 %}
23972 ins_pipe( pipe_slow );
23973 %}
23974
23975 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23976 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23977 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23978 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23979 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23980 ins_encode %{
23981 int opcode = this->ideal_Opcode();
23982 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23983 int mask_len = Matcher::vector_length(this, $mask);
23984 int vlen_enc = vector_length_encoding(this, $mask);
23985 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23986 $dst$$Register, mask_len, mbt, vlen_enc);
23987 %}
23988 ins_pipe( pipe_slow );
23989 %}
23990
23991 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23992 predicate(n->in(1)->bottom_type()->isa_vectmask());
23993 match(Set dst (VectorMaskTrueCount mask));
23994 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23995 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23996 ins_encode %{
23997 int opcode = this->ideal_Opcode();
23998 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23999 int mask_len = Matcher::vector_length(this, $mask);
24000 int mask_size = mask_len * type2aelembytes(mbt);
24001 int vlen_enc = vector_length_encoding(this, $mask);
24002 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24003 $tmp$$Register, mask_len, mask_size, vlen_enc);
24004 %}
24005 ins_pipe( pipe_slow );
24006 %}
24007
24008 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24009 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24010 match(Set dst (VectorMaskTrueCount mask));
24011 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24012 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24013 ins_encode %{
24014 int opcode = this->ideal_Opcode();
24015 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24016 int mask_len = Matcher::vector_length(this, $mask);
24017 int vlen_enc = vector_length_encoding(this, $mask);
24018 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24019 $tmp$$Register, mask_len, mbt, vlen_enc);
24020 %}
24021 ins_pipe( pipe_slow );
24022 %}
24023
24024 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24025 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24026 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24027 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24028 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24029 ins_encode %{
24030 int opcode = this->ideal_Opcode();
24031 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24032 int mask_len = Matcher::vector_length(this, $mask);
24033 int vlen_enc = vector_length_encoding(this, $mask);
24034 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24035 $tmp$$Register, mask_len, mbt, vlen_enc);
24036 %}
24037 ins_pipe( pipe_slow );
24038 %}
24039
24040 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24041 predicate(n->in(1)->bottom_type()->isa_vectmask());
24042 match(Set dst (VectorMaskFirstTrue mask));
24043 match(Set dst (VectorMaskLastTrue mask));
24044 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24045 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24046 ins_encode %{
24047 int opcode = this->ideal_Opcode();
24048 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24049 int mask_len = Matcher::vector_length(this, $mask);
24050 int mask_size = mask_len * type2aelembytes(mbt);
24051 int vlen_enc = vector_length_encoding(this, $mask);
24052 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24053 $tmp$$Register, mask_len, mask_size, vlen_enc);
24054 %}
24055 ins_pipe( pipe_slow );
24056 %}
24057
24058 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24059 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24060 match(Set dst (VectorMaskFirstTrue mask));
24061 match(Set dst (VectorMaskLastTrue mask));
24062 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24063 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24064 ins_encode %{
24065 int opcode = this->ideal_Opcode();
24066 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24067 int mask_len = Matcher::vector_length(this, $mask);
24068 int vlen_enc = vector_length_encoding(this, $mask);
24069 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24070 $tmp$$Register, mask_len, mbt, vlen_enc);
24071 %}
24072 ins_pipe( pipe_slow );
24073 %}
24074
24075 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24076 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24077 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24078 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24079 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24080 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24081 ins_encode %{
24082 int opcode = this->ideal_Opcode();
24083 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24084 int mask_len = Matcher::vector_length(this, $mask);
24085 int vlen_enc = vector_length_encoding(this, $mask);
24086 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24087 $tmp$$Register, mask_len, mbt, vlen_enc);
24088 %}
24089 ins_pipe( pipe_slow );
24090 %}
24091
24092 // --------------------------------- Compress/Expand Operations ---------------------------
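// On AVX2 targets (no AVX512VL, vectors of at most 32 bytes) compress/expand is emulated with a
// permutation computed from the mask, hence the extra vector and scalar temporaries; with
// AVX512VL or 64-byte vectors the EVEX form maps directly onto vpcompress/vpexpand.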
24093 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24094 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24095 match(Set dst (CompressV src mask));
24096 match(Set dst (ExpandV src mask));
24097 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24099 ins_encode %{
24100 int opcode = this->ideal_Opcode();
24101 int vlen_enc = vector_length_encoding(this);
24102 BasicType bt = Matcher::vector_element_basic_type(this);
24103 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24104 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24105 %}
24106 ins_pipe( pipe_slow );
24107 %}
24108
24109 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24110 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24111 match(Set dst (CompressV src mask));
24112 match(Set dst (ExpandV src mask));
24113 format %{ "vector_compress_expand $dst, $src, $mask" %}
24114 ins_encode %{
24115 int opcode = this->ideal_Opcode();
24116 int vector_len = vector_length_encoding(this);
24117 BasicType bt = Matcher::vector_element_basic_type(this);
24118 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24119 %}
24120 ins_pipe( pipe_slow );
24121 %}
24122
24123 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24124 match(Set dst (CompressM mask));
24125 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24126 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24127 ins_encode %{
24128 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24129 int mask_len = Matcher::vector_length(this);
24130 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24131 %}
24132 ins_pipe( pipe_slow );
24133 %}
24134
24135 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24136
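// Bit reversal: without GFNI it is done with table/shift arithmetic, hence the temporaries; with
// GFNI a single affine transform against the bit-reversal matrix 0x8040201008040201 reverses the
// bits within each byte (wider elements additionally need their bytes swapped).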
24137 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24138 predicate(!VM_Version::supports_gfni());
24139 match(Set dst (ReverseV src));
24140 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24142 ins_encode %{
24143 int vec_enc = vector_length_encoding(this);
24144 BasicType bt = Matcher::vector_element_basic_type(this);
24145 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24146 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24147 %}
24148 ins_pipe( pipe_slow );
24149 %}
24150
24151 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24152 predicate(VM_Version::supports_gfni());
24153 match(Set dst (ReverseV src));
24154 effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24156 ins_encode %{
24157 int vec_enc = vector_length_encoding(this);
24158 BasicType bt = Matcher::vector_element_basic_type(this);
24159 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24160 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24161 $xtmp$$XMMRegister);
24162 %}
24163 ins_pipe( pipe_slow );
24164 %}
24165
24166 instruct vreverse_byte_reg(vec dst, vec src) %{
24167 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24168 match(Set dst (ReverseBytesV src));
24169 effect(TEMP dst);
24170 format %{ "vector_reverse_byte $dst, $src" %}
24171 ins_encode %{
24172 int vec_enc = vector_length_encoding(this);
24173 BasicType bt = Matcher::vector_element_basic_type(this);
24174 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24175 %}
24176 ins_pipe( pipe_slow );
24177 %}
24178
24179 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24180 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24181 match(Set dst (ReverseBytesV src));
24182 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24184 ins_encode %{
24185 int vec_enc = vector_length_encoding(this);
24186 BasicType bt = Matcher::vector_element_basic_type(this);
24187 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24188 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24189 %}
24190 ins_pipe( pipe_slow );
24191 %}
24192
24193 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24194
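// Leading-zero count: with AVX512CD the int/long forms use vplzcnt directly and need no
// temporaries; the short/byte and pre-AVX512VL forms emulate it, which is why they require
// scratch vectors, an opmask or a scalar temporary.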
24195 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24196 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24197 Matcher::vector_length_in_bytes(n->in(1))));
24198 match(Set dst (CountLeadingZerosV src));
24199 format %{ "vector_count_leading_zeros $dst, $src" %}
24200 ins_encode %{
24201 int vlen_enc = vector_length_encoding(this, $src);
24202 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24203 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24204 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24205 %}
24206 ins_pipe( pipe_slow );
24207 %}
24208
24209 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24210 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24211 Matcher::vector_length_in_bytes(n->in(1))));
24212 match(Set dst (CountLeadingZerosV src mask));
24213 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24214 ins_encode %{
24215 int vlen_enc = vector_length_encoding(this, $src);
24216 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24217 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24218 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24219 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24220 %}
24221 ins_pipe( pipe_slow );
24222 %}
24223
24224 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24225 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24226 VM_Version::supports_avx512cd() &&
24227 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24228 match(Set dst (CountLeadingZerosV src));
24229 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24231 ins_encode %{
24232 int vlen_enc = vector_length_encoding(this, $src);
24233 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24234 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24235 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24236 %}
24237 ins_pipe( pipe_slow );
24238 %}
24239
24240 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24241 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24242 match(Set dst (CountLeadingZerosV src));
24243 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24245 ins_encode %{
24246 int vlen_enc = vector_length_encoding(this, $src);
24247 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24248 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24249 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24250 $rtmp$$Register, true, vlen_enc);
24251 %}
24252 ins_pipe( pipe_slow );
24253 %}
24254
24255 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24256 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24257 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24258 match(Set dst (CountLeadingZerosV src));
24259 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24260 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24261 ins_encode %{
24262 int vlen_enc = vector_length_encoding(this, $src);
24263 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24264 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24265 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24266 %}
24267 ins_pipe( pipe_slow );
24268 %}
24269
24270 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24271 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24272 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24273 match(Set dst (CountLeadingZerosV src));
24274 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24275 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24276 ins_encode %{
24277 int vlen_enc = vector_length_encoding(this, $src);
24278 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24279 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24280 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24281 %}
24282 ins_pipe( pipe_slow );
24283 %}
24284
24285 // ---------------------------------- Vector Masked Operations ------------------------------------
24286
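// Predicated arithmetic: each pattern folds a kReg into the corresponding AVX-512 instruction via
// evmasked_op. The boolean following the operands selects merge-masking, so destination lanes
// whose mask bit is clear keep their previous value; the extra trailing boolean on the shift
// forms flags a per-element (variable) shift count.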
24287 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24288 match(Set dst (AddVB (Binary dst src2) mask));
24289 match(Set dst (AddVS (Binary dst src2) mask));
24290 match(Set dst (AddVI (Binary dst src2) mask));
24291 match(Set dst (AddVL (Binary dst src2) mask));
24292 match(Set dst (AddVF (Binary dst src2) mask));
24293 match(Set dst (AddVD (Binary dst src2) mask));
24294 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24295 ins_encode %{
24296 int vlen_enc = vector_length_encoding(this);
24297 BasicType bt = Matcher::vector_element_basic_type(this);
24298 int opc = this->ideal_Opcode();
24299 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24300 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24301 %}
24302 ins_pipe( pipe_slow );
24303 %}
24304
24305 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24306 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24307 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24308 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24309 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24310 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24311 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24312 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24313 ins_encode %{
24314 int vlen_enc = vector_length_encoding(this);
24315 BasicType bt = Matcher::vector_element_basic_type(this);
24316 int opc = this->ideal_Opcode();
24317 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24318 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24319 %}
24320 ins_pipe( pipe_slow );
24321 %}
24322
24323 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24324 match(Set dst (XorV (Binary dst src2) mask));
24325 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24326 ins_encode %{
24327 int vlen_enc = vector_length_encoding(this);
24328 BasicType bt = Matcher::vector_element_basic_type(this);
24329 int opc = this->ideal_Opcode();
24330 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24331 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24332 %}
24333 ins_pipe( pipe_slow );
24334 %}
24335
24336 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24337 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24338 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24339 ins_encode %{
24340 int vlen_enc = vector_length_encoding(this);
24341 BasicType bt = Matcher::vector_element_basic_type(this);
24342 int opc = this->ideal_Opcode();
24343 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24344 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24345 %}
24346 ins_pipe( pipe_slow );
24347 %}
24348
24349 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24350 match(Set dst (OrV (Binary dst src2) mask));
24351 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24352 ins_encode %{
24353 int vlen_enc = vector_length_encoding(this);
24354 BasicType bt = Matcher::vector_element_basic_type(this);
24355 int opc = this->ideal_Opcode();
24356 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24357 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24358 %}
24359 ins_pipe( pipe_slow );
24360 %}
24361
24362 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24363 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24364 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24365 ins_encode %{
24366 int vlen_enc = vector_length_encoding(this);
24367 BasicType bt = Matcher::vector_element_basic_type(this);
24368 int opc = this->ideal_Opcode();
24369 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24370 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24371 %}
24372 ins_pipe( pipe_slow );
24373 %}
24374
24375 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24376 match(Set dst (AndV (Binary dst src2) mask));
24377 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24378 ins_encode %{
24379 int vlen_enc = vector_length_encoding(this);
24380 BasicType bt = Matcher::vector_element_basic_type(this);
24381 int opc = this->ideal_Opcode();
24382 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24383 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24384 %}
24385 ins_pipe( pipe_slow );
24386 %}
24387
24388 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24389 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24390 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24391 ins_encode %{
24392 int vlen_enc = vector_length_encoding(this);
24393 BasicType bt = Matcher::vector_element_basic_type(this);
24394 int opc = this->ideal_Opcode();
24395 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24396 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24397 %}
24398 ins_pipe( pipe_slow );
24399 %}
24400
24401 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24402 match(Set dst (SubVB (Binary dst src2) mask));
24403 match(Set dst (SubVS (Binary dst src2) mask));
24404 match(Set dst (SubVI (Binary dst src2) mask));
24405 match(Set dst (SubVL (Binary dst src2) mask));
24406 match(Set dst (SubVF (Binary dst src2) mask));
24407 match(Set dst (SubVD (Binary dst src2) mask));
24408 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24409 ins_encode %{
24410 int vlen_enc = vector_length_encoding(this);
24411 BasicType bt = Matcher::vector_element_basic_type(this);
24412 int opc = this->ideal_Opcode();
24413 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24414 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24415 %}
24416 ins_pipe( pipe_slow );
24417 %}
24418
24419 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24420 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24421 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24422 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24423 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24424 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24425 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24426 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24427 ins_encode %{
24428 int vlen_enc = vector_length_encoding(this);
24429 BasicType bt = Matcher::vector_element_basic_type(this);
24430 int opc = this->ideal_Opcode();
24431 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24432 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24433 %}
24434 ins_pipe( pipe_slow );
24435 %}
24436
24437 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24438 match(Set dst (MulVS (Binary dst src2) mask));
24439 match(Set dst (MulVI (Binary dst src2) mask));
24440 match(Set dst (MulVL (Binary dst src2) mask));
24441 match(Set dst (MulVF (Binary dst src2) mask));
24442 match(Set dst (MulVD (Binary dst src2) mask));
24443 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24444 ins_encode %{
24445 int vlen_enc = vector_length_encoding(this);
24446 BasicType bt = Matcher::vector_element_basic_type(this);
24447 int opc = this->ideal_Opcode();
24448 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24449 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24450 %}
24451 ins_pipe( pipe_slow );
24452 %}
24453
24454 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24455 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24456 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24457 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24458 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24459 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24460 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24461 ins_encode %{
24462 int vlen_enc = vector_length_encoding(this);
24463 BasicType bt = Matcher::vector_element_basic_type(this);
24464 int opc = this->ideal_Opcode();
24465 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24466 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24467 %}
24468 ins_pipe( pipe_slow );
24469 %}
24470
24471 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24472 match(Set dst (SqrtVF dst mask));
24473 match(Set dst (SqrtVD dst mask));
24474 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24475 ins_encode %{
24476 int vlen_enc = vector_length_encoding(this);
24477 BasicType bt = Matcher::vector_element_basic_type(this);
24478 int opc = this->ideal_Opcode();
24479 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24480 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24481 %}
24482 ins_pipe( pipe_slow );
24483 %}
24484
24485 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24486 match(Set dst (DivVF (Binary dst src2) mask));
24487 match(Set dst (DivVD (Binary dst src2) mask));
24488 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24489 ins_encode %{
24490 int vlen_enc = vector_length_encoding(this);
24491 BasicType bt = Matcher::vector_element_basic_type(this);
24492 int opc = this->ideal_Opcode();
24493 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24494 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24495 %}
24496 ins_pipe( pipe_slow );
24497 %}
24498
24499 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24500 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24501 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24502 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24503 ins_encode %{
24504 int vlen_enc = vector_length_encoding(this);
24505 BasicType bt = Matcher::vector_element_basic_type(this);
24506 int opc = this->ideal_Opcode();
24507 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24508 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24509 %}
24510 ins_pipe( pipe_slow );
24511 %}
24512
24513
24514 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24515 match(Set dst (RotateLeftV (Binary dst shift) mask));
24516 match(Set dst (RotateRightV (Binary dst shift) mask));
24517 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24518 ins_encode %{
24519 int vlen_enc = vector_length_encoding(this);
24520 BasicType bt = Matcher::vector_element_basic_type(this);
24521 int opc = this->ideal_Opcode();
24522 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24523 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24524 %}
24525 ins_pipe( pipe_slow );
24526 %}
24527
24528 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24529 match(Set dst (RotateLeftV (Binary dst src2) mask));
24530 match(Set dst (RotateRightV (Binary dst src2) mask));
24531 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24532 ins_encode %{
24533 int vlen_enc = vector_length_encoding(this);
24534 BasicType bt = Matcher::vector_element_basic_type(this);
24535 int opc = this->ideal_Opcode();
24536 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24537 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24538 %}
24539 ins_pipe( pipe_slow );
24540 %}
24541
24542 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24543 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24544 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24545 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24546 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24547 ins_encode %{
24548 int vlen_enc = vector_length_encoding(this);
24549 BasicType bt = Matcher::vector_element_basic_type(this);
24550 int opc = this->ideal_Opcode();
24551 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24552 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24553 %}
24554 ins_pipe( pipe_slow );
24555 %}
24556
24557 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24558 predicate(!n->as_ShiftV()->is_var_shift());
24559 match(Set dst (LShiftVS (Binary dst src2) mask));
24560 match(Set dst (LShiftVI (Binary dst src2) mask));
24561 match(Set dst (LShiftVL (Binary dst src2) mask));
24562 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24563 ins_encode %{
24564 int vlen_enc = vector_length_encoding(this);
24565 BasicType bt = Matcher::vector_element_basic_type(this);
24566 int opc = this->ideal_Opcode();
24567 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24568 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24569 %}
24570 ins_pipe( pipe_slow );
24571 %}
24572
24573 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24574 predicate(n->as_ShiftV()->is_var_shift());
24575 match(Set dst (LShiftVS (Binary dst src2) mask));
24576 match(Set dst (LShiftVI (Binary dst src2) mask));
24577 match(Set dst (LShiftVL (Binary dst src2) mask));
24578 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24579 ins_encode %{
24580 int vlen_enc = vector_length_encoding(this);
24581 BasicType bt = Matcher::vector_element_basic_type(this);
24582 int opc = this->ideal_Opcode();
24583 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24584 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24585 %}
24586 ins_pipe( pipe_slow );
24587 %}
24588
24589 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24590 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24591 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24592 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24593 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24594 ins_encode %{
24595 int vlen_enc = vector_length_encoding(this);
24596 BasicType bt = Matcher::vector_element_basic_type(this);
24597 int opc = this->ideal_Opcode();
24598 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24599 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24600 %}
24601 ins_pipe( pipe_slow );
24602 %}
24603
24604 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24605 predicate(!n->as_ShiftV()->is_var_shift());
24606 match(Set dst (RShiftVS (Binary dst src2) mask));
24607 match(Set dst (RShiftVI (Binary dst src2) mask));
24608 match(Set dst (RShiftVL (Binary dst src2) mask));
24609 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24610 ins_encode %{
24611 int vlen_enc = vector_length_encoding(this);
24612 BasicType bt = Matcher::vector_element_basic_type(this);
24613 int opc = this->ideal_Opcode();
24614 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24615 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24616 %}
24617 ins_pipe( pipe_slow );
24618 %}
24619
24620 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24621 predicate(n->as_ShiftV()->is_var_shift());
24622 match(Set dst (RShiftVS (Binary dst src2) mask));
24623 match(Set dst (RShiftVI (Binary dst src2) mask));
24624 match(Set dst (RShiftVL (Binary dst src2) mask));
24625 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24626 ins_encode %{
24627 int vlen_enc = vector_length_encoding(this);
24628 BasicType bt = Matcher::vector_element_basic_type(this);
24629 int opc = this->ideal_Opcode();
24630 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24631 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24632 %}
24633 ins_pipe( pipe_slow );
24634 %}
24635
24636 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24637 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24638 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24639 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24640 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24641 ins_encode %{
24642 int vlen_enc = vector_length_encoding(this);
24643 BasicType bt = Matcher::vector_element_basic_type(this);
24644 int opc = this->ideal_Opcode();
24645 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24646 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24647 %}
24648 ins_pipe( pipe_slow );
24649 %}
24650
24651 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24652 predicate(!n->as_ShiftV()->is_var_shift());
24653 match(Set dst (URShiftVS (Binary dst src2) mask));
24654 match(Set dst (URShiftVI (Binary dst src2) mask));
24655 match(Set dst (URShiftVL (Binary dst src2) mask));
24656 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24657 ins_encode %{
24658 int vlen_enc = vector_length_encoding(this);
24659 BasicType bt = Matcher::vector_element_basic_type(this);
24660 int opc = this->ideal_Opcode();
24661 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24662 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24663 %}
24664 ins_pipe( pipe_slow );
24665 %}
24666
24667 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24668 predicate(n->as_ShiftV()->is_var_shift());
24669 match(Set dst (URShiftVS (Binary dst src2) mask));
24670 match(Set dst (URShiftVI (Binary dst src2) mask));
24671 match(Set dst (URShiftVL (Binary dst src2) mask));
24672 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24673 ins_encode %{
24674 int vlen_enc = vector_length_encoding(this);
24675 BasicType bt = Matcher::vector_element_basic_type(this);
24676 int opc = this->ideal_Opcode();
24677 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24678 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24679 %}
24680 ins_pipe( pipe_slow );
24681 %}
24682
24683 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24684 match(Set dst (MaxV (Binary dst src2) mask));
24685 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24686 ins_encode %{
24687 int vlen_enc = vector_length_encoding(this);
24688 BasicType bt = Matcher::vector_element_basic_type(this);
24689 int opc = this->ideal_Opcode();
24690 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24691 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24692 %}
24693 ins_pipe( pipe_slow );
24694 %}
24695
24696 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24697 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24698 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24699 ins_encode %{
24700 int vlen_enc = vector_length_encoding(this);
24701 BasicType bt = Matcher::vector_element_basic_type(this);
24702 int opc = this->ideal_Opcode();
24703 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24704 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24705 %}
24706 ins_pipe( pipe_slow );
24707 %}
24708
24709 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24710 match(Set dst (MinV (Binary dst src2) mask));
24711 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24712 ins_encode %{
24713 int vlen_enc = vector_length_encoding(this);
24714 BasicType bt = Matcher::vector_element_basic_type(this);
24715 int opc = this->ideal_Opcode();
24716 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24717 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24718 %}
24719 ins_pipe( pipe_slow );
24720 %}
24721
24722 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24723 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24724 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24725 ins_encode %{
24726 int vlen_enc = vector_length_encoding(this);
24727 BasicType bt = Matcher::vector_element_basic_type(this);
24728 int opc = this->ideal_Opcode();
24729 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24730 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24731 %}
24732 ins_pipe( pipe_slow );
24733 %}
24734
24735 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24736 match(Set dst (VectorRearrange (Binary dst src2) mask));
24737 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24738 ins_encode %{
24739 int vlen_enc = vector_length_encoding(this);
24740 BasicType bt = Matcher::vector_element_basic_type(this);
24741 int opc = this->ideal_Opcode();
24742 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24743 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24744 %}
24745 ins_pipe( pipe_slow );
24746 %}
24747
24748 instruct vabs_masked(vec dst, kReg mask) %{
24749 match(Set dst (AbsVB dst mask));
24750 match(Set dst (AbsVS dst mask));
24751 match(Set dst (AbsVI dst mask));
24752 match(Set dst (AbsVL dst mask));
24753 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24754 ins_encode %{
24755 int vlen_enc = vector_length_encoding(this);
24756 BasicType bt = Matcher::vector_element_basic_type(this);
24757 int opc = this->ideal_Opcode();
24758 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24759 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24760 %}
24761 ins_pipe( pipe_slow );
24762 %}
24763
24764 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24765 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24766 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24767 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24768 ins_encode %{
24769 assert(UseFMA, "Needs FMA instructions support.");
24770 int vlen_enc = vector_length_encoding(this);
24771 BasicType bt = Matcher::vector_element_basic_type(this);
24772 int opc = this->ideal_Opcode();
24773 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24774 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24775 %}
24776 ins_pipe( pipe_slow );
24777 %}
24778
24779 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24780 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24781 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24782 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24783 ins_encode %{
24784 assert(UseFMA, "Needs FMA instructions support.");
24785 int vlen_enc = vector_length_encoding(this);
24786 BasicType bt = Matcher::vector_element_basic_type(this);
24787 int opc = this->ideal_Opcode();
24788 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24789 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24790 %}
24791 ins_pipe( pipe_slow );
24792 %}
24793
24794 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24795 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24796 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24797 ins_encode %{
24798 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24799 int vlen_enc = vector_length_encoding(this, $src1);
24800 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24801
    // Dispatch on the element type of src1: integral types use evpcmp*, floating-point types use evcmpps/evcmppd.
24803 switch (src1_elem_bt) {
24804 case T_BYTE: {
24805 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24806 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24807 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24808 break;
24809 }
24810 case T_SHORT: {
24811 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24812 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24813 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24814 break;
24815 }
24816 case T_INT: {
24817 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24818 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24819 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24820 break;
24821 }
24822 case T_LONG: {
24823 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24824 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24825 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24826 break;
24827 }
24828 case T_FLOAT: {
24829 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24830 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24831 break;
24832 }
24833 case T_DOUBLE: {
24834 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24835 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24836 break;
24837 }
24838 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24839 }
24840 %}
24841 ins_pipe( pipe_slow );
24842 %}
24843
24844 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24845 predicate(Matcher::vector_length(n) <= 32);
24846 match(Set dst (MaskAll src));
24847 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24848 ins_encode %{
24849 int mask_len = Matcher::vector_length(this);
24850 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24851 %}
24852 ins_pipe( pipe_slow );
24853 %}
24854
24855 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24856 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24857 match(Set dst (XorVMask src (MaskAll cnt)));
24858 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24860 ins_encode %{
24861 uint masklen = Matcher::vector_length(this);
24862 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24863 %}
24864 ins_pipe( pipe_slow );
24865 %}
24866
24867 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24868 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24869 (Matcher::vector_length(n) == 16) ||
24870 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24871 match(Set dst (XorVMask src (MaskAll cnt)));
24872 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24873 ins_encode %{
24874 uint masklen = Matcher::vector_length(this);
24875 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24876 %}
24877 ins_pipe( pipe_slow );
24878 %}
24879
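// VectorLongToMask turns a scalar long bit-mask into a vector mask: on AVX targets the bits are
// expanded into a boolean vector (split by mask length), on EVEX targets the long is simply
// moved into a kReg.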
24880 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24881 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24882 match(Set dst (VectorLongToMask src));
24883 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24884 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24885 ins_encode %{
24886 int mask_len = Matcher::vector_length(this);
24887 int vec_enc = vector_length_encoding(mask_len);
24888 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24889 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24890 %}
24891 ins_pipe( pipe_slow );
24892 %}
24893
24894
24895 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24896 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24897 match(Set dst (VectorLongToMask src));
24898 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
24900 ins_encode %{
24901 int mask_len = Matcher::vector_length(this);
24902 assert(mask_len <= 32, "invalid mask length");
24903 int vec_enc = vector_length_encoding(mask_len);
24904 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24905 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24906 %}
24907 ins_pipe( pipe_slow );
24908 %}
24909
24910 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24911 predicate(n->bottom_type()->isa_vectmask());
24912 match(Set dst (VectorLongToMask src));
24913 format %{ "long_to_mask_evex $dst, $src\t!" %}
24914 ins_encode %{
24915 __ kmov($dst$$KRegister, $src$$Register);
24916 %}
24917 ins_pipe( pipe_slow );
24918 %}
24919
24920 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24921 match(Set dst (AndVMask src1 src2));
24922 match(Set dst (OrVMask src1 src2));
24923 match(Set dst (XorVMask src1 src2));
24924 effect(TEMP kscratch);
24925 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24926 ins_encode %{
24927 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24928 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24929 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24930 uint masklen = Matcher::vector_length(this);
24931 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24932 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24933 %}
24934 ins_pipe( pipe_slow );
24935 %}
24936
24937 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24938 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24939 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24940 ins_encode %{
24941 int vlen_enc = vector_length_encoding(this);
24942 BasicType bt = Matcher::vector_element_basic_type(this);
24943 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24944 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24945 %}
24946 ins_pipe( pipe_slow );
24947 %}
24948
24949 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary (LoadVector src3) (Binary func mask)))));
24951 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24952 ins_encode %{
24953 int vlen_enc = vector_length_encoding(this);
24954 BasicType bt = Matcher::vector_element_basic_type(this);
24955 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24956 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24957 %}
24958 ins_pipe( pipe_slow );
24959 %}
24960
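// CastVV only pins type information; it emits no code.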
24961 instruct castMM(kReg dst)
24962 %{
24963 match(Set dst (CastVV dst));
24964
24965 size(0);
24966 format %{ "# castVV of $dst" %}
24967 ins_encode(/* empty encoding */);
24968 ins_cost(0);
24969 ins_pipe(empty);
24970 %}
24971
24972 instruct castVV(vec dst)
24973 %{
24974 match(Set dst (CastVV dst));
24975
24976 size(0);
24977 format %{ "# castVV of $dst" %}
24978 ins_encode(/* empty encoding */);
24979 ins_cost(0);
24980 ins_pipe(empty);
24981 %}
24982
24983 instruct castVVLeg(legVec dst)
24984 %{
24985 match(Set dst (CastVV dst));
24986
24987 size(0);
24988 format %{ "# castVV of $dst" %}
24989 ins_encode(/* empty encoding */);
24990 ins_cost(0);
24991 ins_pipe(empty);
24992 %}
24993
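// IsInfinite is answered with vfpclass: immediate 0x18 selects the +infinity (bit 3) and
// -infinity (bit 4) classes, and the resulting one-bit opmask is copied to the integer result.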
24994 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24995 %{
24996 match(Set dst (IsInfiniteF src));
24997 effect(TEMP ktmp, KILL cr);
24998 format %{ "float_class_check $dst, $src" %}
24999 ins_encode %{
25000 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25001 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25002 %}
25003 ins_pipe(pipe_slow);
25004 %}
25005
25006 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25007 %{
25008 match(Set dst (IsInfiniteD src));
25009 effect(TEMP ktmp, KILL cr);
25010 format %{ "double_class_check $dst, $src" %}
25011 ins_encode %{
25012 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25013 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25014 %}
25015 ins_pipe(pipe_slow);
25016 %}
25017
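// Saturating add/sub: byte and short elements map onto the native packed saturating
// instructions (signed or unsigned per the node flag); int and long elements have no such
// instructions, so the EVEX and AVX variants below emulate the saturation with extra temporaries.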
25018 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25019 %{
25020 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25021 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25022 match(Set dst (SaturatingAddV src1 src2));
25023 match(Set dst (SaturatingSubV src1 src2));
25024 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25025 ins_encode %{
25026 int vlen_enc = vector_length_encoding(this);
25027 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25028 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25029 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25030 %}
25031 ins_pipe(pipe_slow);
25032 %}
25033
25034 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25035 %{
25036 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25037 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25038 match(Set dst (SaturatingAddV src1 src2));
25039 match(Set dst (SaturatingSubV src1 src2));
25040 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25041 ins_encode %{
25042 int vlen_enc = vector_length_encoding(this);
25043 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25044 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25045 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25046 %}
25047 ins_pipe(pipe_slow);
25048 %}
25049
25050 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25051 %{
25052 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25053 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25054 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25055 match(Set dst (SaturatingAddV src1 src2));
25056 match(Set dst (SaturatingSubV src1 src2));
25057 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25058 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25059 ins_encode %{
25060 int vlen_enc = vector_length_encoding(this);
25061 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25062 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25063 $src1$$XMMRegister, $src2$$XMMRegister,
25064 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25065 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25066 %}
25067 ins_pipe(pipe_slow);
25068 %}
25069
25070 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25071 %{
25072 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25073 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25074 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25075 match(Set dst (SaturatingAddV src1 src2));
25076 match(Set dst (SaturatingSubV src1 src2));
25077 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25078 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25079 ins_encode %{
25080 int vlen_enc = vector_length_encoding(this);
25081 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25082 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25083 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25084 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25085 %}
25086 ins_pipe(pipe_slow);
25087 %}
25088
25089 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25090 %{
25091 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25092 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25093 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25094 match(Set dst (SaturatingAddV src1 src2));
25095 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25096 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25097 ins_encode %{
25098 int vlen_enc = vector_length_encoding(this);
25099 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25100 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25101 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25102 %}
25103 ins_pipe(pipe_slow);
25104 %}
25105
25106 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25107 %{
25108 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25109 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25110 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25111 match(Set dst (SaturatingAddV src1 src2));
25112 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25113 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25114 ins_encode %{
25115 int vlen_enc = vector_length_encoding(this);
25116 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25117 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25118 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25119 %}
25120 ins_pipe(pipe_slow);
25121 %}
25122
25123 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25124 %{
25125 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25126 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25127 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25128 match(Set dst (SaturatingSubV src1 src2));
25129 effect(TEMP ktmp);
25130 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25131 ins_encode %{
25132 int vlen_enc = vector_length_encoding(this);
25133 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25134 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25135 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25136 %}
25137 ins_pipe(pipe_slow);
25138 %}
25139
25140 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25141 %{
25142 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25143 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25144 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25145 match(Set dst (SaturatingSubV src1 src2));
25146 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25147 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25148 ins_encode %{
25149 int vlen_enc = vector_length_encoding(this);
25150 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25151 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25152 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25153 %}
25154 ins_pipe(pipe_slow);
25155 %}
25156
25157 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25158 %{
25159 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25160 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25161 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25162 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25163 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25164 ins_encode %{
25165 int vlen_enc = vector_length_encoding(this);
25166 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25167 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25168 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25169 %}
25170 ins_pipe(pipe_slow);
25171 %}
25172
25173 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25174 %{
25175 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25176 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25177 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25178 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25179 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25180 ins_encode %{
25181 int vlen_enc = vector_length_encoding(this);
25182 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25183 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25184 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25185 %}
25186 ins_pipe(pipe_slow);
25187 %}
25188
25189 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25190 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25191 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25192 match(Set dst (SaturatingAddV (Binary dst src) mask));
25193 match(Set dst (SaturatingSubV (Binary dst src) mask));
25194 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25195 ins_encode %{
25196 int vlen_enc = vector_length_encoding(this);
25197 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25198 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25199 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25200 %}
25201 ins_pipe( pipe_slow );
25202 %}
25203
25204 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25205 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25206 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25207 match(Set dst (SaturatingAddV (Binary dst src) mask));
25208 match(Set dst (SaturatingSubV (Binary dst src) mask));
25209 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25210 ins_encode %{
25211 int vlen_enc = vector_length_encoding(this);
25212 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25213 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25214 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25215 %}
25216 ins_pipe( pipe_slow );
25217 %}
25218
25219 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25220 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25221 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25222 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25223 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25224 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25225 ins_encode %{
25226 int vlen_enc = vector_length_encoding(this);
25227 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25228 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25229 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25230 %}
25231 ins_pipe( pipe_slow );
25232 %}
25233
25234 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25235 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25236 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25237 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25238 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25239 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25240 ins_encode %{
25241 int vlen_enc = vector_length_encoding(this);
25242 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25243 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25244 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25245 %}
25246 ins_pipe( pipe_slow );
25247 %}
25248
25249 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25250 %{
25251 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25252 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25253 ins_encode %{
25254 int vlen_enc = vector_length_encoding(this);
25255 BasicType bt = Matcher::vector_element_basic_type(this);
25256 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25257 %}
25258 ins_pipe(pipe_slow);
25259 %}
25260
25261 instruct reinterpretS2HF(regF dst, rRegI src)
25262 %{
25263 match(Set dst (ReinterpretS2HF src));
25264 format %{ "vmovw $dst, $src" %}
25265 ins_encode %{
25266 __ vmovw($dst$$XMMRegister, $src$$Register);
25267 %}
25268 ins_pipe(pipe_slow);
25269 %}
25270
25271 instruct reinterpretHF2S(rRegI dst, regF src)
25272 %{
25273 match(Set dst (ReinterpretHF2S src));
25274 format %{ "vmovw $dst, $src" %}
25275 ins_encode %{
25276 __ vmovw($dst$$Register, $src$$XMMRegister);
25277 %}
25278 ins_pipe(pipe_slow);
25279 %}
25280
25281 instruct convF2HFAndS2HF(regF dst, regF src)
25282 %{
25283 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25284 format %{ "convF2HFAndS2HF $dst, $src" %}
25285 ins_encode %{
25286 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25287 %}
25288 ins_pipe(pipe_slow);
25289 %}
25290
25291 instruct convHF2SAndHF2F(regF dst, regF src)
25292 %{
25293 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25294 format %{ "convHF2SAndHF2F $dst, $src" %}
25295 ins_encode %{
25296 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25297 %}
25298 ins_pipe(pipe_slow);
25299 %}
25300
25301 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25302 %{
25303 match(Set dst (SqrtHF src));
25304 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25305 ins_encode %{
25306 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25307 %}
25308 ins_pipe(pipe_slow);
25309 %}
25310
25311 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25312 %{
25313 match(Set dst (AddHF src1 src2));
25314 match(Set dst (DivHF src1 src2));
25315 match(Set dst (MulHF src1 src2));
25316 match(Set dst (SubHF src1 src2));
25317 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25318 ins_encode %{
25319 int opcode = this->ideal_Opcode();
25320 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25321 %}
25322 ins_pipe(pipe_slow);
25323 %}
25324
25325 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25326 %{
25327 predicate(VM_Version::supports_avx10_2());
25328 match(Set dst (MaxHF src1 src2));
25329 match(Set dst (MinHF src1 src2));
25330 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25331 ins_encode %{
25332 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25333 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25334 %}
25335 ins_pipe( pipe_slow );
25336 %}
25337
25338 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25339 %{
25340 predicate(!VM_Version::supports_avx10_2());
25341 match(Set dst (MaxHF src1 src2));
25342 match(Set dst (MinHF src1 src2));
25343 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25344 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25345 ins_encode %{
25346 int opcode = this->ideal_Opcode();
25347 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25348 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25349 %}
25350 ins_pipe( pipe_slow );
25351 %}
25352
25353 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25354 %{
25355 match(Set dst (FmaHF src2 (Binary dst src1)));
25356 effect(DEF dst);
25357 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25358 ins_encode %{
25359 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25360 %}
25361 ins_pipe( pipe_slow );
25362 %}
25363
25364
25365 instruct vector_sqrt_HF_reg(vec dst, vec src)
25366 %{
25367 match(Set dst (SqrtVHF src));
25368 format %{ "vector_sqrt_fp16 $dst, $src" %}
25369 ins_encode %{
25370 int vlen_enc = vector_length_encoding(this);
25371 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25372 %}
25373 ins_pipe(pipe_slow);
25374 %}
25375
25376 instruct vector_sqrt_HF_mem(vec dst, memory src)
25377 %{
25378 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25379 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25380 ins_encode %{
25381 int vlen_enc = vector_length_encoding(this);
25382 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25383 %}
25384 ins_pipe(pipe_slow);
25385 %}
25386
25387 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25388 %{
25389 match(Set dst (AddVHF src1 src2));
25390 match(Set dst (DivVHF src1 src2));
25391 match(Set dst (MulVHF src1 src2));
25392 match(Set dst (SubVHF src1 src2));
25393 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25394 ins_encode %{
25395 int vlen_enc = vector_length_encoding(this);
25396 int opcode = this->ideal_Opcode();
25397 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25398 %}
25399 ins_pipe(pipe_slow);
25400 %}
25401
25402
25403 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25404 %{
25405 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25406 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25407 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25408 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25409 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25410 ins_encode %{
25411 int vlen_enc = vector_length_encoding(this);
25412 int opcode = this->ideal_Opcode();
25413 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25414 %}
25415 ins_pipe(pipe_slow);
25416 %}
25417
25418 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25419 %{
25420 match(Set dst (FmaVHF src2 (Binary dst src1)));
25421 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25422 ins_encode %{
25423 int vlen_enc = vector_length_encoding(this);
25424 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25425 %}
25426 ins_pipe( pipe_slow );
25427 %}
25428
25429 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25430 %{
25431 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25432 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25433 ins_encode %{
25434 int vlen_enc = vector_length_encoding(this);
25435 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25436 %}
25437 ins_pipe( pipe_slow );
25438 %}
25439
25440 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25441 %{
25442 predicate(VM_Version::supports_avx10_2());
25443 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25444 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25445 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25446 ins_encode %{
25447 int vlen_enc = vector_length_encoding(this);
25448 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25449 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25450 %}
25451 ins_pipe( pipe_slow );
25452 %}
25453
25454 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25455 %{
25456 predicate(VM_Version::supports_avx10_2());
25457 match(Set dst (MinVHF src1 src2));
25458 match(Set dst (MaxVHF src1 src2));
25459 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25460 ins_encode %{
25461 int vlen_enc = vector_length_encoding(this);
25462 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25463 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25464 %}
25465 ins_pipe( pipe_slow );
25466 %}
25467
25468 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25469 %{
25470 predicate(!VM_Version::supports_avx10_2());
25471 match(Set dst (MinVHF src1 src2));
25472 match(Set dst (MaxVHF src1 src2));
25473 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25474 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25475 ins_encode %{
25476 int vlen_enc = vector_length_encoding(this);
25477 int opcode = this->ideal_Opcode();
25478 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25479 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25480 %}
25481 ins_pipe( pipe_slow );
25482 %}
25483
25484 //----------PEEPHOLE RULES-----------------------------------------------------
25485 // These must follow all instruction definitions as they use the names
25486 // defined in the instruction definitions.
25487 //
25488 // peeppredicate ( rule_predicate );
25489 // // the predicate; unless it holds, the peephole rule is ignored
25490 //
25491 // peepmatch ( root_instr_name [preceding_instruction]* );
25492 //
25493 // peepprocedure ( procedure_name );
25494 // // provide the name of a procedure that performs the optimization; the
25495 // // procedure should reside in the architecture-dependent peephole file and
25496 // // has the signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
25497 // // with the arguments being the basic block, the current node index inside the
25498 // // block, the register allocator, functions that, when invoked, return a new
25499 // // node as defined in peepreplace, and the rule numbers of the nodes appearing
25500 // // in the corresponding peepmatch; the procedure returns true if successful,
25501 // // otherwise false
25502 //
25503 // peepconstraint %{
25504 // (instruction_number.operand_name relational_op instruction_number.operand_name
25505 // [, ...] );
25506 // // instruction numbers are zero-based using left to right order in peepmatch
25507 //
25508 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25509 // // provide an instruction_number.operand_name for each operand that appears
25510 // // in the replacement instruction's match rule
25511 //
25512 // ---------VM FLAGS---------------------------------------------------------
25513 //
25514 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25515 //
25516 // Each peephole rule is given an identifying number starting with zero and
25517 // increasing by one in the order seen by the parser. An individual peephole
25518 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25519 // on the command-line.
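//
// For example (an illustrative sketch only; rule numbers vary between builds):
//   -XX:-OptoPeephole        disables all peephole rules
//   -XX:OptoPeepholeAt=3     enables only the peephole rule numbered 3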
25520 //
25521 // ---------CURRENT LIMITATIONS----------------------------------------------
25522 //
25523 // Only transformations inside a basic block (do we need more for peephole?)
25524 //
25525 // ---------EXAMPLE----------------------------------------------------------
25526 //
25527 // // pertinent parts of existing instructions in architecture description
25528 // instruct movI(rRegI dst, rRegI src)
25529 // %{
25530 // match(Set dst (CopyI src));
25531 // %}
25532 //
25533 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25534 // %{
25535 // match(Set dst (AddI dst src));
25536 // effect(KILL cr);
25537 // %}
25538 //
25539 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25540 // %{
25541 // match(Set dst (AddI dst src));
25542 // %}
25543 //
25544 // 1. Simple replacement
25545 // - Only match adjacent instructions in same basic block
25546 // - Only equality constraints
25547 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25548 // - Only one replacement instruction
25549 //
25550 // // Change (inc mov) to lea
25551 // peephole %{
25552 // // lea should only be emitted when beneficial
25553 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25554 // // increment preceded by register-register move
25555 // peepmatch ( incI_rReg movI );
25556 // // require that the destination register of the increment
25557 // // match the destination register of the move
25558 // peepconstraint ( 0.dst == 1.dst );
25559 // // construct a replacement instruction that sets
25560 // // the destination to ( move's source register + one )
25561 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25562 // %}
25563 //
25564 // 2. Procedural replacement
25565 // - More flexible in finding relevant nodes
25566 // - More flexible constraints
25567 // - More flexible transformations
25568 // - May utilise architecture-dependent API more effectively
25569 // - Currently only one replacement instruction due to adlc parsing capabilities
25570 //
25571 // // Change (inc mov) to lea
25572 // peephole %{
25573 // // lea should only be emitted when beneficial
25574 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25575 // // the rule numbers of the matched nodes are passed into the function below
25576 // peepmatch ( incI_rReg movI );
25577 // // the procedure responsible for performing the transformation
25578 // peepprocedure ( inc_mov_to_lea );
25579 // // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
25580 // // this node is passed into the function above
25581 // peepreplace ( leaI_rReg_immI() );
25582 // %}
25583
25584 // These instructions are not matched by the matcher but are used by the peephole rules
25585 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25586 %{
25587 predicate(false);
25588 match(Set dst (AddI src1 src2));
25589 format %{ "leal $dst, [$src1 + $src2]" %}
25590 ins_encode %{
25591 Register dst = $dst$$Register;
25592 Register src1 = $src1$$Register;
25593 Register src2 = $src2$$Register;
25594 if (src1 != rbp && src1 != r13) {
25595 __ leal(dst, Address(src1, src2, Address::times_1));
25596 } else {
25597 assert(src2 != rbp && src2 != r13, "");
25598 __ leal(dst, Address(src2, src1, Address::times_1));
25599 }
25600 %}
25601 ins_pipe(ialu_reg_reg);
25602 %}
25603
25604 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25605 %{
25606 predicate(false);
25607 match(Set dst (AddI src1 src2));
25608 format %{ "leal $dst, [$src1 + $src2]" %}
25609 ins_encode %{
25610 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25611 %}
25612 ins_pipe(ialu_reg_reg);
25613 %}
25614
25615 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25616 %{
25617 predicate(false);
25618 match(Set dst (LShiftI src shift));
25619 format %{ "leal $dst, [$src << $shift]" %}
25620 ins_encode %{
25621 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25622 Register src = $src$$Register;
25623 if (scale == Address::times_2 && src != rbp && src != r13) {
25624 __ leal($dst$$Register, Address(src, src, Address::times_1));
25625 } else {
25626 __ leal($dst$$Register, Address(noreg, src, scale));
25627 }
25628 %}
25629 ins_pipe(ialu_reg_reg);
25630 %}
25631
25632 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25633 %{
25634 predicate(false);
25635 match(Set dst (AddL src1 src2));
25636 format %{ "leaq $dst, [$src1 + $src2]" %}
25637 ins_encode %{
25638 Register dst = $dst$$Register;
25639 Register src1 = $src1$$Register;
25640 Register src2 = $src2$$Register;
25641 if (src1 != rbp && src1 != r13) {
25642 __ leaq(dst, Address(src1, src2, Address::times_1));
25643 } else {
25644 assert(src2 != rbp && src2 != r13, "");
25645 __ leaq(dst, Address(src2, src1, Address::times_1));
25646 }
25647 %}
25648 ins_pipe(ialu_reg_reg);
25649 %}
25650
25651 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25652 %{
25653 predicate(false);
25654 match(Set dst (AddL src1 src2));
25655 format %{ "leaq $dst, [$src1 + $src2]" %}
25656 ins_encode %{
25657 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25658 %}
25659 ins_pipe(ialu_reg_reg);
25660 %}
25661
25662 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25663 %{
25664 predicate(false);
25665 match(Set dst (LShiftL src shift));
25666 format %{ "leaq $dst, [$src << $shift]" %}
25667 ins_encode %{
25668 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25669 Register src = $src$$Register;
25670 if (scale == Address::times_2 && src != rbp && src != r13) {
25671 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25672 } else {
25673 __ leaq($dst$$Register, Address(noreg, src, scale));
25674 }
25675 %}
25676 ins_pipe(ialu_reg_reg);
25677 %}
25678
25679 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25680 // sal}) with lea instructions. The {add, sal} rules are beneficial on
25681 // processors with at least partial ALU support for lea
25682 // (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
25683 // generally only beneficial on processors with full ALU support
25684 // (VM_Version::supports_fast_3op_lea()) or on Intel Cascade Lake.
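//
// For example (an illustrative sketch; register choices are arbitrary), the
// int add rule rewrites a mov/add pair such as
//   movl rdx, rax
//   addl rdx, rbx
// into the single instruction
//   leal rdx, [rax + rbx]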
25685
25686 peephole
25687 %{
25688 peeppredicate(VM_Version::supports_fast_2op_lea());
25689 peepmatch (addI_rReg);
25690 peepprocedure (lea_coalesce_reg);
25691 peepreplace (leaI_rReg_rReg_peep());
25692 %}
25693
25694 peephole
25695 %{
25696 peeppredicate(VM_Version::supports_fast_2op_lea());
25697 peepmatch (addI_rReg_imm);
25698 peepprocedure (lea_coalesce_imm);
25699 peepreplace (leaI_rReg_immI_peep());
25700 %}
25701
25702 peephole
25703 %{
25704 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25705 VM_Version::is_intel_cascade_lake());
25706 peepmatch (incI_rReg);
25707 peepprocedure (lea_coalesce_imm);
25708 peepreplace (leaI_rReg_immI_peep());
25709 %}
25710
25711 peephole
25712 %{
25713 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25714 VM_Version::is_intel_cascade_lake());
25715 peepmatch (decI_rReg);
25716 peepprocedure (lea_coalesce_imm);
25717 peepreplace (leaI_rReg_immI_peep());
25718 %}
25719
25720 peephole
25721 %{
25722 peeppredicate(VM_Version::supports_fast_2op_lea());
25723 peepmatch (salI_rReg_immI2);
25724 peepprocedure (lea_coalesce_imm);
25725 peepreplace (leaI_rReg_immI2_peep());
25726 %}
25727
25728 peephole
25729 %{
25730 peeppredicate(VM_Version::supports_fast_2op_lea());
25731 peepmatch (addL_rReg);
25732 peepprocedure (lea_coalesce_reg);
25733 peepreplace (leaL_rReg_rReg_peep());
25734 %}
25735
25736 peephole
25737 %{
25738 peeppredicate(VM_Version::supports_fast_2op_lea());
25739 peepmatch (addL_rReg_imm);
25740 peepprocedure (lea_coalesce_imm);
25741 peepreplace (leaL_rReg_immL32_peep());
25742 %}
25743
25744 peephole
25745 %{
25746 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25747 VM_Version::is_intel_cascade_lake());
25748 peepmatch (incL_rReg);
25749 peepprocedure (lea_coalesce_imm);
25750 peepreplace (leaL_rReg_immL32_peep());
25751 %}
25752
25753 peephole
25754 %{
25755 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25756 VM_Version::is_intel_cascade_lake());
25757 peepmatch (decL_rReg);
25758 peepprocedure (lea_coalesce_imm);
25759 peepreplace (leaL_rReg_immL32_peep());
25760 %}
25761
25762 peephole
25763 %{
25764 peeppredicate(VM_Version::supports_fast_2op_lea());
25765 peepmatch (salL_rReg_immI2);
25766 peepprocedure (lea_coalesce_imm);
25767 peepreplace (leaL_rReg_immI2_peep());
25768 %}
25769
25770 peephole
25771 %{
25772 peepmatch (leaPCompressedOopOffset);
25773 peepprocedure (lea_remove_redundant);
25774 %}
25775
25776 peephole
25777 %{
25778 peepmatch (leaP8Narrow);
25779 peepprocedure (lea_remove_redundant);
25780 %}
25781
25782 peephole
25783 %{
25784 peepmatch (leaP32Narrow);
25785 peepprocedure (lea_remove_redundant);
25786 %}
25787
25788 // These peephole rules match instructions which set flags and are followed by a testI/L_reg.
25789 // The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
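//
// For example (an illustrative sketch; register choices are arbitrary), in
//   andl  rax, rbx
//   testl rax, rax
//   je    done
// the testl only re-derives flags that andl has already set, so it may be removed.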
25790
25791 // int variant
25792 peephole
25793 %{
25794 peepmatch (testI_reg);
25795 peepprocedure (test_may_remove);
25796 %}
25797
25798 // long variant
25799 peephole
25800 %{
25801 peepmatch (testL_reg);
25802 peepprocedure (test_may_remove);
25803 %}
25804
25805
25806 //----------SMARTSPILL RULES---------------------------------------------------
25807 // These must follow all instruction definitions as they use the names
25808 // defined in the instruction definitions.