1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
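//
// As an illustrative reading of this format (using the first definition
// below, not an additional one):
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// declares RAX as save-on-call for both the register save type and the
// C convention save type, spilled/restored as an integer (Op_RegI, i.e. via
// LoadI/StoreI), with hardware encoding 0, backed by rax->as_VMReg().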
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
// Previously RBX, RSI, and RDI were set as save-on-entry for Java code,
// but SOE was turned off in Java code due to the frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned on as SOE registers.
69
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
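// For example, in chunk0 below the caller-saved temporaries R10 and R11 get
// the highest priority, while RAX, RBP, the save-on-entry registers R13-R15,
// the APX registers R16-R31, and finally RSP come last.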
188
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI: No register preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
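//
// As with the general registers, each slot below is a 32-bit VMReg; for
// example the first two definitions,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//   reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
// name the low two 32-bit lanes of xmm0/ymm0/zmm0: a Float lives in XMM0,
// a Double in the XMM0/XMM0b pair, and a full 512-bit vector uses all 16
// slots XMM0 through XMM0p.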
236
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
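// k0 is deliberately not defined here: in EVEX encodings a write-mask field
// of 0 means "no masking", so k0 cannot be used as an allocatable mask register.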
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
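// Register classes below are written in two forms: a fixed member list, e.g.
//   reg_class ptr_rax_reg(RAX, RAX_H);
// and a computed form whose body returns a register mask, e.g.
//   reg_class ptr_reg %{
//     return _PTR_REG_mask;
//   %}
// The computed form lets the class contents be set up at VM startup (for
// example, depending on whether the APX extended GPRs are available).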
812
813 // Empty register class.
814 reg_class no_reg();
815
816 // Class for all pointer/long registers including APX extended GPRs.
817 reg_class all_reg(RAX, RAX_H,
818 RDX, RDX_H,
819 RBP, RBP_H,
820 RDI, RDI_H,
821 RSI, RSI_H,
822 RCX, RCX_H,
823 RBX, RBX_H,
824 RSP, RSP_H,
825 R8, R8_H,
826 R9, R9_H,
827 R10, R10_H,
828 R11, R11_H,
829 R12, R12_H,
830 R13, R13_H,
831 R14, R14_H,
832 R15, R15_H,
833 R16, R16_H,
834 R17, R17_H,
835 R18, R18_H,
836 R19, R19_H,
837 R20, R20_H,
838 R21, R21_H,
839 R22, R22_H,
840 R23, R23_H,
841 R24, R24_H,
842 R25, R25_H,
843 R26, R26_H,
844 R27, R27_H,
845 R28, R28_H,
846 R29, R29_H,
847 R30, R30_H,
848 R31, R31_H);
849
850 // Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
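// RSP and R15 are omitted from all_int_reg: RSP is the stack pointer and
// R15 is reserved as the TLS/thread pointer (see ptr_rsp_reg and
// ptr_r15_reg below).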
881
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
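// reg_class_dynamic selects the first (EVEX) class when the given predicate
// holds at runtime and falls back to the legacy class otherwise.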
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
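// Returns true when each bound of the CastLL's long type is either unbounded
// (min/max jlong) or fits in a signed 32-bit immediate, so the bound can be
// encoded as an imm32 in the matched instruction.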
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
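// R12 is reserved as the compressed-oop heap base register, so it must stay
// out of the allocatable masks whenever compressed oops are in use.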
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
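  // r16-r31 are the Intel APX extended GPRs; they are removed from the masks
  // below whenever UseAPX is off.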
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
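// Emit vzeroupper around calls and returns when the compiled code may have
// used wide YMM/ZMM state, to avoid AVX-SSE transition penalties.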
1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
1628 }
1629
1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is a 3-byte instruction
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
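  // The dynamic call site is a 10-byte movabs loading the inline-cache value
  // into rax, followed by a 5-byte call rel32.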
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // Materialize the -1/0/+1 result of a three-way floating-point compare in dst.
  // After a ucomis[s/d], CF=1 covers both 'below' and the unordered case (at
  // least one NaN input), so both leave dst at -1.
1704 Label done;
1705 __ movl(dst, -1);
1706 __ jcc(Assembler::below, done);
1707 __ setcc(Assembler::notEqual, dst);
1708 __ bind(done);
1709 }
1710
1711 enum FP_PREC {
1712 fp_prec_hlf,
1713 fp_prec_flt,
1714 fp_prec_dbl
1715 };
1716
1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1718 XMMRegister p, XMMRegister q) {
1719 if (pt == fp_prec_hlf) {
1720 __ evucomish(p, q);
1721 } else if (pt == fp_prec_flt) {
1722 __ ucomiss(p, q);
1723 } else {
1724 __ ucomisd(p, q);
1725 }
1726 }
1727
1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1729 XMMRegister dst, XMMRegister src, Register scratch) {
1730 if (pt == fp_prec_hlf) {
1731 __ movhlf(dst, src, scratch);
1732 } else if (pt == fp_prec_flt) {
1733 __ movflt(dst, src);
1734 } else {
1735 __ movdbl(dst, src);
1736 }
1737 }
1738
// Math.min()         # Math.max()
// ----------------------------------
//  (v)ucomis[h/s/d]  #
//  ja   -> b         # a
//  jp   -> NaN       # NaN
//  jb   -> a         # b
//  je                #
//  |-jz -> a | b     # a & b
//  |    -> a         #
1748 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1749 XMMRegister a, XMMRegister b,
1750 XMMRegister xmmt, Register rt,
1751 bool min, enum FP_PREC pt) {
1752
1753 Label nan, zero, below, above, done;
1754
1755 emit_fp_ucom(masm, pt, a, b);
1756
1757 if (dst->encoding() != (min ? b : a)->encoding()) {
1758 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1759 } else {
1760 __ jccb(Assembler::above, done);
1761 }
1762
1763 __ jccb(Assembler::parity, nan); // PF=1
1764 __ jccb(Assembler::below, below); // CF=1
1765
1766 // equal
1767 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1768 emit_fp_ucom(masm, pt, a, xmmt);
1769
1770 __ jccb(Assembler::equal, zero);
1771 movfp(masm, pt, dst, a, rt);
1772
1773 __ jmp(done);
1774
1775 __ bind(zero);
1776 if (min) {
1777 __ vpor(dst, a, b, Assembler::AVX_128bit);
1778 } else {
1779 __ vpand(dst, a, b, Assembler::AVX_128bit);
1780 }
1781
1782 __ jmp(done);
1783
1784 __ bind(above);
1785 movfp(masm, pt, dst, min ? b : a, rt);
1786
1787 __ jmp(done);
1788
1789 __ bind(nan);
1790 if (pt == fp_prec_hlf) {
1791 __ movl(rt, 0x00007e00); // Float16.NaN
1792 __ evmovw(dst, rt);
1793 } else if (pt == fp_prec_flt) {
1794 __ movl(rt, 0x7fc00000); // Float.NaN
1795 __ movdl(dst, rt);
1796 } else {
1797 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1798 __ movdq(dst, rt);
1799 }
1800 __ jmp(done);
1801
1802 __ bind(below);
1803 movfp(masm, pt, dst, min ? a : b, rt);
1804
1805 __ bind(done);
1806 }
1807
1808 //=============================================================================
1809 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1810
1811 int ConstantTable::calculate_table_base_offset() const {
1812 return 0; // absolute addressing, no offset
1813 }
1814
1815 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1816 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1817 ShouldNotReachHere();
1818 }
1819
1820 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1821 // Empty encoding
1822 }
1823
1824 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1825 return 0;
1826 }
1827
1828 #ifndef PRODUCT
1829 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1830 st->print("# MachConstantBaseNode (empty encoding)");
1831 }
1832 #endif
1833
1834
1835 //=============================================================================
1836 #ifndef PRODUCT
1837 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1838 Compile* C = ra_->C;
1839
1840 int framesize = C->output()->frame_size_in_bytes();
1841 int bangsize = C->output()->bang_size_in_bytes();
1842 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1843 // Remove wordSize for return addr which is already pushed.
1844 framesize -= wordSize;
1845
1846 if (C->output()->need_stack_bang(bangsize)) {
1847 framesize -= wordSize;
1848 st->print("# stack bang (%d bytes)", bangsize);
1849 st->print("\n\t");
1850 st->print("pushq rbp\t# Save rbp");
1851 if (PreserveFramePointer) {
1852 st->print("\n\t");
1853 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1854 }
1855 if (framesize) {
1856 st->print("\n\t");
1857 st->print("subq rsp, #%d\t# Create frame",framesize);
1858 }
1859 } else {
1860 st->print("subq rsp, #%d\t# Create frame",framesize);
1861 st->print("\n\t");
1862 framesize -= wordSize;
1863 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1864 if (PreserveFramePointer) {
1865 st->print("\n\t");
1866 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1867 if (framesize > 0) {
1868 st->print("\n\t");
1869 st->print("addq rbp, #%d", framesize);
1870 }
1871 }
1872 }
1873
1874 if (VerifyStackAtCalls) {
1875 st->print("\n\t");
1876 framesize -= wordSize;
1877 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1878 #ifdef ASSERT
1879 st->print("\n\t");
1880 st->print("# stack alignment check");
1881 #endif
1882 }
1883 if (C->stub_function() != nullptr) {
1884 st->print("\n\t");
1885 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1886 st->print("\n\t");
1887 st->print("je fast_entry\t");
1888 st->print("\n\t");
1889 st->print("call #nmethod_entry_barrier_stub\t");
1890 st->print("\n\tfast_entry:");
1891 }
1892 st->cr();
1893 }
1894 #endif
1895
1896 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1897 Compile* C = ra_->C;
1898
1899 int framesize = C->output()->frame_size_in_bytes();
1900 int bangsize = C->output()->bang_size_in_bytes();
1901
1902 if (C->clinit_barrier_on_entry()) {
1903 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1904 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1905
1906 Label L_skip_barrier;
1907 Register klass = rscratch1;
1908
1909 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1910 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1911
1912 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1913
1914 __ bind(L_skip_barrier);
1915 }
1916
1917 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1918
1919 C->output()->set_frame_complete(__ offset());
1920
1921 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
1924 ConstantTable& constant_table = C->output()->constant_table();
1925 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1926 }
1927 }
1928
1929 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1930 {
1931 return MachNode::size(ra_); // too many variables; just compute it
1932 // the hard way
1933 }
1934
1935 int MachPrologNode::reloc() const
1936 {
1937 return 0; // a large enough number
1938 }
1939
1940 //=============================================================================
1941 #ifndef PRODUCT
1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1943 {
1944 Compile* C = ra_->C;
1945 if (generate_vzeroupper(C)) {
1946 st->print("vzeroupper");
1947 st->cr(); st->print("\t");
1948 }
1949
1950 int framesize = C->output()->frame_size_in_bytes();
1951 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the word for the return address, which is already pushed,
  // and the word for RBP.
1954 framesize -= 2*wordSize;
1955
1956 if (framesize) {
1957 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1958 st->print("\t");
1959 }
1960
1961 st->print_cr("popq rbp");
1962 if (do_polling() && C->is_method_compilation()) {
1963 st->print("\t");
1964 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1965 "ja #safepoint_stub\t"
1966 "# Safepoint: poll for GC");
1967 }
1968 }
1969 #endif
1970
1971 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1972 {
1973 Compile* C = ra_->C;
1974
1975 if (generate_vzeroupper(C)) {
1976 // Clear upper bits of YMM registers when current compiled code uses
1977 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1978 __ vzeroupper();
1979 }
1980
1981 int framesize = C->output()->frame_size_in_bytes();
1982 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the word for the return address, which is already pushed,
  // and the word for RBP.
1985 framesize -= 2*wordSize;
1986
1987 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1988
1989 if (framesize) {
1990 __ addq(rsp, framesize);
1991 }
1992
1993 __ popq(rbp);
1994
1995 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1996 __ reserved_stack_check();
1997 }
1998
1999 if (do_polling() && C->is_method_compilation()) {
2000 Label dummy_label;
2001 Label* code_stub = &dummy_label;
2002 if (!C->output()->in_scratch_emit_size()) {
2003 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
2004 C->output()->add_stub(stub);
2005 code_stub = &stub->entry();
2006 }
2007 __ relocate(relocInfo::poll_return_type);
2008 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
2009 }
2010 }
2011
2012 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
2013 {
2014 return MachNode::size(ra_); // too many variables; just compute it
2015 // the hard way
2016 }
2017
2018 int MachEpilogNode::reloc() const
2019 {
2020 return 2; // a large enough number
2021 }
2022
2023 const Pipeline* MachEpilogNode::pipeline() const
2024 {
2025 return MachNode::pipeline_class();
2026 }
2027
2028 //=============================================================================
2029
2030 enum RC {
2031 rc_bad,
2032 rc_int,
2033 rc_kreg,
2034 rc_float,
2035 rc_stack
2036 };
2037
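// Map an OptoReg to the coarse register class used to dispatch spill copies.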
2038 static enum RC rc_class(OptoReg::Name reg)
2039 {
2040 if( !OptoReg::is_valid(reg) ) return rc_bad;
2041
2042 if (OptoReg::is_stack(reg)) return rc_stack;
2043
2044 VMReg r = OptoReg::as_VMReg(reg);
2045
2046 if (r->is_Register()) return rc_int;
2047
2048 if (r->is_KRegister()) return rc_kreg;
2049
2050 assert(r->is_XMMRegister(), "must be");
2051 return rc_float;
2052 }
2053
2054 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2055 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2056 int src_hi, int dst_hi, uint ireg, outputStream* st);
2057
2058 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2059 int stack_offset, int reg, uint ireg, outputStream* st);
2060
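// Vector mem-to-mem copy. 64- and 128-bit copies go through pushq/popq; the
// 32-bit case borrows rax and the 256/512-bit cases borrow xmm0, preserving
// the scratch register in a temporary slot just below rsp.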
2061 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2062 int dst_offset, uint ireg, outputStream* st) {
2063 if (masm) {
2064 switch (ireg) {
2065 case Op_VecS:
2066 __ movq(Address(rsp, -8), rax);
2067 __ movl(rax, Address(rsp, src_offset));
2068 __ movl(Address(rsp, dst_offset), rax);
2069 __ movq(rax, Address(rsp, -8));
2070 break;
2071 case Op_VecD:
2072 __ pushq(Address(rsp, src_offset));
2073 __ popq (Address(rsp, dst_offset));
2074 break;
2075 case Op_VecX:
2076 __ pushq(Address(rsp, src_offset));
2077 __ popq (Address(rsp, dst_offset));
2078 __ pushq(Address(rsp, src_offset+8));
2079 __ popq (Address(rsp, dst_offset+8));
2080 break;
2081 case Op_VecY:
2082 __ vmovdqu(Address(rsp, -32), xmm0);
2083 __ vmovdqu(xmm0, Address(rsp, src_offset));
2084 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2085 __ vmovdqu(xmm0, Address(rsp, -32));
2086 break;
2087 case Op_VecZ:
2088 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2089 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2090 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2091 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2092 break;
2093 default:
2094 ShouldNotReachHere();
2095 }
2096 #ifndef PRODUCT
2097 } else {
2098 switch (ireg) {
2099 case Op_VecS:
2100 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2101 "movl rax, [rsp + #%d]\n\t"
2102 "movl [rsp + #%d], rax\n\t"
2103 "movq rax, [rsp - #8]",
2104 src_offset, dst_offset);
2105 break;
2106 case Op_VecD:
2107 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2108 "popq [rsp + #%d]",
2109 src_offset, dst_offset);
2110 break;
2111 case Op_VecX:
2112 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2113 "popq [rsp + #%d]\n\t"
2114 "pushq [rsp + #%d]\n\t"
2115 "popq [rsp + #%d]",
2116 src_offset, dst_offset, src_offset+8, dst_offset+8);
2117 break;
2118 case Op_VecY:
2119 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2120 "vmovdqu xmm0, [rsp + #%d]\n\t"
2121 "vmovdqu [rsp + #%d], xmm0\n\t"
2122 "vmovdqu xmm0, [rsp - #32]",
2123 src_offset, dst_offset);
2124 break;
2125 case Op_VecZ:
2126 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2127 "vmovdqu xmm0, [rsp + #%d]\n\t"
2128 "vmovdqu [rsp + #%d], xmm0\n\t"
2129 "vmovdqu xmm0, [rsp - #64]",
2130 src_offset, dst_offset);
2131 break;
2132 default:
2133 ShouldNotReachHere();
2134 }
2135 #endif
2136 }
2137 }
2138
2139 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2140 PhaseRegAlloc* ra_,
2141 bool do_size,
2142 outputStream* st) const {
2143 assert(masm != nullptr || st != nullptr, "sanity");
2144 // Get registers to move
2145 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2146 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2147 OptoReg::Name dst_second = ra_->get_reg_second(this);
2148 OptoReg::Name dst_first = ra_->get_reg_first(this);
2149
2150 enum RC src_second_rc = rc_class(src_second);
2151 enum RC src_first_rc = rc_class(src_first);
2152 enum RC dst_second_rc = rc_class(dst_second);
2153 enum RC dst_first_rc = rc_class(dst_first);
2154
2155 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2156 "must move at least 1 register" );
2157
2158 if (src_first == dst_first && src_second == dst_second) {
2159 // Self copy, no move
2160 return 0;
2161 }
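  // Vector copies are handled first; all remaining cases dispatch on the
  // (source, destination) register-class pair, emitting code when masm is
  // non-null and printing the equivalent assembly for formatting otherwise.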
2162 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2163 uint ireg = ideal_reg();
2164 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2165 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
2166 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
2167 // mem -> mem
2168 int src_offset = ra_->reg2offset(src_first);
2169 int dst_offset = ra_->reg2offset(dst_first);
2170 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2171 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2172 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2173 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2174 int stack_offset = ra_->reg2offset(dst_first);
2175 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2176 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2177 int stack_offset = ra_->reg2offset(src_first);
2178 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2179 } else {
2180 ShouldNotReachHere();
2181 }
2182 return 0;
2183 }
2184 if (src_first_rc == rc_stack) {
2185 // mem ->
2186 if (dst_first_rc == rc_stack) {
2187 // mem -> mem
2188 assert(src_second != dst_first, "overlap");
2189 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2190 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2191 // 64-bit
2192 int src_offset = ra_->reg2offset(src_first);
2193 int dst_offset = ra_->reg2offset(dst_first);
2194 if (masm) {
2195 __ pushq(Address(rsp, src_offset));
2196 __ popq (Address(rsp, dst_offset));
2197 #ifndef PRODUCT
2198 } else {
2199 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2200 "popq [rsp + #%d]",
2201 src_offset, dst_offset);
2202 #endif
2203 }
2204 } else {
2205 // 32-bit
2206 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2207 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2208 // No pushl/popl, so:
2209 int src_offset = ra_->reg2offset(src_first);
2210 int dst_offset = ra_->reg2offset(dst_first);
2211 if (masm) {
2212 __ movq(Address(rsp, -8), rax);
2213 __ movl(rax, Address(rsp, src_offset));
2214 __ movl(Address(rsp, dst_offset), rax);
2215 __ movq(rax, Address(rsp, -8));
2216 #ifndef PRODUCT
2217 } else {
2218 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2219 "movl rax, [rsp + #%d]\n\t"
2220 "movl [rsp + #%d], rax\n\t"
2221 "movq rax, [rsp - #8]",
2222 src_offset, dst_offset);
2223 #endif
2224 }
2225 }
2226 return 0;
2227 } else if (dst_first_rc == rc_int) {
2228 // mem -> gpr
2229 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2230 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2231 // 64-bit
2232 int offset = ra_->reg2offset(src_first);
2233 if (masm) {
2234 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2235 #ifndef PRODUCT
2236 } else {
2237 st->print("movq %s, [rsp + #%d]\t# spill",
2238 Matcher::regName[dst_first],
2239 offset);
2240 #endif
2241 }
2242 } else {
2243 // 32-bit
2244 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2245 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2246 int offset = ra_->reg2offset(src_first);
2247 if (masm) {
2248 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2249 #ifndef PRODUCT
2250 } else {
2251 st->print("movl %s, [rsp + #%d]\t# spill",
2252 Matcher::regName[dst_first],
2253 offset);
2254 #endif
2255 }
2256 }
2257 return 0;
2258 } else if (dst_first_rc == rc_float) {
2259 // mem-> xmm
2260 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2261 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2262 // 64-bit
2263 int offset = ra_->reg2offset(src_first);
2264 if (masm) {
2265 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2266 #ifndef PRODUCT
2267 } else {
2268 st->print("%s %s, [rsp + #%d]\t# spill",
2269 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2270 Matcher::regName[dst_first],
2271 offset);
2272 #endif
2273 }
2274 } else {
2275 // 32-bit
2276 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2277 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2278 int offset = ra_->reg2offset(src_first);
2279 if (masm) {
2280 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2281 #ifndef PRODUCT
2282 } else {
2283 st->print("movss %s, [rsp + #%d]\t# spill",
2284 Matcher::regName[dst_first],
2285 offset);
2286 #endif
2287 }
2288 }
2289 return 0;
2290 } else if (dst_first_rc == rc_kreg) {
2291 // mem -> kreg
2292 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2293 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2294 // 64-bit
2295 int offset = ra_->reg2offset(src_first);
2296 if (masm) {
2297 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2298 #ifndef PRODUCT
2299 } else {
2300 st->print("kmovq %s, [rsp + #%d]\t# spill",
2301 Matcher::regName[dst_first],
2302 offset);
2303 #endif
2304 }
2305 }
2306 return 0;
2307 }
2308 } else if (src_first_rc == rc_int) {
2309 // gpr ->
2310 if (dst_first_rc == rc_stack) {
2311 // gpr -> mem
2312 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2313 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2314 // 64-bit
2315 int offset = ra_->reg2offset(dst_first);
2316 if (masm) {
2317 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2318 #ifndef PRODUCT
2319 } else {
2320 st->print("movq [rsp + #%d], %s\t# spill",
2321 offset,
2322 Matcher::regName[src_first]);
2323 #endif
2324 }
2325 } else {
2326 // 32-bit
2327 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2328 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2329 int offset = ra_->reg2offset(dst_first);
2330 if (masm) {
2331 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2332 #ifndef PRODUCT
2333 } else {
2334 st->print("movl [rsp + #%d], %s\t# spill",
2335 offset,
2336 Matcher::regName[src_first]);
2337 #endif
2338 }
2339 }
2340 return 0;
2341 } else if (dst_first_rc == rc_int) {
2342 // gpr -> gpr
2343 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2344 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2345 // 64-bit
2346 if (masm) {
2347 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2348 as_Register(Matcher::_regEncode[src_first]));
2349 #ifndef PRODUCT
2350 } else {
2351 st->print("movq %s, %s\t# spill",
2352 Matcher::regName[dst_first],
2353 Matcher::regName[src_first]);
2354 #endif
2355 }
2356 return 0;
2357 } else {
2358 // 32-bit
2359 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2360 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2361 if (masm) {
2362 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2363 as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movl %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 return 0;
2372 }
2373 } else if (dst_first_rc == rc_float) {
2374 // gpr -> xmm
2375 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2376 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2377 // 64-bit
2378 if (masm) {
2379 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2380 #ifndef PRODUCT
2381 } else {
2382 st->print("movdq %s, %s\t# spill",
2383 Matcher::regName[dst_first],
2384 Matcher::regName[src_first]);
2385 #endif
2386 }
2387 } else {
2388 // 32-bit
2389 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2390 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2391 if (masm) {
2392 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2393 #ifndef PRODUCT
2394 } else {
2395 st->print("movdl %s, %s\t# spill",
2396 Matcher::regName[dst_first],
2397 Matcher::regName[src_first]);
2398 #endif
2399 }
2400 }
2401 return 0;
2402 } else if (dst_first_rc == rc_kreg) {
2403 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2404 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2405 // 64-bit
2406 if (masm) {
2407 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2408 #ifndef PRODUCT
2409 } else {
2410 st->print("kmovq %s, %s\t# spill",
2411 Matcher::regName[dst_first],
2412 Matcher::regName[src_first]);
2413 #endif
2414 }
2415 }
2416 Unimplemented();
2417 return 0;
2418 }
2419 } else if (src_first_rc == rc_float) {
2420 // xmm ->
2421 if (dst_first_rc == rc_stack) {
2422 // xmm -> mem
2423 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2424 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2425 // 64-bit
2426 int offset = ra_->reg2offset(dst_first);
2427 if (masm) {
2428 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2429 #ifndef PRODUCT
2430 } else {
2431 st->print("movsd [rsp + #%d], %s\t# spill",
2432 offset,
2433 Matcher::regName[src_first]);
2434 #endif
2435 }
2436 } else {
2437 // 32-bit
2438 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2439 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2440 int offset = ra_->reg2offset(dst_first);
2441 if (masm) {
2442 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movss [rsp + #%d], %s\t# spill",
2446 offset,
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 }
2451 return 0;
2452 } else if (dst_first_rc == rc_int) {
2453 // xmm -> gpr
2454 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2455 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2456 // 64-bit
2457 if (masm) {
2458 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2459 #ifndef PRODUCT
2460 } else {
2461 st->print("movdq %s, %s\t# spill",
2462 Matcher::regName[dst_first],
2463 Matcher::regName[src_first]);
2464 #endif
2465 }
2466 } else {
2467 // 32-bit
2468 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2469 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2470 if (masm) {
2471 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("movdl %s, %s\t# spill",
2475 Matcher::regName[dst_first],
2476 Matcher::regName[src_first]);
2477 #endif
2478 }
2479 }
2480 return 0;
2481 } else if (dst_first_rc == rc_float) {
2482 // xmm -> xmm
2483 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2484 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2485 // 64-bit
2486 if (masm) {
2487 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2488 #ifndef PRODUCT
2489 } else {
2490 st->print("%s %s, %s\t# spill",
2491 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2492 Matcher::regName[dst_first],
2493 Matcher::regName[src_first]);
2494 #endif
2495 }
2496 } else {
2497 // 32-bit
2498 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2499 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2500 if (masm) {
2501 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2502 #ifndef PRODUCT
2503 } else {
2504 st->print("%s %s, %s\t# spill",
2505 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2506 Matcher::regName[dst_first],
2507 Matcher::regName[src_first]);
2508 #endif
2509 }
2510 }
2511 return 0;
2512 } else if (dst_first_rc == rc_kreg) {
2513 assert(false, "Illegal spilling");
2514 return 0;
2515 }
2516 } else if (src_first_rc == rc_kreg) {
2517 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 int offset = ra_->reg2offset(dst_first);
2523 if (masm) {
2524 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2525 #ifndef PRODUCT
2526 } else {
2527 st->print("kmovq [rsp + #%d] , %s\t# spill",
2528 offset,
2529 Matcher::regName[src_first]);
2530 #endif
2531 }
2532 }
2533 return 0;
2534 } else if (dst_first_rc == rc_int) {
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 Unimplemented();
2549 return 0;
2550 } else if (dst_first_rc == rc_kreg) {
2551 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2552 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2553 // 64-bit
2554 if (masm) {
2555 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2556 #ifndef PRODUCT
2557 } else {
2558 st->print("kmovq %s, %s\t# spill",
2559 Matcher::regName[dst_first],
2560 Matcher::regName[src_first]);
2561 #endif
2562 }
2563 }
2564 return 0;
2565 } else if (dst_first_rc == rc_float) {
2566 assert(false, "Illegal spill");
2567 return 0;
2568 }
2569 }
2570
  assert(false, "unhandled spill-copy register class combination");
2572 Unimplemented();
2573 return 0;
2574 }
2575
2576 #ifndef PRODUCT
2577 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2578 implementation(nullptr, ra_, false, st);
2579 }
2580 #endif
2581
2582 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2583 implementation(masm, ra_, false, nullptr);
2584 }
2585
2586 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2587 return MachNode::size(ra_);
2588 }
2589
2590 //=============================================================================
2591 #ifndef PRODUCT
2592 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2593 {
2594 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2595 int reg = ra_->get_reg_first(this);
2596 st->print("leaq %s, [rsp + #%d]\t# box lock",
2597 Matcher::regName[reg], offset);
2598 }
2599 #endif
2600
2601 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2602 {
2603 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2604 int reg = ra_->get_encode(this);
2605
2606 __ lea(as_Register(reg), Address(rsp, offset));
2607 }
2608
2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2610 {
2611 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
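  // Size of the lea emitted above: prefix + opcode + ModRM + SIB + displacement.
  // A REX-prefixed lea is 5 bytes with a disp8 and 8 with a disp32; encodings
  // above 15 need the two-byte REX2 prefix, adding one byte to each.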
2612 if (ra_->get_encode(this) > 15) {
2613 return (offset < 0x80) ? 6 : 9; // REX2
2614 } else {
2615 return (offset < 0x80) ? 5 : 8; // REX
2616 }
2617 }
2618
2619 //=============================================================================
2620 #ifndef PRODUCT
2621 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2622 {
2623 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2624 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2625 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2626 }
2627 #endif
2628
2629 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2630 {
2631 __ ic_check(InteriorEntryAlignment);
2632 }
2633
2634 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2635 {
2636 return MachNode::size(ra_); // too many variables; just compute it
2637 // the hard way
2638 }
2639
2640
2641 //=============================================================================
2642
2643 bool Matcher::supports_vector_calling_convention(void) {
2644 return EnableVectorSupport;
2645 }
2646
2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2648 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2649 }
2650
2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2652 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2653 }
2654
2655 #ifdef ASSERT
2656 static bool is_ndd_demotable(const MachNode* mdef) {
2657 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2658 }
2659 #endif
2660
2661 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2662 int oper_index) {
2663 if (mdef == nullptr) {
2664 return false;
2665 }
2666
2667 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2668 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2669 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2670 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2671 return false;
2672 }
2673
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
2677 if (mdef->operand_num_edges(oper_index) != 1) {
2678 return false;
2679 }
2680
  // A demotion candidate must be register-mask compatible with the definition.
2682 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2683 if (!oper_mask.overlap(mdef->out_RegMask())) {
2684 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2685 return false;
2686 }
2687
2688 switch (oper_index) {
    // The first operand of a MachNode matched by an Intel APX NDD selection
    // pattern can share its assigned register with the definition operand if
    // their live ranges do not overlap. In that case the instruction can be
    // demoted to a legacy map0/map1 instruction by replacing its 4-byte
    // extended EVEX prefix with a shorter REX/REX2 encoding. Demotion
    // candidates are decorated with a special flag by the instruction selector.
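    // For example, an NDD add of the form "dst = src1 + src2" whose
    // destination is biased to src1's register can be emitted as the legacy
    // two-operand "add src1, src2" (register roles here are illustrative).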
2695 case 1:
2696 return is_ndd_demotable_opr1(mdef);
2697
    // The definition operand of a commutative operation can also be biased
    // towards its second operand.
2700 case 2:
2701 return is_ndd_demotable_opr2(mdef);
2702
    // The current scheme only selects up to two biasing candidates.
2704 default:
2705 assert(false, "unhandled operand index: %s", mdef->Name());
2706 break;
2707 }
2708
2709 return false;
2710 }
2711
2712 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2713 assert(EnableVectorSupport, "sanity");
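  // Vectors are returned in XMM0/YMM0/ZMM0; lo and hi are the first and last
  // 32-bit slots of that register spanned by the given ideal vector type.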
2714 int lo = XMM0_num;
2715 int hi = XMM0b_num;
2716 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2717 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2718 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2719 return OptoRegPair(hi, lo);
2720 }
2721
2722 // Is this branch offset short enough that a short branch can be used?
2723 //
2724 // NOTE: If the platform does not provide any short branch variants, then
2725 // this method should return false for offset 0.
2726 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
2730 offset -= br_size;
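  // A short branch encodes a signed 8-bit displacement from the end of the
  // branch instruction, hence the adjustment above.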
2731
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
2734 if (rule == jmpConUCF2_rule)
2735 return (-126 <= offset && offset <= 125);
2736 return (-128 <= offset && offset <= 127);
2737 }
2738
2739 #ifdef ASSERT
2740 // Return whether or not this register is ever used as an argument.
2741 bool Matcher::can_be_java_arg(int reg)
2742 {
2743 return
2744 reg == RDI_num || reg == RDI_H_num ||
2745 reg == RSI_num || reg == RSI_H_num ||
2746 reg == RDX_num || reg == RDX_H_num ||
2747 reg == RCX_num || reg == RCX_H_num ||
2748 reg == R8_num || reg == R8_H_num ||
2749 reg == R9_num || reg == R9_H_num ||
2750 reg == R12_num || reg == R12_H_num ||
2751 reg == XMM0_num || reg == XMM0b_num ||
2752 reg == XMM1_num || reg == XMM1b_num ||
2753 reg == XMM2_num || reg == XMM2b_num ||
2754 reg == XMM3_num || reg == XMM3b_num ||
2755 reg == XMM4_num || reg == XMM4b_num ||
2756 reg == XMM5_num || reg == XMM5b_num ||
2757 reg == XMM6_num || reg == XMM6b_num ||
2758 reg == XMM7_num || reg == XMM7b_num;
2759 }
2760 #endif
2761
2762 uint Matcher::int_pressure_limit()
2763 {
2764 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2765 }
2766
2767 uint Matcher::float_pressure_limit()
2768 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
2771 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2772 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2773 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2774 }
2775
2776 // Register for DIVI projection of divmodI
2777 const RegMask& Matcher::divI_proj_mask() {
2778 return INT_RAX_REG_mask();
2779 }
2780
2781 // Register for MODI projection of divmodI
2782 const RegMask& Matcher::modI_proj_mask() {
2783 return INT_RDX_REG_mask();
2784 }
2785
2786 // Register for DIVL projection of divmodL
2787 const RegMask& Matcher::divL_proj_mask() {
2788 return LONG_RAX_REG_mask();
2789 }
2790
2791 // Register for MODL projection of divmodL
2792 const RegMask& Matcher::modL_proj_mask() {
2793 return LONG_RDX_REG_mask();
2794 }
2795
2796 %}
2797
2798 source_hpp %{
2799 // Header information of the source block.
2800 // Method declarations/definitions which are used outside
2801 // the ad-scope can conveniently be defined here.
2802 //
2803 // To keep related declarations/definitions/uses close together,
2804 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2805
2806 #include "runtime/vm_version.hpp"
2807
2808 class NativeJump;
2809
2810 class CallStubImpl {
2811
2812 //--------------------------------------------------------------
2813 //---< Used for optimization in Compile::shorten_branches >---
2814 //--------------------------------------------------------------
2815
2816 public:
2817 // Size of call trampoline stub.
2818 static uint size_call_trampoline() {
2819 return 0; // no call trampolines on this platform
2820 }
2821
2822 // number of relocations needed by a call trampoline stub
2823 static uint reloc_call_trampoline() {
2824 return 0; // no call trampolines on this platform
2825 }
2826 };
2827
2828 class HandlerImpl {
2829
2830 public:
2831
2832 static int emit_deopt_handler(C2_MacroAssembler* masm);
2833
2834 static uint size_deopt_handler() {
2835 // one call and one jmp.
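    // (call rel32 = 5 bytes, jmp rel8 = 2 bytes)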
2836 return 7;
2837 }
2838 };
2839
2840 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2841 switch(bytes) {
2842 case 4: // fall-through
2843 case 8: // fall-through
2844 case 16: return Assembler::AVX_128bit;
2845 case 32: return Assembler::AVX_256bit;
2846 case 64: return Assembler::AVX_512bit;
2847
2848 default: {
2849 ShouldNotReachHere();
2850 return Assembler::AVX_NoVec;
2851 }
2852 }
2853 }
2854
2855 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2856 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2857 }
2858
2859 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2860 uint def_idx = use->operand_index(opnd);
2861 Node* def = use->in(def_idx);
2862 return vector_length_encoding(def);
2863 }
2864
2865 static inline bool is_vector_popcount_predicate(BasicType bt) {
2866 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2867 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2868 }
2869
2870 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2871 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2872 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2873 }
2874
2875 class Node::PD {
2876 public:
2877 enum NodeFlags : uint64_t {
2878 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2879 Flag_sets_carry_flag = Node::_last_flag << 2,
2880 Flag_sets_parity_flag = Node::_last_flag << 3,
2881 Flag_sets_zero_flag = Node::_last_flag << 4,
2882 Flag_sets_overflow_flag = Node::_last_flag << 5,
2883 Flag_sets_sign_flag = Node::_last_flag << 6,
2884 Flag_clears_carry_flag = Node::_last_flag << 7,
2885 Flag_clears_parity_flag = Node::_last_flag << 8,
2886 Flag_clears_zero_flag = Node::_last_flag << 9,
2887 Flag_clears_overflow_flag = Node::_last_flag << 10,
2888 Flag_clears_sign_flag = Node::_last_flag << 11,
2889 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2890 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2891 _last_flag = Flag_ndd_demotable_opr2
2892 };
2893 };
2894
2895 %} // end source_hpp
2896
2897 source %{
2898
2899 #include "opto/addnode.hpp"
2900 #include "c2_intelJccErratum_x86.hpp"
2901
2902 void PhaseOutput::pd_perform_mach_node_analysis() {
2903 if (VM_Version::has_intel_jcc_erratum()) {
2904 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2905 _buf_sizes._code += extra_padding;
2906 }
2907 }
2908
2909 int MachNode::pd_alignment_required() const {
2910 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2911 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2912 return IntelJccErratum::largest_jcc_size() + 1;
2913 } else {
2914 return 1;
2915 }
2916 }
2917
2918 int MachNode::compute_padding(int current_offset) const {
2919 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2920 Compile* C = Compile::current();
2921 PhaseOutput* output = C->output();
2922 Block* block = output->block();
2923 int index = output->index();
2924 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2925 } else {
2926 return 0;
2927 }
2928 }
2929
2930 // Emit deopt handler code.
2931 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2932
2933 // Note that the code buffer's insts_mark is always relative to insts.
2934 // That's why we must use the macroassembler to generate a handler.
2935 address base = __ start_a_stub(size_deopt_handler());
2936 if (base == nullptr) {
2937 ciEnv::current()->record_failure("CodeCache is full");
2938 return 0; // CodeBuffer::expand failed
2939 }
2940 int offset = __ offset();
2941
2942 Label start;
2943 __ bind(start);
2944
2945 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2946
2947 int entry_offset = __ offset();
2948
2949 __ jmp(start);
2950
2951 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2952 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2953 "out of bounds read in post-call NOP check");
2954 __ end_a_stub();
2955 return entry_offset;
2956 }
2957
2958 static Assembler::Width widthForType(BasicType bt) {
2959 if (bt == T_BYTE) {
2960 return Assembler::B;
2961 } else if (bt == T_SHORT) {
2962 return Assembler::W;
2963 } else if (bt == T_INT) {
2964 return Assembler::D;
2965 } else {
2966 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2967 return Assembler::Q;
2968 }
2969 }
2970
2971 //=============================================================================
2972
2973 // Float masks come from different places depending on platform.
2974 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2975 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2976 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2977 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2978 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2979 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2980 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2981 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2982 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2983 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2984 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2985 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2986 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2987 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2988 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2989 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2990 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2991 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2992 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2993
2994 //=============================================================================
2995 bool Matcher::match_rule_supported(int opcode) {
2996 if (!has_match_rule(opcode)) {
2997 return false; // no match rule present
2998 }
2999 switch (opcode) {
3000 case Op_AbsVL:
3001 case Op_StoreVectorScatter:
3002 if (UseAVX < 3) {
3003 return false;
3004 }
3005 break;
3006 case Op_PopCountI:
3007 case Op_PopCountL:
3008 if (!UsePopCountInstruction) {
3009 return false;
3010 }
3011 break;
3012 case Op_PopCountVI:
3013 if (UseAVX < 2) {
3014 return false;
3015 }
3016 break;
3017 case Op_CompressV:
3018 case Op_ExpandV:
3019 case Op_PopCountVL:
3020 if (UseAVX < 2) {
3021 return false;
3022 }
3023 break;
3024 case Op_MulVI:
3025 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3026 return false;
3027 }
3028 break;
3029 case Op_MulVL:
3030 if (UseSSE < 4) { // only with SSE4_1 or AVX
3031 return false;
3032 }
3033 break;
3034 case Op_MulReductionVL:
3035 if (VM_Version::supports_avx512dq() == false) {
3036 return false;
3037 }
3038 break;
3039 case Op_AbsVB:
3040 case Op_AbsVS:
3041 case Op_AbsVI:
3042 case Op_AddReductionVI:
3043 case Op_AndReductionV:
3044 case Op_OrReductionV:
3045 case Op_XorReductionV:
3046 if (UseSSE < 3) { // requires at least SSSE3
3047 return false;
3048 }
3049 break;
3050 case Op_MaxHF:
3051 case Op_MinHF:
3052 if (!VM_Version::supports_avx512vlbw()) {
3053 return false;
3054 } // fallthrough
3055 case Op_AddHF:
3056 case Op_DivHF:
3057 case Op_FmaHF:
3058 case Op_MulHF:
3059 case Op_ReinterpretS2HF:
3060 case Op_ReinterpretHF2S:
3061 case Op_SubHF:
3062 case Op_SqrtHF:
3063 if (!VM_Version::supports_avx512_fp16()) {
3064 return false;
3065 }
3066 break;
3067 case Op_VectorLoadShuffle:
3068 case Op_VectorRearrange:
3069 case Op_MulReductionVI:
3070 if (UseSSE < 4) { // requires at least SSE4
3071 return false;
3072 }
3073 break;
3074 case Op_IsInfiniteF:
3075 case Op_IsInfiniteD:
3076 if (!VM_Version::supports_avx512dq()) {
3077 return false;
3078 }
3079 break;
3080 case Op_SqrtVD:
3081 case Op_SqrtVF:
3082 case Op_VectorMaskCmp:
3083 case Op_VectorCastB2X:
3084 case Op_VectorCastS2X:
3085 case Op_VectorCastI2X:
3086 case Op_VectorCastL2X:
3087 case Op_VectorCastF2X:
3088 case Op_VectorCastD2X:
3089 case Op_VectorUCastB2X:
3090 case Op_VectorUCastS2X:
3091 case Op_VectorUCastI2X:
3092 case Op_VectorMaskCast:
3093 if (UseAVX < 1) { // enabled for AVX only
3094 return false;
3095 }
3096 break;
3097 case Op_PopulateIndex:
3098 if (UseAVX < 2) {
3099 return false;
3100 }
3101 break;
3102 case Op_RoundVF:
3103 if (UseAVX < 2) { // enabled for AVX2 only
3104 return false;
3105 }
3106 break;
3107 case Op_RoundVD:
3108 if (UseAVX < 3) {
3109 return false; // enabled for AVX3 only
3110 }
3111 break;
3112 case Op_CompareAndSwapL:
3113 case Op_CompareAndSwapP:
3114 break;
3115 case Op_StrIndexOf:
3116 if (!UseSSE42Intrinsics) {
3117 return false;
3118 }
3119 break;
3120 case Op_StrIndexOfChar:
3121 if (!UseSSE42Intrinsics) {
3122 return false;
3123 }
3124 break;
3125 case Op_OnSpinWait:
3126 if (VM_Version::supports_on_spin_wait() == false) {
3127 return false;
3128 }
3129 break;
3130 case Op_MulVB:
3131 case Op_LShiftVB:
3132 case Op_RShiftVB:
3133 case Op_URShiftVB:
3134 case Op_VectorInsert:
3135 case Op_VectorLoadMask:
3136 case Op_VectorStoreMask:
3137 case Op_VectorBlend:
3138 if (UseSSE < 4) {
3139 return false;
3140 }
3141 break;
3142 case Op_MaxD:
3143 case Op_MaxF:
3144 case Op_MinD:
3145 case Op_MinF:
3146 if (UseAVX < 1) { // enabled for AVX only
3147 return false;
3148 }
3149 break;
3150 case Op_CacheWB:
3151 case Op_CacheWBPreSync:
3152 case Op_CacheWBPostSync:
3153 if (!VM_Version::supports_data_cache_line_flush()) {
3154 return false;
3155 }
3156 break;
3157 case Op_ExtractB:
3158 case Op_ExtractL:
3159 case Op_ExtractI:
3160 case Op_RoundDoubleMode:
3161 if (UseSSE < 4) {
3162 return false;
3163 }
3164 break;
3165 case Op_RoundDoubleModeV:
3166 if (VM_Version::supports_avx() == false) {
3167 return false; // 128bit vroundpd is not available
3168 }
3169 break;
3170 case Op_LoadVectorGather:
3171 case Op_LoadVectorGatherMasked:
3172 if (UseAVX < 2) {
3173 return false;
3174 }
3175 break;
3176 case Op_FmaF:
3177 case Op_FmaD:
3178 case Op_FmaVD:
3179 case Op_FmaVF:
3180 if (!UseFMA) {
3181 return false;
3182 }
3183 break;
3184 case Op_MacroLogicV:
3185 if (UseAVX < 3 || !UseVectorMacroLogic) {
3186 return false;
3187 }
3188 break;
3189
3190 case Op_VectorCmpMasked:
3191 case Op_VectorMaskGen:
3192 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3193 return false;
3194 }
3195 break;
3196 case Op_VectorMaskFirstTrue:
3197 case Op_VectorMaskLastTrue:
3198 case Op_VectorMaskTrueCount:
3199 case Op_VectorMaskToLong:
3200 if (UseAVX < 1) {
3201 return false;
3202 }
3203 break;
3204 case Op_RoundF:
3205 case Op_RoundD:
3206 break;
3207 case Op_CopySignD:
3208 case Op_CopySignF:
3209 if (UseAVX < 3) {
3210 return false;
3211 }
3212 if (!VM_Version::supports_avx512vl()) {
3213 return false;
3214 }
3215 break;
3216 case Op_CompressBits:
3217 case Op_ExpandBits:
3218 if (!VM_Version::supports_bmi2()) {
3219 return false;
3220 }
3221 break;
3222 case Op_CompressM:
3223 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3224 return false;
3225 }
3226 break;
3227 case Op_ConvF2HF:
3228 case Op_ConvHF2F:
3229 if (!VM_Version::supports_float16()) {
3230 return false;
3231 }
3232 break;
3233 case Op_VectorCastF2HF:
3234 case Op_VectorCastHF2F:
3235 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3236 return false;
3237 }
3238 break;
3239 }
3240 return true; // Match rules are supported by default.
3241 }
3242
3243 //------------------------------------------------------------------------
3244
3245 static inline bool is_pop_count_instr_target(BasicType bt) {
3246 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3247 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3248 }
3249
3250 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3251 return match_rule_supported_vector(opcode, vlen, bt);
3252 }
3253
3254 // Identify extra cases that we might want to provide match rules for vector nodes and
3255 // other intrinsics guarded with vector length (vlen) and element type (bt).
3256 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3257 if (!match_rule_supported(opcode)) {
3258 return false;
3259 }
3260 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3261 // * SSE2 supports 128bit vectors for all types;
3262 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3263 // * AVX2 supports 256bit vectors for all types;
3264 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3265 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3266 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3267 // And MaxVectorSize is taken into account as well.
3268 if (!vector_size_supported(bt, vlen)) {
3269 return false;
3270 }
3271 // Special cases which require vector length follow:
3272 // * implementation limitations
3273 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3274 // * 128bit vroundpd instruction is present only in AVX1
3275 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3276 switch (opcode) {
3277 case Op_MaxVHF:
3278 case Op_MinVHF:
3279 if (!VM_Version::supports_avx512bw()) {
3280 return false;
      } // fallthrough
3282 case Op_AddVHF:
3283 case Op_DivVHF:
3284 case Op_FmaVHF:
3285 case Op_MulVHF:
3286 case Op_SubVHF:
3287 case Op_SqrtVHF:
3288 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3289 return false;
3290 }
3291 if (!VM_Version::supports_avx512_fp16()) {
3292 return false;
3293 }
3294 break;
3295 case Op_AbsVF:
3296 case Op_NegVF:
3297 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3298 return false; // 512bit vandps and vxorps are not available
3299 }
3300 break;
3301 case Op_AbsVD:
3302 case Op_NegVD:
3303 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3304 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3305 }
3306 break;
3307 case Op_RotateRightV:
3308 case Op_RotateLeftV:
3309 if (bt != T_INT && bt != T_LONG) {
3310 return false;
3311 } // fallthrough
3312 case Op_MacroLogicV:
3313 if (!VM_Version::supports_evex() ||
3314 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3315 return false;
3316 }
3317 break;
3318 case Op_ClearArray:
3319 case Op_VectorMaskGen:
3320 case Op_VectorCmpMasked:
3321 if (!VM_Version::supports_avx512bw()) {
3322 return false;
3323 }
3324 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3325 return false;
3326 }
3327 break;
3328 case Op_LoadVectorMasked:
3329 case Op_StoreVectorMasked:
3330 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3331 return false;
3332 }
3333 break;
3334 case Op_UMinV:
3335 case Op_UMaxV:
3336 if (UseAVX == 0) {
3337 return false;
3338 }
3339 break;
3340 case Op_UMinReductionV:
3341 case Op_UMaxReductionV:
3342 if (UseAVX == 0) {
3343 return false;
3344 }
3345 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3346 return false;
3347 }
3348 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3349 return false;
3350 }
3351 break;
3352 case Op_MaxV:
3353 case Op_MinV:
3354 if (UseSSE < 4 && is_integral_type(bt)) {
3355 return false;
3356 }
3357 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3358 // Float/Double intrinsics are enabled for AVX family currently.
3359 if (UseAVX == 0) {
3360 return false;
3361 }
3362 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3363 return false;
3364 }
3365 }
3366 break;
3367 case Op_CallLeafVector:
3368 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3369 return false;
3370 }
3371 break;
3372 case Op_AddReductionVI:
3373 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3374 return false;
3375 }
3376 // fallthrough
3377 case Op_AndReductionV:
3378 case Op_OrReductionV:
3379 case Op_XorReductionV:
3380 if (is_subword_type(bt) && (UseSSE < 4)) {
3381 return false;
3382 }
3383 break;
3384 case Op_MinReductionV:
3385 case Op_MaxReductionV:
3386 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3387 return false;
3388 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3389 return false;
3390 }
3391 // Float/Double intrinsics enabled for AVX family.
3392 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3393 return false;
3394 }
3395 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3396 return false;
3397 }
3398 break;
3399 case Op_VectorBlend:
3400 if (UseAVX == 0 && size_in_bits < 128) {
3401 return false;
3402 }
3403 break;
3404 case Op_VectorTest:
3405 if (UseSSE < 4) {
3406 return false; // Implementation limitation
3407 } else if (size_in_bits < 32) {
3408 return false; // Implementation limitation
3409 }
3410 break;
3411 case Op_VectorLoadShuffle:
3412 case Op_VectorRearrange:
      if (vlen == 2) {
3414 return false; // Implementation limitation due to how shuffle is loaded
3415 } else if (size_in_bits == 256 && UseAVX < 2) {
3416 return false; // Implementation limitation
3417 }
3418 break;
3419 case Op_VectorLoadMask:
3420 case Op_VectorMaskCast:
3421 if (size_in_bits == 256 && UseAVX < 2) {
3422 return false; // Implementation limitation
3423 }
3424 // fallthrough
3425 case Op_VectorStoreMask:
3426 if (vlen == 2) {
3427 return false; // Implementation limitation
3428 }
3429 break;
3430 case Op_PopulateIndex:
3431 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3432 return false;
3433 }
3434 break;
3435 case Op_VectorCastB2X:
3436 case Op_VectorCastS2X:
3437 case Op_VectorCastI2X:
3438 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3439 return false;
3440 }
3441 break;
3442 case Op_VectorCastL2X:
3443 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3444 return false;
3445 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3446 return false;
3447 }
3448 break;
3449 case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversions to sub-word types happen
      // after an intermediate conversion to integer, and the special handling
      // code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
3453 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3454 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3455 return false;
3456 }
3457 }
3458 // fallthrough
3459 case Op_VectorCastD2X:
3460 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3461 return false;
3462 }
3463 break;
3464 case Op_VectorCastF2HF:
3465 case Op_VectorCastHF2F:
3466 if (!VM_Version::supports_f16c() &&
3467 ((!VM_Version::supports_evex() ||
3468 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3469 return false;
3470 }
3471 break;
3472 case Op_RoundVD:
3473 if (!VM_Version::supports_avx512dq()) {
3474 return false;
3475 }
3476 break;
3477 case Op_MulReductionVI:
3478 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3479 return false;
3480 }
3481 break;
3482 case Op_LoadVectorGatherMasked:
3483 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3484 return false;
3485 }
3486 if (is_subword_type(bt) &&
3487 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3488 (size_in_bits < 64) ||
3489 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3490 return false;
3491 }
3492 break;
3493 case Op_StoreVectorScatterMasked:
3494 case Op_StoreVectorScatter:
3495 if (is_subword_type(bt)) {
3496 return false;
3497 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3498 return false;
3499 }
3500 // fallthrough
3501 case Op_LoadVectorGather:
3502 if (!is_subword_type(bt) && size_in_bits == 64) {
3503 return false;
3504 }
3505 if (is_subword_type(bt) && size_in_bits < 64) {
3506 return false;
3507 }
3508 break;
3509 case Op_SaturatingAddV:
3510 case Op_SaturatingSubV:
3511 if (UseAVX < 1) {
3512 return false; // Implementation limitation
3513 }
3514 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3515 return false;
3516 }
3517 break;
3518 case Op_SelectFromTwoVector:
3519 if (size_in_bits < 128) {
3520 return false;
3521 }
3522 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3523 return false;
3524 }
3525 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3526 return false;
3527 }
3528 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3529 return false;
3530 }
3531 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3532 return false;
3533 }
3534 break;
3535 case Op_MaskAll:
3536 if (!VM_Version::supports_evex()) {
3537 return false;
3538 }
3539 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3540 return false;
3541 }
3542 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3543 return false;
3544 }
3545 break;
3546 case Op_VectorMaskCmp:
3547 if (vlen < 2 || size_in_bits < 32) {
3548 return false;
3549 }
3550 break;
3551 case Op_CompressM:
3552 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3553 return false;
3554 }
3555 break;
3556 case Op_CompressV:
3557 case Op_ExpandV:
3558 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3559 return false;
3560 }
      if (size_in_bits < 128) {
3562 return false;
3563 }
3564 case Op_VectorLongToMask:
3565 if (UseAVX < 1) {
3566 return false;
3567 }
3568 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3569 return false;
3570 }
3571 break;
3572 case Op_SignumVD:
3573 case Op_SignumVF:
3574 if (UseAVX < 1) {
3575 return false;
3576 }
3577 break;
3578 case Op_PopCountVI:
3579 case Op_PopCountVL: {
3580 if (!is_pop_count_instr_target(bt) &&
3581 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3582 return false;
3583 }
3584 }
3585 break;
3586 case Op_ReverseV:
3587 case Op_ReverseBytesV:
3588 if (UseAVX < 2) {
3589 return false;
3590 }
3591 break;
3592 case Op_CountTrailingZerosV:
3593 case Op_CountLeadingZerosV:
3594 if (UseAVX < 2) {
3595 return false;
3596 }
3597 break;
3598 }
3599 return true; // Per default match rules are supported.
3600 }
3601
3602 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge
  // being the differentiator. This routine does a strict check on the existence
  // of masked operation patterns by returning false for all opcodes apart from
  // the ones whose masked instruction patterns are defined in this file.
3609 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3610 return false;
3611 }
3612
3613 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3614 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3615 return false;
3616 }
3617 switch(opcode) {
3618 // Unary masked operations
3619 case Op_AbsVB:
3620 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      // fallthrough
3624 case Op_AbsVI:
3625 case Op_AbsVL:
3626 return true;
3627
3628 // Ternary masked operations
3629 case Op_FmaVF:
3630 case Op_FmaVD:
3631 return true;
3632
3633 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3635 return false;
3636 }
3637 return true;
3638
3639 // Binary masked operations
3640 case Op_AddVB:
3641 case Op_AddVS:
3642 case Op_SubVB:
3643 case Op_SubVS:
3644 case Op_MulVS:
3645 case Op_LShiftVS:
3646 case Op_RShiftVS:
3647 case Op_URShiftVS:
3648 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3649 if (!VM_Version::supports_avx512bw()) {
3650 return false; // Implementation limitation
3651 }
3652 return true;
3653
3654 case Op_MulVL:
3655 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3656 if (!VM_Version::supports_avx512dq()) {
3657 return false; // Implementation limitation
3658 }
3659 return true;
3660
3661 case Op_AndV:
3662 case Op_OrV:
3663 case Op_XorV:
3664 case Op_RotateRightV:
3665 case Op_RotateLeftV:
3666 if (bt != T_INT && bt != T_LONG) {
3667 return false; // Implementation limitation
3668 }
3669 return true;
3670
3671 case Op_VectorLoadMask:
3672 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3673 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3674 return false;
3675 }
3676 return true;
3677
3678 case Op_AddVI:
3679 case Op_AddVL:
3680 case Op_AddVF:
3681 case Op_AddVD:
3682 case Op_SubVI:
3683 case Op_SubVL:
3684 case Op_SubVF:
3685 case Op_SubVD:
3686 case Op_MulVI:
3687 case Op_MulVF:
3688 case Op_MulVD:
3689 case Op_DivVF:
3690 case Op_DivVD:
3691 case Op_SqrtVF:
3692 case Op_SqrtVD:
3693 case Op_LShiftVI:
3694 case Op_LShiftVL:
3695 case Op_RShiftVI:
3696 case Op_RShiftVL:
3697 case Op_URShiftVI:
3698 case Op_URShiftVL:
3699 case Op_LoadVectorMasked:
3700 case Op_StoreVectorMasked:
3701 case Op_LoadVectorGatherMasked:
3702 case Op_StoreVectorScatterMasked:
3703 return true;
3704
3705 case Op_UMinV:
3706 case Op_UMaxV:
3707 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3708 return false;
3709 } // fallthrough
3710 case Op_MaxV:
3711 case Op_MinV:
3712 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3713 return false; // Implementation limitation
3714 }
3715 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3716 return false; // Implementation limitation
3717 }
3718 return true;
3719 case Op_SaturatingAddV:
3720 case Op_SaturatingSubV:
3721 if (!is_subword_type(bt)) {
3722 return false;
3723 }
3724 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3725 return false; // Implementation limitation
3726 }
3727 return true;
3728
3729 case Op_VectorMaskCmp:
3730 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3731 return false; // Implementation limitation
3732 }
3733 return true;
3734
3735 case Op_VectorRearrange:
3736 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3737 return false; // Implementation limitation
3738 }
3739 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3740 return false; // Implementation limitation
3741 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3742 return false; // Implementation limitation
3743 }
3744 return true;
3745
3746 // Binary Logical operations
3747 case Op_AndVMask:
3748 case Op_OrVMask:
3749 case Op_XorVMask:
3750 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3751 return false; // Implementation limitation
3752 }
3753 return true;
3754
3755 case Op_PopCountVI:
3756 case Op_PopCountVL:
3757 if (!is_pop_count_instr_target(bt)) {
3758 return false;
3759 }
3760 return true;
3761
3762 case Op_MaskAll:
3763 return true;
3764
3765 case Op_CountLeadingZerosV:
3766 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3767 return true;
3768 }
3769 default:
3770 return false;
3771 }
3772 }
3773
3774 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3775 return false;
3776 }
3777
3778 // Return true if Vector::rearrange needs preparation of the shuffle argument
3779 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3780 switch (elem_bt) {
3781 case T_BYTE: return false;
3782 case T_SHORT: return !VM_Version::supports_avx512bw();
3783 case T_INT: return !VM_Version::supports_avx();
3784 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3785 default:
3786 ShouldNotReachHere();
3787 return false;
3788 }
3789 }
3790
3791 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3792 // Prefer predicate if the mask type is "TypeVectMask".
3793 return vt->isa_vectmask() != nullptr;
3794 }
3795
3796 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3797 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3798 bool legacy = (generic_opnd->opcode() == LEGVEC);
3799 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3800 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3801 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3802 return new legVecZOper();
3803 }
3804 if (legacy) {
3805 switch (ideal_reg) {
3806 case Op_VecS: return new legVecSOper();
3807 case Op_VecD: return new legVecDOper();
3808 case Op_VecX: return new legVecXOper();
3809 case Op_VecY: return new legVecYOper();
3810 case Op_VecZ: return new legVecZOper();
3811 }
3812 } else {
3813 switch (ideal_reg) {
3814 case Op_VecS: return new vecSOper();
3815 case Op_VecD: return new vecDOper();
3816 case Op_VecX: return new vecXOper();
3817 case Op_VecY: return new vecYOper();
3818 case Op_VecZ: return new vecZOper();
3819 }
3820 }
3821 ShouldNotReachHere();
3822 return nullptr;
3823 }
3824
3825 bool Matcher::is_reg2reg_move(MachNode* m) {
3826 switch (m->rule()) {
3827 case MoveVec2Leg_rule:
3828 case MoveLeg2Vec_rule:
3829 case MoveF2VL_rule:
3830 case MoveF2LEG_rule:
3831 case MoveVL2F_rule:
3832 case MoveLEG2F_rule:
3833 case MoveD2VL_rule:
3834 case MoveD2LEG_rule:
3835 case MoveVL2D_rule:
3836 case MoveLEG2D_rule:
3837 return true;
3838 default:
3839 return false;
3840 }
3841 }
3842
3843 bool Matcher::is_generic_vector(MachOper* opnd) {
3844 switch (opnd->opcode()) {
3845 case VEC:
3846 case LEGVEC:
3847 return true;
3848 default:
3849 return false;
3850 }
3851 }
3852
3853 //------------------------------------------------------------------------
3854
3855 const RegMask* Matcher::predicate_reg_mask(void) {
3856 return &_VECTMASK_REG_mask;
3857 }
3858
3859 // Max vector size in bytes. 0 if not supported.
3860 int Matcher::vector_width_in_bytes(BasicType bt) {
3861 assert(is_java_primitive(bt), "only primitive type vectors");
3862 // SSE2 supports 128bit vectors for all types.
3863 // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
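  // With AVX enabled, the base width below is (1 << UseAVX) * 8 bytes,
  // i.e. 32 bytes for AVX2 and 64 bytes for AVX-512.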
3865 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3866 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3867 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3868 size = (UseAVX > 2) ? 64 : 32;
3869 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3870 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3871 // Use flag to limit vector size.
3872 size = MIN2(size,(int)MaxVectorSize);
3873 // Minimum 2 values in vector (or 4 for bytes).
3874 switch (bt) {
3875 case T_DOUBLE:
3876 case T_LONG:
3877 if (size < 16) return 0;
3878 break;
3879 case T_FLOAT:
3880 case T_INT:
3881 if (size < 8) return 0;
3882 break;
3883 case T_BOOLEAN:
3884 if (size < 4) return 0;
3885 break;
3886 case T_CHAR:
3887 if (size < 4) return 0;
3888 break;
3889 case T_BYTE:
3890 if (size < 4) return 0;
3891 break;
3892 case T_SHORT:
3893 if (size < 4) return 0;
3894 break;
3895 default:
3896 ShouldNotReachHere();
3897 }
3898 return size;
3899 }
3900
3901 // Limits on vector size (number of elements) loaded into vector.
3902 int Matcher::max_vector_size(const BasicType bt) {
3903 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3904 }
3905 int Matcher::min_vector_size(const BasicType bt) {
3906 int max_size = max_vector_size(bt);
3907 // Min size which can be loaded into vector is 4 bytes.
3908 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3909 // Support for calling svml double64 vectors
3910 if (bt == T_DOUBLE) {
3911 size = 1;
3912 }
3913 return MIN2(size,max_size);
3914 }
3915
3916 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3917 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3918 // by default on Cascade Lake
3919 if (VM_Version::is_default_intel_cascade_lake()) {
3920 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3921 }
3922 return Matcher::max_vector_size(bt);
3923 }
3924
3925 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3926 return -1;
3927 }
3928
3929 // Vector ideal reg corresponding to specified size in bytes
3930 uint Matcher::vector_ideal_reg(int size) {
3931 assert(MaxVectorSize >= size, "");
3932 switch(size) {
3933 case 4: return Op_VecS;
3934 case 8: return Op_VecD;
3935 case 16: return Op_VecX;
3936 case 32: return Op_VecY;
3937 case 64: return Op_VecZ;
3938 }
3939 ShouldNotReachHere();
3940 return 0;
3941 }
3942
3943 // Check for shift by small constant as well
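// (x86 addressing modes can scale an index by 1, 2, 4, or 8, i.e. a left
// shift count of at most 3)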
3944 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3945 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3946 shift->in(2)->get_int() <= 3 &&
3947 // Are there other uses besides address expressions?
3948 !matcher->is_visited(shift)) {
3949 address_visited.set(shift->_idx); // Flag as address_visited
3950 mstack.push(shift->in(2), Matcher::Visit);
3951 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
3955 if (conv->Opcode() == Op_ConvI2L &&
3956 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3957 // Are there other uses besides address expressions?
3958 !matcher->is_visited(conv)) {
3959 address_visited.set(conv->_idx); // Flag as address_visited
3960 mstack.push(conv->in(1), Matcher::Pre_Visit);
3961 } else {
3962 mstack.push(conv, Matcher::Pre_Visit);
3963 }
3964 return true;
3965 }
3966 return false;
3967 }
3968
// This function identifies sub-graphs in which a 'load' node is
// an input to two different nodes and can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3973 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3974 // refers to the same node.
3975 //
3976 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3977 // This is a temporary solution until we make DAGs expressible in ADL.
3978 template<typename ConType>
3979 class FusedPatternMatcher {
3980 Node* _op1_node;
3981 Node* _mop_node;
3982 int _con_op;
3983
3984 static int match_next(Node* n, int next_op, int next_op_idx) {
3985 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3986 return -1;
3987 }
3988
3989 if (next_op_idx == -1) { // n is commutative, try rotations
3990 if (n->in(1)->Opcode() == next_op) {
3991 return 1;
3992 } else if (n->in(2)->Opcode() == next_op) {
3993 return 2;
3994 }
3995 } else {
3996 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3997 if (n->in(next_op_idx)->Opcode() == next_op) {
3998 return next_op_idx;
3999 }
4000 }
4001 return -1;
4002 }
4003
4004 public:
4005 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4006 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4007
4008 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4009 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4010 typename ConType::NativeType con_value) {
4011 if (_op1_node->Opcode() != op1) {
4012 return false;
4013 }
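    // The memory node must feed only the two operations being fused (op1 and op2).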
4014 if (_mop_node->outcnt() > 2) {
4015 return false;
4016 }
4017 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4018 if (op1_op2_idx == -1) {
4019 return false;
4020 }
4021 // Memory operation must be the other edge
4022 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4023
4024 // Check that the mop node is really what we want
4025 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4026 Node* op2_node = _op1_node->in(op1_op2_idx);
4027 if (op2_node->outcnt() > 1) {
4028 return false;
4029 }
4030 assert(op2_node->Opcode() == op2, "Should be");
4031 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4032 if (op2_con_idx == -1) {
4033 return false;
4034 }
4035 // Memory operation must be the other edge
4036 int op2_mop_idx = (op2_con_idx & 1) + 1;
4037 // Check that the memory operation is the same node
4038 if (op2_node->in(op2_mop_idx) == _mop_node) {
4039 // Now check the constant
4040 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4041 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4042 return true;
4043 }
4044 }
4045 }
4046 return false;
4047 }
4048 };
4049
4050 static bool is_bmi_pattern(Node* n, Node* m) {
4051 assert(UseBMI1Instructions, "sanity");
4052 if (n != nullptr && m != nullptr) {
4053 if (m->Opcode() == Op_LoadI) {
4054 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4055 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4056 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4057 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4058 } else if (m->Opcode() == Op_LoadL) {
4059 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4060 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4061 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4062 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4063 }
4064 }
4065 return false;
4066 }
4067
4068 // Should the matcher clone input 'm' of node 'n'?
4069 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4070 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4071 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4072 mstack.push(m, Visit);
4073 return true;
4074 }
4075 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4076 mstack.push(m, Visit); // m = ShiftCntV
4077 return true;
4078 }
4079 if (is_encode_and_store_pattern(n, m)) {
4080 mstack.push(m, Visit);
4081 return true;
4082 }
4083 return false;
4084 }
4085
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions, or should it compute
// them into registers?
4089 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4090 Node *off = m->in(AddPNode::Offset);
4091 if (off->is_Con()) {
4092 address_visited.test_set(m->_idx); // Flag as address_visited
4093 Node *adr = m->in(AddPNode::Address);
4094
4095 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4096 // AtomicAdd is not an addressing expression.
4097 // Cheap to find it by looking for screwy base.
4098 if (adr->is_AddP() &&
4099 !adr->in(AddPNode::Base)->is_top() &&
4100 !adr->in(AddPNode::Offset)->is_Con() &&
4101 off->get_long() == (int) (off->get_long()) && // immL32
4102 // Are there other uses besides address expressions?
4103 !is_visited(adr)) {
4104 address_visited.set(adr->_idx); // Flag as address_visited
4105 Node *shift = adr->in(AddPNode::Offset);
4106 if (!clone_shift(shift, this, mstack, address_visited)) {
4107 mstack.push(shift, Pre_Visit);
4108 }
4109 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4110 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4111 } else {
4112 mstack.push(adr, Pre_Visit);
4113 }
4114
4115 // Clone X+offset as it also folds into most addressing expressions
4116 mstack.push(off, Visit);
4117 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4118 return true;
4119 } else if (clone_shift(off, this, mstack, address_visited)) {
4120 address_visited.test_set(m->_idx); // Flag as address_visited
4121 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4122 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4123 return true;
4124 }
4125 return false;
4126 }
4127
4128 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4129 switch (bt) {
4130 case BoolTest::eq:
4131 return Assembler::eq;
4132 case BoolTest::ne:
4133 return Assembler::neq;
4134 case BoolTest::le:
4135 case BoolTest::ule:
4136 return Assembler::le;
4137 case BoolTest::ge:
4138 case BoolTest::uge:
4139 return Assembler::nlt;
4140 case BoolTest::lt:
4141 case BoolTest::ult:
4142 return Assembler::lt;
4143 case BoolTest::gt:
4144 case BoolTest::ugt:
4145 return Assembler::nle;
4146 default : ShouldNotReachHere(); return Assembler::_false;
4147 }
4148 }
4149
4150 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4151 switch (bt) {
4152 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4153 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4154 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4155 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4156 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4157 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4158 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4159 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4160 }
4161 }
4162
4163 // Helper methods for MachSpillCopyNode::implementation().
4164 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4165 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4166 assert(ireg == Op_VecS || // 32bit vector
4167 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4168 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4169 "no non-adjacent vector moves" );
4170 if (masm) {
4171 switch (ireg) {
4172 case Op_VecS: // copy whole register
4173 case Op_VecD:
4174 case Op_VecX:
4175 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4176 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4177 } else {
4178 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4179 }
4180 break;
4181 case Op_VecY:
4182 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4183 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4184 } else {
4185 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4186 }
4187 break;
4188 case Op_VecZ:
4189 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4190 break;
4191 default:
4192 ShouldNotReachHere();
4193 }
4194 #ifndef PRODUCT
4195 } else {
4196 switch (ireg) {
4197 case Op_VecS:
4198 case Op_VecD:
4199 case Op_VecX:
4200 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4201 break;
4202 case Op_VecY:
4203 case Op_VecZ:
4204 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4205 break;
4206 default:
4207 ShouldNotReachHere();
4208 }
4209 #endif
4210 }
4211 }
4212
4213 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4214 int stack_offset, int reg, uint ireg, outputStream* st) {
4215 if (masm) {
4216 if (is_load) {
4217 switch (ireg) {
4218 case Op_VecS:
4219 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4220 break;
4221 case Op_VecD:
4222 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4223 break;
4224 case Op_VecX:
4225 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4226 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4227 } else {
4228 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4229 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4230 }
4231 break;
4232 case Op_VecY:
4233 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4234 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4235 } else {
4236 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4237 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4238 }
4239 break;
4240 case Op_VecZ:
4241 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4242 break;
4243 default:
4244 ShouldNotReachHere();
4245 }
4246 } else { // store
4247 switch (ireg) {
4248 case Op_VecS:
4249 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4250 break;
4251 case Op_VecD:
4252 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4253 break;
4254 case Op_VecX:
4255 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4256 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4257 }
4258 else {
4259 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4260 }
4261 break;
4262 case Op_VecY:
4263 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4264 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4265 }
4266 else {
4267 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4268 }
4269 break;
4270 case Op_VecZ:
4271 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4272 break;
4273 default:
4274 ShouldNotReachHere();
4275 }
4276 }
4277 #ifndef PRODUCT
4278 } else {
4279 if (is_load) {
4280 switch (ireg) {
4281 case Op_VecS:
4282 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4283 break;
4284 case Op_VecD:
4285 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4286 break;
4287 case Op_VecX:
4288 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4289 break;
4290 case Op_VecY:
4291 case Op_VecZ:
4292 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4293 break;
4294 default:
4295 ShouldNotReachHere();
4296 }
4297 } else { // store
4298 switch (ireg) {
4299 case Op_VecS:
4300 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4301 break;
4302 case Op_VecD:
4303 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4304 break;
4305 case Op_VecX:
4306 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4307 break;
4308 case Op_VecY:
4309 case Op_VecZ:
4310 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4311 break;
4312 default:
4313 ShouldNotReachHere();
4314 }
4315 }
4316 #endif
4317 }
4318 }
4319
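// Replicate the scalar constant 'con' 'len' times into a byte array laid out
// according to the element type, e.g. for materializing a vector constant.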
4320 template <class T>
4321 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4322 int size = type2aelembytes(bt) * len;
4323 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4324 for (int i = 0; i < len; i++) {
4325 int offset = i * type2aelembytes(bt);
4326 switch (bt) {
4327 case T_BYTE: val->at(i) = con; break;
4328 case T_SHORT: {
4329 jshort c = con;
4330 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4331 break;
4332 }
4333 case T_INT: {
4334 jint c = con;
4335 memcpy(val->adr_at(offset), &c, sizeof(jint));
4336 break;
4337 }
4338 case T_LONG: {
4339 jlong c = con;
4340 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4341 break;
4342 }
4343 case T_FLOAT: {
4344 jfloat c = con;
4345 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4346 break;
4347 }
4348 case T_DOUBLE: {
4349 jdouble c = con;
4350 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4351 break;
4352 }
4353 default: assert(false, "%s", type2name(bt));
4354 }
4355 }
4356 return val;
4357 }
4358
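// Sign-bit mask for the given element type, replicated across a 64-bit word.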
4359 static inline jlong high_bit_set(BasicType bt) {
4360 switch (bt) {
4361 case T_BYTE: return 0x8080808080808080;
4362 case T_SHORT: return 0x8000800080008000;
4363 case T_INT: return 0x8000000080000000;
4364 case T_LONG: return 0x8000000000000000;
4365 default:
4366 ShouldNotReachHere();
4367 return 0;
4368 }
4369 }
4370
4371 #ifndef PRODUCT
4372 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4373 st->print("nop \t# %d bytes pad for loops and calls", _count);
4374 }
4375 #endif
4376
4377 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4378 __ nop(_count);
4379 }
4380
4381 uint MachNopNode::size(PhaseRegAlloc*) const {
4382 return _count;
4383 }
4384
4385 #ifndef PRODUCT
4386 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4387 st->print("# breakpoint");
4388 }
4389 #endif
4390
4391 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4392 __ int3();
4393 }
4394
4395 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4396 return MachNode::size(ra_);
4397 }
4398
4399 %}
4400
4401 //----------ENCODING BLOCK-----------------------------------------------------
4402 // This block specifies the encoding classes used by the compiler to
4403 // output byte streams. Encoding classes are parameterized macros
4404 // used by Machine Instruction Nodes in order to generate the bit
4405 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER. REG_INTER causes an operand to generate a function
4409 // which returns its register number when queried. CONST_INTER causes
4410 // an operand to generate a function which returns the value of the
4411 // constant when queried. MEMORY_INTER causes an operand to generate
4412 // four functions which return the Base Register, the Index Register,
4413 // the Scale Value, and the Offset Value of the operand when queried.
4414 // COND_INTER causes an operand to generate six functions which return
4415 // the encoding code (ie - encoding bits for the instruction)
4416 // associated with each basic boolean condition for a conditional
4417 // instruction.
4418 //
4419 // Instructions specify two basic values for encoding. Again, a
4420 // function is available to check if the constant displacement is an
4421 // oop. They use the ins_encode keyword to specify their encoding
4422 // classes (which must be a sequence of enc_class names, and their
4423 // parameters, specified in the encoding block), and they use the
4424 // opcode keyword to specify, in order, their primary, secondary, and
4425 // tertiary opcode. Only the opcode sections which a particular
4426 // instruction needs for encoding need to be specified.
4427 encode %{
4428 enc_class cdql_enc(no_rax_rdx_RegI div)
4429 %{
4430 // Full implementation of Java idiv and irem; checks for
4431 // special case as described in JVM spec., p.243 & p.271.
4432 //
4433 // normal case special case
4434 //
4435 // input : rax: dividend min_int
4436 // reg: divisor -1
4437 //
4438 // output: rax: quotient (= rax idiv reg) min_int
4439 // rdx: remainder (= rax irem reg) 0
4440 //
4441 // Code sequence:
4442 //
4443 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4444 // 5: 75 07/08 jne e <normal>
4445 // 7: 33 d2 xor %edx,%edx
4446 // [div >= 8 -> offset + 1]
4447 // [REX_B]
4448 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4449 // c: 74 03/04 je 11 <done>
4450 // 000000000000000e <normal>:
4451 // e: 99 cltd
4452 // [div >= 8 -> offset + 1]
4453 // [REX_B]
4454 // f: f7 f9 idiv $div
4455 // 0000000000000011 <done>:
4456 Label normal;
4457 Label done;
4458
4459 // cmp $0x80000000,%eax
4460 __ cmpl(as_Register(RAX_enc), 0x80000000);
4461
4462 // jne e <normal>
4463 __ jccb(Assembler::notEqual, normal);
4464
4465 // xor %edx,%edx
4466 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4467
4468 // cmp $0xffffffffffffffff,$div
4469 __ cmpl($div$$Register, -1);
4470
4471 // je 11 <done>
4472 __ jccb(Assembler::equal, done);
4473
4474 // <normal>
4475 // cltd
4476 __ bind(normal);
4477 __ cdql();
4478
4479 // idivl
4480 // <done>
4481 __ idivl($div$$Register);
4482 __ bind(done);
4483 %}
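// Illustrative Java-level behavior the sequence above preserves:
//   Integer.MIN_VALUE / -1 == Integer.MIN_VALUE and Integer.MIN_VALUE % -1 == 0,
// whereas a plain idiv of 0x80000000 by -1 would raise #DE instead.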
4484
4485 enc_class cdqq_enc(no_rax_rdx_RegL div)
4486 %{
4487 // Full implementation of Java ldiv and lrem; checks for
4488 // special case as described in JVM spec., p.243 & p.271.
4489 //
4490 // normal case special case
4491 //
4492 // input : rax: dividend min_long
4493 // reg: divisor -1
4494 //
4495 // output: rax: quotient (= rax idiv reg) min_long
4496 // rdx: remainder (= rax irem reg) 0
4497 //
4498 // Code sequence:
4499 //
4500 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4501 // 7: 00 00 80
4502 // a: 48 39 d0 cmp %rdx,%rax
4503 // d: 75 08 jne 17 <normal>
4504 // f: 33 d2 xor %edx,%edx
4505 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4506 // 15: 74 05 je 1c <done>
4507 // 0000000000000017 <normal>:
4508 // 17: 48 99 cqto
4509 // 19: 48 f7 f9 idiv $div
4510 // 000000000000001c <done>:
4511 Label normal;
4512 Label done;
4513
4514 // mov $0x8000000000000000,%rdx
4515 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4516
4517 // cmp %rdx,%rax
4518 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4519
4520 // jne 17 <normal>
4521 __ jccb(Assembler::notEqual, normal);
4522
4523 // xor %edx,%edx
4524 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4525
4526 // cmp $0xffffffffffffffff,$div
4527 __ cmpq($div$$Register, -1);
4528
4529 // je 1c <done>
4530 __ jccb(Assembler::equal, done);
4531
4532 // <normal>
4533 // cqto
4534 __ bind(normal);
4535 __ cdqq();
4536
4537 // idivq
4538 // <done>
4539 __ idivq($div$$Register);
4540 __ bind(done);
4541 %}
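// The long variant analogously preserves Long.MIN_VALUE / -1 == Long.MIN_VALUE
// and Long.MIN_VALUE % -1 == 0 without trapping.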
4542
4543 enc_class clear_avx %{
4544 DEBUG_ONLY(int off0 = __ offset());
4545 if (generate_vzeroupper(Compile::current())) {
4547 // Clear upper bits of YMM registers when current compiled code uses
4548 // wide vectors to avoid AVX <-> SSE transition penalty during call.
4549 __ vzeroupper();
4550 }
4551 DEBUG_ONLY(int off1 = __ offset());
4552 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4553 %}
4554
4555 enc_class Java_To_Runtime(method meth) %{
4556 __ lea(r10, RuntimeAddress((address)$meth$$method));
4557 __ call(r10);
4558 __ post_call_nop();
4559 %}
4560
4561 enc_class Java_Static_Call(method meth)
4562 %{
4563 // JAVA STATIC CALL
4564 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4565 // determine who we intended to call.
4566 if (!_method) {
4567 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4568 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4569 // The NOP here is purely to ensure that eliding a call to
4570 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4571 __ addr_nop_5();
4572 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4573 } else {
4574 int method_index = resolved_method_index(masm);
4575 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4576 : static_call_Relocation::spec(method_index);
4577 address mark = __ pc();
4578 int call_offset = __ offset();
4579 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4580 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4581 // Calls of the same statically bound method can share
4582 // a stub to the interpreter.
4583 __ code()->shared_stub_to_interp_for(_method, call_offset);
4584 } else {
4585 // Emit stubs for static call.
4586 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4587 __ clear_inst_mark();
4588 if (stub == nullptr) {
4589 ciEnv::current()->record_failure("CodeCache is full");
4590 return;
4591 }
4592 }
4593 }
4594 __ post_call_nop();
4595 %}
4596
4597 enc_class Java_Dynamic_Call(method meth) %{
4598 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4599 __ post_call_nop();
4600 %}
4601
4602 enc_class call_epilog %{
4603 if (VerifyStackAtCalls) {
4604 // Check that stack depth is unchanged: find majik cookie on stack
4605 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4606 Label L;
4607 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4608 __ jccb(Assembler::equal, L);
4609 // Die if stack mismatch
4610 __ int3();
4611 __ bind(L);
4612 }
4613 %}
4614
4615 %}
4616
4617 //----------FRAME--------------------------------------------------------------
4618 // Definition of frame structure and management information.
4619 //
4620 // S T A C K L A Y O U T Allocators stack-slot number
4621 // | (to get allocators register number
4622 // G Owned by | | v add OptoReg::stack0())
4623 // r CALLER | |
4624 // o | +--------+ pad to even-align allocators stack-slot
4625 // w V | pad0 | numbers; owned by CALLER
4626 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4627 // h ^ | in | 5
4628 // | | args | 4 Holes in incoming args owned by SELF
4629 // | | | | 3
4630 // | | +--------+
4631 // V | | old out| Empty on Intel, window on Sparc
4632 // | old |preserve| Must be even aligned.
4633 // | SP-+--------+----> Matcher::_old_SP, even aligned
4634 // | | in | 3 area for Intel ret address
4635 // Owned by |preserve| Empty on Sparc.
4636 // SELF +--------+
4637 // | | pad2 | 2 pad to align old SP
4638 // | +--------+ 1
4639 // | | locks | 0
4640 // | +--------+----> OptoReg::stack0(), even aligned
4641 // | | pad1 | 11 pad to align new SP
4642 // | +--------+
4643 // | | | 10
4644 // | | spills | 9 spills
4645 // V | | 8 (pad0 slot for callee)
4646 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4647 // ^ | out | 7
4648 // | | args | 6 Holes in outgoing args owned by CALLEE
4649 // Owned by +--------+
4650 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4651 // | new |preserve| Must be even-aligned.
4652 // | SP-+--------+----> Matcher::_new_SP, even aligned
4653 // | | |
4654 //
4655 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4656 // known from SELF's arguments and the Java calling convention.
4657 // Region 6-7 is determined per call site.
4658 // Note 2: If the calling convention leaves holes in the incoming argument
4659 // area, those holes are owned by SELF. Holes in the outgoing area
4660 // are owned by the CALLEE. Holes should not be necessary in the
4661 // incoming area, as the Java calling convention is completely under
4662 // the control of the AD file. Doubles can be sorted and packed to
4663 // avoid holes. Holes in the outgoing arguments may be necessary for
4664 // varargs C calling conventions.
4665 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4666 // even aligned with pad0 as needed.
4667 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4668 // region 6-11 is even aligned; it may be padded out more so that
4669 // the region from SP to FP meets the minimum stack alignment.
4670 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4671 // alignment. Region 11, pad1, may be dynamically extended so that
4672 // SP meets the minimum alignment.
4673
4674 frame
4675 %{
4676 // This register defines part of the calling convention
4677 // between compiled code and the interpreter.
4678 inline_cache_reg(RAX); // Inline Cache Register
4679
4680 // Optional: name the operand used by cisc-spilling to access
4681 // [stack_pointer + offset]
4682 cisc_spilling_operand_name(indOffset32);
4683
4684 // Number of stack slots consumed by locking an object
4685 sync_stack_slots(2);
4686
4687 // Compiled code's Frame Pointer
4688 frame_pointer(RSP);
4689
4690 // Stack alignment requirement
4691 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4692
4693 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4694 // for calls to C. Supports the var-args backing area for register parms.
4695 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4696
4697 // The after-PROLOG location of the return address. Location of
4698 // return address specifies a type (REG or STACK) and a number
4699 // representing the register number (i.e. - use a register name) or
4700 // stack slot.
4701 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4702 // Otherwise, it is above the locks and verification slot and alignment word
4703 return_addr(STACK - 2 +
4704 align_up((Compile::current()->in_preserve_stack_slots() +
4705 Compile::current()->fixed_slots()),
4706 stack_alignment_in_slots()));
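// Worked example with illustrative values: if in_preserve_stack_slots() == 4,
// fixed_slots() == 2 and the stack alignment is 16 bytes (4 slots), then
// align_up(4 + 2, 4) == 8 and the return address is reported at STACK slot 6.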
4707
4708 // Location of compiled Java return values. Same as C for now.
4709 return_value
4710 %{
4711 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4712 "only return normal values");
4713
4714 static const int lo[Op_RegL + 1] = {
4715 0,
4716 0,
4717 RAX_num, // Op_RegN
4718 RAX_num, // Op_RegI
4719 RAX_num, // Op_RegP
4720 XMM0_num, // Op_RegF
4721 XMM0_num, // Op_RegD
4722 RAX_num // Op_RegL
4723 };
4724 static const int hi[Op_RegL + 1] = {
4725 0,
4726 0,
4727 OptoReg::Bad, // Op_RegN
4728 OptoReg::Bad, // Op_RegI
4729 RAX_H_num, // Op_RegP
4730 OptoReg::Bad, // Op_RegF
4731 XMM0b_num, // Op_RegD
4732 RAX_H_num // Op_RegL
4733 };
4734 // Excluded flags and vector registers.
4735 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4736 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4737 %}
4738 %}
4739
4740 //----------ATTRIBUTES---------------------------------------------------------
4741 //----------Operand Attributes-------------------------------------------------
4742 op_attrib op_cost(0); // Required cost attribute
4743
4744 //----------Instruction Attributes---------------------------------------------
4745 ins_attrib ins_cost(100); // Required cost attribute
4746 ins_attrib ins_size(8); // Required size attribute (in bits)
4747 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4748 // a non-matching short branch variant
4749 // of some long branch?
4750 ins_attrib ins_alignment(1); // Required alignment attribute (must
4751 // be a power of 2) specifies the
4752 // alignment that some part of the
4753 // instruction (not necessarily the
4754 // start) requires. If > 1, a
4755 // compute_padding() function must be
4756 // provided for the instruction
4757
4758 // Whether this node is expanded during code emission into a sequence of
4759 // instructions and the first instruction can perform an implicit null check.
4760 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4761
4762 //----------OPERANDS-----------------------------------------------------------
4763 // Operand definitions must precede instruction definitions for correct parsing
4764 // in the ADLC because operands constitute user defined types which are used in
4765 // instruction definitions.
4766
4767 //----------Simple Operands----------------------------------------------------
4768 // Immediate Operands
4769 // Integer Immediate
4770 operand immI()
4771 %{
4772 match(ConI);
4773
4774 op_cost(10);
4775 format %{ %}
4776 interface(CONST_INTER);
4777 %}
4778
4779 // Constant for test vs zero
4780 operand immI_0()
4781 %{
4782 predicate(n->get_int() == 0);
4783 match(ConI);
4784
4785 op_cost(0);
4786 format %{ %}
4787 interface(CONST_INTER);
4788 %}
4789
4790 // Constant for increment
4791 operand immI_1()
4792 %{
4793 predicate(n->get_int() == 1);
4794 match(ConI);
4795
4796 op_cost(0);
4797 format %{ %}
4798 interface(CONST_INTER);
4799 %}
4800
4801 // Constant for decrement
4802 operand immI_M1()
4803 %{
4804 predicate(n->get_int() == -1);
4805 match(ConI);
4806
4807 op_cost(0);
4808 format %{ %}
4809 interface(CONST_INTER);
4810 %}
4811
4812 operand immI_2()
4813 %{
4814 predicate(n->get_int() == 2);
4815 match(ConI);
4816
4817 op_cost(0);
4818 format %{ %}
4819 interface(CONST_INTER);
4820 %}
4821
4822 operand immI_4()
4823 %{
4824 predicate(n->get_int() == 4);
4825 match(ConI);
4826
4827 op_cost(0);
4828 format %{ %}
4829 interface(CONST_INTER);
4830 %}
4831
4832 operand immI_8()
4833 %{
4834 predicate(n->get_int() == 8);
4835 match(ConI);
4836
4837 op_cost(0);
4838 format %{ %}
4839 interface(CONST_INTER);
4840 %}
4841
4842 // Valid scale values for addressing modes
4843 operand immI2()
4844 %{
4845 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4846 match(ConI);
4847
4848 format %{ %}
4849 interface(CONST_INTER);
4850 %}
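// A scale value of 0..3 selects index*1, *2, *4 or *8 in the SIB byte, which is
// why this operand is the one used by the scaled-index addressing modes below.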
4851
4852 operand immU7()
4853 %{
4854 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4855 match(ConI);
4856
4857 op_cost(5);
4858 format %{ %}
4859 interface(CONST_INTER);
4860 %}
4861
4862 operand immI8()
4863 %{
4864 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4865 match(ConI);
4866
4867 op_cost(5);
4868 format %{ %}
4869 interface(CONST_INTER);
4870 %}
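// Values in this range can be encoded as a sign-extended 8-bit immediate, letting
// the group-1 ALU instructions use the short 0x83 imm8 encoding instead of a full
// 32-bit immediate.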
4871
4872 operand immU8()
4873 %{
4874 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4875 match(ConI);
4876
4877 op_cost(5);
4878 format %{ %}
4879 interface(CONST_INTER);
4880 %}
4881
4882 operand immI16()
4883 %{
4884 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4885 match(ConI);
4886
4887 op_cost(10);
4888 format %{ %}
4889 interface(CONST_INTER);
4890 %}
4891
4892 // Int Immediate non-negative
4893 operand immU31()
4894 %{
4895 predicate(n->get_int() >= 0);
4896 match(ConI);
4897
4898 op_cost(0);
4899 format %{ %}
4900 interface(CONST_INTER);
4901 %}
4902
4903 // Pointer Immediate
4904 operand immP()
4905 %{
4906 match(ConP);
4907
4908 op_cost(10);
4909 format %{ %}
4910 interface(CONST_INTER);
4911 %}
4912
4913 // Null Pointer Immediate
4914 operand immP0()
4915 %{
4916 predicate(n->get_ptr() == 0);
4917 match(ConP);
4918
4919 op_cost(5);
4920 format %{ %}
4921 interface(CONST_INTER);
4922 %}
4923
4924 // Pointer Immediate
4925 operand immN() %{
4926 match(ConN);
4927
4928 op_cost(10);
4929 format %{ %}
4930 interface(CONST_INTER);
4931 %}
4932
4933 operand immNKlass() %{
4934 match(ConNKlass);
4935
4936 op_cost(10);
4937 format %{ %}
4938 interface(CONST_INTER);
4939 %}
4940
4941 // Null Pointer Immediate
4942 operand immN0() %{
4943 predicate(n->get_narrowcon() == 0);
4944 match(ConN);
4945
4946 op_cost(5);
4947 format %{ %}
4948 interface(CONST_INTER);
4949 %}
4950
4951 operand immP31()
4952 %{
4953 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4954 && (n->get_ptr() >> 31) == 0);
4955 match(ConP);
4956
4957 op_cost(5);
4958 format %{ %}
4959 interface(CONST_INTER);
4960 %}
4961
4962
4963 // Long Immediate
4964 operand immL()
4965 %{
4966 match(ConL);
4967
4968 op_cost(20);
4969 format %{ %}
4970 interface(CONST_INTER);
4971 %}
4972
4973 // Long Immediate 8-bit
4974 operand immL8()
4975 %{
4976 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4977 match(ConL);
4978
4979 op_cost(5);
4980 format %{ %}
4981 interface(CONST_INTER);
4982 %}
4983
4984 // Long Immediate 32-bit unsigned
4985 operand immUL32()
4986 %{
4987 predicate(n->get_long() == (unsigned int) (n->get_long()));
4988 match(ConL);
4989
4990 op_cost(10);
4991 format %{ %}
4992 interface(CONST_INTER);
4993 %}
4994
4995 // Long Immediate 32-bit signed
4996 operand immL32()
4997 %{
4998 predicate(n->get_long() == (int) (n->get_long()));
4999 match(ConL);
5000
5001 op_cost(15);
5002 format %{ %}
5003 interface(CONST_INTER);
5004 %}
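// For example, 0xFFFFFFFF80000000 (-2147483648) satisfies immL32 because it survives
// the round trip through (int), while 0x80000000 (2147483648) does not and only
// satisfies immUL32 above.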
5005
5006 operand immL_Pow2()
5007 %{
5008 predicate(is_power_of_2((julong)n->get_long()));
5009 match(ConL);
5010
5011 op_cost(15);
5012 format %{ %}
5013 interface(CONST_INTER);
5014 %}
5015
5016 operand immL_NotPow2()
5017 %{
5018 predicate(is_power_of_2((julong)~n->get_long()));
5019 match(ConL);
5020
5021 op_cost(15);
5022 format %{ %}
5023 interface(CONST_INTER);
5024 %}
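// For example, 0x0000000000100000 matches immL_Pow2 (a single bit set), while its
// complement 0xFFFFFFFFFFEFFFFF matches immL_NotPow2 (a single bit clear).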
5025
5026 // Long Immediate zero
5027 operand immL0()
5028 %{
5029 predicate(n->get_long() == 0L);
5030 match(ConL);
5031
5032 op_cost(10);
5033 format %{ %}
5034 interface(CONST_INTER);
5035 %}
5036
5037 // Constant for increment
5038 operand immL1()
5039 %{
5040 predicate(n->get_long() == 1);
5041 match(ConL);
5042
5043 format %{ %}
5044 interface(CONST_INTER);
5045 %}
5046
5047 // Constant for decrement
5048 operand immL_M1()
5049 %{
5050 predicate(n->get_long() == -1);
5051 match(ConL);
5052
5053 format %{ %}
5054 interface(CONST_INTER);
5055 %}
5056
5057 // Long Immediate: low 32-bit mask
5058 operand immL_32bits()
5059 %{
5060 predicate(n->get_long() == 0xFFFFFFFFL);
5061 match(ConL);
5062 op_cost(20);
5063
5064 format %{ %}
5065 interface(CONST_INTER);
5066 %}
5067
5068 // Int Immediate: 2^n-1, positive
5069 operand immI_Pow2M1()
5070 %{
5071 predicate((n->get_int() > 0)
5072 && is_power_of_2((juint)n->get_int() + 1));
5073 match(ConI);
5074
5075 op_cost(20);
5076 format %{ %}
5077 interface(CONST_INTER);
5078 %}
5079
5080 // Float Immediate zero
5081 operand immF0()
5082 %{
5083 predicate(jint_cast(n->getf()) == 0);
5084 match(ConF);
5085
5086 op_cost(5);
5087 format %{ %}
5088 interface(CONST_INTER);
5089 %}
5090
5091 // Float Immediate
5092 operand immF()
5093 %{
5094 match(ConF);
5095
5096 op_cost(15);
5097 format %{ %}
5098 interface(CONST_INTER);
5099 %}
5100
5101 // Half Float Immediate
5102 operand immH()
5103 %{
5104 match(ConH);
5105
5106 op_cost(15);
5107 format %{ %}
5108 interface(CONST_INTER);
5109 %}
5110
5111 // Double Immediate zero
5112 operand immD0()
5113 %{
5114 predicate(jlong_cast(n->getd()) == 0);
5115 match(ConD);
5116
5117 op_cost(5);
5118 format %{ %}
5119 interface(CONST_INTER);
5120 %}
5121
5122 // Double Immediate
5123 operand immD()
5124 %{
5125 match(ConD);
5126
5127 op_cost(15);
5128 format %{ %}
5129 interface(CONST_INTER);
5130 %}
5131
5132 // Immediates for special shifts (sign extend)
5133
5134 // Shift counts used for sign extension
5135 operand immI_16()
5136 %{
5137 predicate(n->get_int() == 16);
5138 match(ConI);
5139
5140 format %{ %}
5141 interface(CONST_INTER);
5142 %}
5143
5144 operand immI_24()
5145 %{
5146 predicate(n->get_int() == 24);
5147 match(ConI);
5148
5149 format %{ %}
5150 interface(CONST_INTER);
5151 %}
5152
5153 // Constant for byte-wide masking
5154 operand immI_255()
5155 %{
5156 predicate(n->get_int() == 255);
5157 match(ConI);
5158
5159 format %{ %}
5160 interface(CONST_INTER);
5161 %}
5162
5163 // Constant for short-wide masking
5164 operand immI_65535()
5165 %{
5166 predicate(n->get_int() == 65535);
5167 match(ConI);
5168
5169 format %{ %}
5170 interface(CONST_INTER);
5171 %}
5172
5173 // Constant for byte-wide masking
5174 operand immL_255()
5175 %{
5176 predicate(n->get_long() == 255);
5177 match(ConL);
5178
5179 format %{ %}
5180 interface(CONST_INTER);
5181 %}
5182
5183 // Constant for short-wide masking
5184 operand immL_65535()
5185 %{
5186 predicate(n->get_long() == 65535);
5187 match(ConL);
5188
5189 format %{ %}
5190 interface(CONST_INTER);
5191 %}
5192
5193 // AOT Runtime Constants Address
5194 operand immAOTRuntimeConstantsAddress()
5195 %{
5196 // Check if the address is in the range of AOT Runtime Constants
5197 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5198 match(ConP);
5199
5200 op_cost(0);
5201 format %{ %}
5202 interface(CONST_INTER);
5203 %}
5204
5205 operand kReg()
5206 %{
5207 constraint(ALLOC_IN_RC(vectmask_reg));
5208 match(RegVectMask);
5209 format %{%}
5210 interface(REG_INTER);
5211 %}
5212
5213 // Register Operands
5214 // Integer Register
5215 operand rRegI()
5216 %{
5217 constraint(ALLOC_IN_RC(int_reg));
5218 match(RegI);
5219
5220 match(rax_RegI);
5221 match(rbx_RegI);
5222 match(rcx_RegI);
5223 match(rdx_RegI);
5224 match(rdi_RegI);
5225
5226 format %{ %}
5227 interface(REG_INTER);
5228 %}
5229
5230 // Special Registers
5231 operand rax_RegI()
5232 %{
5233 constraint(ALLOC_IN_RC(int_rax_reg));
5234 match(RegI);
5235 match(rRegI);
5236
5237 format %{ "RAX" %}
5238 interface(REG_INTER);
5239 %}
5240
5241 // Special Registers
5242 operand rbx_RegI()
5243 %{
5244 constraint(ALLOC_IN_RC(int_rbx_reg));
5245 match(RegI);
5246 match(rRegI);
5247
5248 format %{ "RBX" %}
5249 interface(REG_INTER);
5250 %}
5251
5252 operand rcx_RegI()
5253 %{
5254 constraint(ALLOC_IN_RC(int_rcx_reg));
5255 match(RegI);
5256 match(rRegI);
5257
5258 format %{ "RCX" %}
5259 interface(REG_INTER);
5260 %}
5261
5262 operand rdx_RegI()
5263 %{
5264 constraint(ALLOC_IN_RC(int_rdx_reg));
5265 match(RegI);
5266 match(rRegI);
5267
5268 format %{ "RDX" %}
5269 interface(REG_INTER);
5270 %}
5271
5272 operand rdi_RegI()
5273 %{
5274 constraint(ALLOC_IN_RC(int_rdi_reg));
5275 match(RegI);
5276 match(rRegI);
5277
5278 format %{ "RDI" %}
5279 interface(REG_INTER);
5280 %}
5281
5282 operand no_rax_rdx_RegI()
5283 %{
5284 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5285 match(RegI);
5286 match(rbx_RegI);
5287 match(rcx_RegI);
5288 match(rdi_RegI);
5289
5290 format %{ %}
5291 interface(REG_INTER);
5292 %}
5293
5294 operand no_rbp_r13_RegI()
5295 %{
5296 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5297 match(RegI);
5298 match(rRegI);
5299 match(rax_RegI);
5300 match(rbx_RegI);
5301 match(rcx_RegI);
5302 match(rdx_RegI);
5303 match(rdi_RegI);
5304
5305 format %{ %}
5306 interface(REG_INTER);
5307 %}
5308
5309 // Pointer Register
5310 operand any_RegP()
5311 %{
5312 constraint(ALLOC_IN_RC(any_reg));
5313 match(RegP);
5314 match(rax_RegP);
5315 match(rbx_RegP);
5316 match(rdi_RegP);
5317 match(rsi_RegP);
5318 match(rbp_RegP);
5319 match(r15_RegP);
5320 match(rRegP);
5321
5322 format %{ %}
5323 interface(REG_INTER);
5324 %}
5325
5326 operand rRegP()
5327 %{
5328 constraint(ALLOC_IN_RC(ptr_reg));
5329 match(RegP);
5330 match(rax_RegP);
5331 match(rbx_RegP);
5332 match(rdi_RegP);
5333 match(rsi_RegP);
5334 match(rbp_RegP); // See Q&A below about
5335 match(r15_RegP); // r15_RegP and rbp_RegP.
5336
5337 format %{ %}
5338 interface(REG_INTER);
5339 %}
5340
5341 operand rRegN() %{
5342 constraint(ALLOC_IN_RC(int_reg));
5343 match(RegN);
5344
5345 format %{ %}
5346 interface(REG_INTER);
5347 %}
5348
5349 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5350 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5351 // It's fine for an instruction input that expects rRegP to match an r15_RegP.
5352 // The output of an instruction is controlled by the allocator, which respects
5353 // register class masks, not match rules. Unless an instruction mentions
5354 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5355 // by the allocator as an input.
5356 // The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
5357 // RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5358 // result, RBP is not included in the output of the instruction either.
5359
5360 // This operand is not allowed to use RBP even if
5361 // RBP is not used to hold the frame pointer.
5362 operand no_rbp_RegP()
5363 %{
5364 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5365 match(RegP);
5366 match(rbx_RegP);
5367 match(rsi_RegP);
5368 match(rdi_RegP);
5369
5370 format %{ %}
5371 interface(REG_INTER);
5372 %}
5373
5374 // Special Registers
5375 // Return a pointer value
5376 operand rax_RegP()
5377 %{
5378 constraint(ALLOC_IN_RC(ptr_rax_reg));
5379 match(RegP);
5380 match(rRegP);
5381
5382 format %{ %}
5383 interface(REG_INTER);
5384 %}
5385
5386 // Special Registers
5387 // Return a compressed pointer value
5388 operand rax_RegN()
5389 %{
5390 constraint(ALLOC_IN_RC(int_rax_reg));
5391 match(RegN);
5392 match(rRegN);
5393
5394 format %{ %}
5395 interface(REG_INTER);
5396 %}
5397
5398 // Used in AtomicAdd
5399 operand rbx_RegP()
5400 %{
5401 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5402 match(RegP);
5403 match(rRegP);
5404
5405 format %{ %}
5406 interface(REG_INTER);
5407 %}
5408
5409 operand rsi_RegP()
5410 %{
5411 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5412 match(RegP);
5413 match(rRegP);
5414
5415 format %{ %}
5416 interface(REG_INTER);
5417 %}
5418
5419 operand rbp_RegP()
5420 %{
5421 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5422 match(RegP);
5423 match(rRegP);
5424
5425 format %{ %}
5426 interface(REG_INTER);
5427 %}
5428
5429 // Used in rep stosq
5430 operand rdi_RegP()
5431 %{
5432 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5433 match(RegP);
5434 match(rRegP);
5435
5436 format %{ %}
5437 interface(REG_INTER);
5438 %}
5439
5440 operand r15_RegP()
5441 %{
5442 constraint(ALLOC_IN_RC(ptr_r15_reg));
5443 match(RegP);
5444 match(rRegP);
5445
5446 format %{ %}
5447 interface(REG_INTER);
5448 %}
5449
5450 operand rRegL()
5451 %{
5452 constraint(ALLOC_IN_RC(long_reg));
5453 match(RegL);
5454 match(rax_RegL);
5455 match(rdx_RegL);
5456
5457 format %{ %}
5458 interface(REG_INTER);
5459 %}
5460
5461 // Special Registers
5462 operand no_rax_rdx_RegL()
5463 %{
5464 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5465 match(RegL);
5466 match(rRegL);
5467
5468 format %{ %}
5469 interface(REG_INTER);
5470 %}
5471
5472 operand rax_RegL()
5473 %{
5474 constraint(ALLOC_IN_RC(long_rax_reg));
5475 match(RegL);
5476 match(rRegL);
5477
5478 format %{ "RAX" %}
5479 interface(REG_INTER);
5480 %}
5481
5482 operand rcx_RegL()
5483 %{
5484 constraint(ALLOC_IN_RC(long_rcx_reg));
5485 match(RegL);
5486 match(rRegL);
5487
5488 format %{ %}
5489 interface(REG_INTER);
5490 %}
5491
5492 operand rdx_RegL()
5493 %{
5494 constraint(ALLOC_IN_RC(long_rdx_reg));
5495 match(RegL);
5496 match(rRegL);
5497
5498 format %{ %}
5499 interface(REG_INTER);
5500 %}
5501
5502 operand r11_RegL()
5503 %{
5504 constraint(ALLOC_IN_RC(long_r11_reg));
5505 match(RegL);
5506 match(rRegL);
5507
5508 format %{ %}
5509 interface(REG_INTER);
5510 %}
5511
5512 operand no_rbp_r13_RegL()
5513 %{
5514 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5515 match(RegL);
5516 match(rRegL);
5517 match(rax_RegL);
5518 match(rcx_RegL);
5519 match(rdx_RegL);
5520
5521 format %{ %}
5522 interface(REG_INTER);
5523 %}
5524
5525 // Flags register, used as output of compare instructions
5526 operand rFlagsReg()
5527 %{
5528 constraint(ALLOC_IN_RC(int_flags));
5529 match(RegFlags);
5530
5531 format %{ "RFLAGS" %}
5532 interface(REG_INTER);
5533 %}
5534
5535 // Flags register, used as output of FLOATING POINT compare instructions
5536 operand rFlagsRegU()
5537 %{
5538 constraint(ALLOC_IN_RC(int_flags));
5539 match(RegFlags);
5540
5541 format %{ "RFLAGS_U" %}
5542 interface(REG_INTER);
5543 %}
5544
5545 operand rFlagsRegUCF() %{
5546 constraint(ALLOC_IN_RC(int_flags));
5547 match(RegFlags);
5548 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5549
5550 format %{ "RFLAGS_U_CF" %}
5551 interface(REG_INTER);
5552 %}
5553
5554 operand rFlagsRegUCFE() %{
5555 constraint(ALLOC_IN_RC(int_flags));
5556 match(RegFlags);
5557 predicate(UseAPX && VM_Version::supports_avx10_2());
5558
5559 format %{ "RFLAGS_U_CFE" %}
5560 interface(REG_INTER);
5561 %}
5562
5563 // Float register operands
5564 operand regF() %{
5565 constraint(ALLOC_IN_RC(float_reg));
5566 match(RegF);
5567
5568 format %{ %}
5569 interface(REG_INTER);
5570 %}
5571
5572 // Float register operands
5573 operand legRegF() %{
5574 constraint(ALLOC_IN_RC(float_reg_legacy));
5575 match(RegF);
5576
5577 format %{ %}
5578 interface(REG_INTER);
5579 %}
5580
5581 // Float register operands
5582 operand vlRegF() %{
5583 constraint(ALLOC_IN_RC(float_reg_vl));
5584 match(RegF);
5585
5586 format %{ %}
5587 interface(REG_INTER);
5588 %}
5589
5590 // Double register operands
5591 operand regD() %{
5592 constraint(ALLOC_IN_RC(double_reg));
5593 match(RegD);
5594
5595 format %{ %}
5596 interface(REG_INTER);
5597 %}
5598
5599 // Double register operands
5600 operand legRegD() %{
5601 constraint(ALLOC_IN_RC(double_reg_legacy));
5602 match(RegD);
5603
5604 format %{ %}
5605 interface(REG_INTER);
5606 %}
5607
5608 // Double register operands
5609 operand vlRegD() %{
5610 constraint(ALLOC_IN_RC(double_reg_vl));
5611 match(RegD);
5612
5613 format %{ %}
5614 interface(REG_INTER);
5615 %}
5616
5617 //----------Memory Operands----------------------------------------------------
5618 // Direct Memory Operand
5619 // operand direct(immP addr)
5620 // %{
5621 // match(addr);
5622
5623 // format %{ "[$addr]" %}
5624 // interface(MEMORY_INTER) %{
5625 // base(0xFFFFFFFF);
5626 // index(0x4);
5627 // scale(0x0);
5628 // disp($addr);
5629 // %}
5630 // %}
5631
5632 // Indirect Memory Operand
5633 operand indirect(any_RegP reg)
5634 %{
5635 constraint(ALLOC_IN_RC(ptr_reg));
5636 match(reg);
5637
5638 format %{ "[$reg]" %}
5639 interface(MEMORY_INTER) %{
5640 base($reg);
5641 index(0x4);
5642 scale(0x0);
5643 disp(0x0);
5644 %}
5645 %}
5646
5647 // Indirect Memory Plus Short Offset Operand
5648 operand indOffset8(any_RegP reg, immL8 off)
5649 %{
5650 constraint(ALLOC_IN_RC(ptr_reg));
5651 match(AddP reg off);
5652
5653 format %{ "[$reg + $off (8-bit)]" %}
5654 interface(MEMORY_INTER) %{
5655 base($reg);
5656 index(0x4);
5657 scale(0x0);
5658 disp($off);
5659 %}
5660 %}
5661
5662 // Indirect Memory Plus Long Offset Operand
5663 operand indOffset32(any_RegP reg, immL32 off)
5664 %{
5665 constraint(ALLOC_IN_RC(ptr_reg));
5666 match(AddP reg off);
5667
5668 format %{ "[$reg + $off (32-bit)]" %}
5669 interface(MEMORY_INTER) %{
5670 base($reg);
5671 index(0x4);
5672 scale(0x0);
5673 disp($off);
5674 %}
5675 %}
5676
5677 // Indirect Memory Plus Index Register Plus Offset Operand
5678 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5679 %{
5680 constraint(ALLOC_IN_RC(ptr_reg));
5681 match(AddP (AddP reg lreg) off);
5682
5683 op_cost(10);
5684 format %{"[$reg + $off + $lreg]" %}
5685 interface(MEMORY_INTER) %{
5686 base($reg);
5687 index($lreg);
5688 scale(0x0);
5689 disp($off);
5690 %}
5691 %}
5692
5693 // Indirect Memory Plus Index Register Plus Offset Operand
5694 operand indIndex(any_RegP reg, rRegL lreg)
5695 %{
5696 constraint(ALLOC_IN_RC(ptr_reg));
5697 match(AddP reg lreg);
5698
5699 op_cost(10);
5700 format %{"[$reg + $lreg]" %}
5701 interface(MEMORY_INTER) %{
5702 base($reg);
5703 index($lreg);
5704 scale(0x0);
5705 disp(0x0);
5706 %}
5707 %}
5708
5709 // Indirect Memory Times Scale Plus Index Register
5710 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5711 %{
5712 constraint(ALLOC_IN_RC(ptr_reg));
5713 match(AddP reg (LShiftL lreg scale));
5714
5715 op_cost(10);
5716 format %{"[$reg + $lreg << $scale]" %}
5717 interface(MEMORY_INTER) %{
5718 base($reg);
5719 index($lreg);
5720 scale($scale);
5721 disp(0x0);
5722 %}
5723 %}
5724
5725 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5726 %{
5727 constraint(ALLOC_IN_RC(ptr_reg));
5728 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5729 match(AddP reg (LShiftL (ConvI2L idx) scale));
5730
5731 op_cost(10);
5732 format %{"[$reg + pos $idx << $scale]" %}
5733 interface(MEMORY_INTER) %{
5734 base($reg);
5735 index($idx);
5736 scale($scale);
5737 disp(0x0);
5738 %}
5739 %}
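// The non-negative predicate on the int index is what allows the ConvI2L to be
// folded away here: a 32-bit write already zero-extends the register, and for
// idx >= 0 that zero-extension coincides with the sign-extension ConvI2L denotes,
// so the 64-bit register can be used directly as the SIB index.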
5740
5741 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5742 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5743 %{
5744 constraint(ALLOC_IN_RC(ptr_reg));
5745 match(AddP (AddP reg (LShiftL lreg scale)) off);
5746
5747 op_cost(10);
5748 format %{"[$reg + $off + $lreg << $scale]" %}
5749 interface(MEMORY_INTER) %{
5750 base($reg);
5751 index($lreg);
5752 scale($scale);
5753 disp($off);
5754 %}
5755 %}
5756
5757 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5758 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5759 %{
5760 constraint(ALLOC_IN_RC(ptr_reg));
5761 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5762 match(AddP (AddP reg (ConvI2L idx)) off);
5763
5764 op_cost(10);
5765 format %{"[$reg + $off + $idx]" %}
5766 interface(MEMORY_INTER) %{
5767 base($reg);
5768 index($idx);
5769 scale(0x0);
5770 disp($off);
5771 %}
5772 %}
5773
5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5775 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5776 %{
5777 constraint(ALLOC_IN_RC(ptr_reg));
5778 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5779 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5780
5781 op_cost(10);
5782 format %{"[$reg + $off + $idx << $scale]" %}
5783 interface(MEMORY_INTER) %{
5784 base($reg);
5785 index($idx);
5786 scale($scale);
5787 disp($off);
5788 %}
5789 %}
5790
5791 // Indirect Narrow Oop Plus Offset Operand
5792 // Note: the x86 architecture doesn't support "scale * index + offset" addressing without a
5793 // base register, so we can't free r12 even when CompressedOops::base() == nullptr.
5794 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5795 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5796 constraint(ALLOC_IN_RC(ptr_reg));
5797 match(AddP (DecodeN reg) off);
5798
5799 op_cost(10);
5800 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5801 interface(MEMORY_INTER) %{
5802 base(0xc); // R12
5803 index($reg);
5804 scale(0x3);
5805 disp($off);
5806 %}
5807 %}
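// With this operand the oop decode is folded entirely into the addressing mode:
// the narrow oop in $reg serves as the SIB index, scaled by 8 and based off the
// heap base kept in r12, so no separate DecodeN instruction needs to be emitted
// for the access.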
5808
5809 // Indirect Memory Operand
5810 operand indirectNarrow(rRegN reg)
5811 %{
5812 predicate(CompressedOops::shift() == 0);
5813 constraint(ALLOC_IN_RC(ptr_reg));
5814 match(DecodeN reg);
5815
5816 format %{ "[$reg]" %}
5817 interface(MEMORY_INTER) %{
5818 base($reg);
5819 index(0x4);
5820 scale(0x0);
5821 disp(0x0);
5822 %}
5823 %}
5824
5825 // Indirect Memory Plus Short Offset Operand
5826 operand indOffset8Narrow(rRegN reg, immL8 off)
5827 %{
5828 predicate(CompressedOops::shift() == 0);
5829 constraint(ALLOC_IN_RC(ptr_reg));
5830 match(AddP (DecodeN reg) off);
5831
5832 format %{ "[$reg + $off (8-bit)]" %}
5833 interface(MEMORY_INTER) %{
5834 base($reg);
5835 index(0x4);
5836 scale(0x0);
5837 disp($off);
5838 %}
5839 %}
5840
5841 // Indirect Memory Plus Long Offset Operand
5842 operand indOffset32Narrow(rRegN reg, immL32 off)
5843 %{
5844 predicate(CompressedOops::shift() == 0);
5845 constraint(ALLOC_IN_RC(ptr_reg));
5846 match(AddP (DecodeN reg) off);
5847
5848 format %{ "[$reg + $off (32-bit)]" %}
5849 interface(MEMORY_INTER) %{
5850 base($reg);
5851 index(0x4);
5852 scale(0x0);
5853 disp($off);
5854 %}
5855 %}
5856
5857 // Indirect Memory Plus Index Register Plus Offset Operand
5858 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5859 %{
5860 predicate(CompressedOops::shift() == 0);
5861 constraint(ALLOC_IN_RC(ptr_reg));
5862 match(AddP (AddP (DecodeN reg) lreg) off);
5863
5864 op_cost(10);
5865 format %{"[$reg + $off + $lreg]" %}
5866 interface(MEMORY_INTER) %{
5867 base($reg);
5868 index($lreg);
5869 scale(0x0);
5870 disp($off);
5871 %}
5872 %}
5873
5874 // Indirect Memory Plus Index Register Plus Offset Operand
5875 operand indIndexNarrow(rRegN reg, rRegL lreg)
5876 %{
5877 predicate(CompressedOops::shift() == 0);
5878 constraint(ALLOC_IN_RC(ptr_reg));
5879 match(AddP (DecodeN reg) lreg);
5880
5881 op_cost(10);
5882 format %{"[$reg + $lreg]" %}
5883 interface(MEMORY_INTER) %{
5884 base($reg);
5885 index($lreg);
5886 scale(0x0);
5887 disp(0x0);
5888 %}
5889 %}
5890
5891 // Indirect Memory Times Scale Plus Index Register
5892 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5893 %{
5894 predicate(CompressedOops::shift() == 0);
5895 constraint(ALLOC_IN_RC(ptr_reg));
5896 match(AddP (DecodeN reg) (LShiftL lreg scale));
5897
5898 op_cost(10);
5899 format %{"[$reg + $lreg << $scale]" %}
5900 interface(MEMORY_INTER) %{
5901 base($reg);
5902 index($lreg);
5903 scale($scale);
5904 disp(0x0);
5905 %}
5906 %}
5907
5908 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5909 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5910 %{
5911 predicate(CompressedOops::shift() == 0);
5912 constraint(ALLOC_IN_RC(ptr_reg));
5913 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5914
5915 op_cost(10);
5916 format %{"[$reg + $off + $lreg << $scale]" %}
5917 interface(MEMORY_INTER) %{
5918 base($reg);
5919 index($lreg);
5920 scale($scale);
5921 disp($off);
5922 %}
5923 %}
5924
5925 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5926 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5927 %{
5928 constraint(ALLOC_IN_RC(ptr_reg));
5929 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5930 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5931
5932 op_cost(10);
5933 format %{"[$reg + $off + $idx]" %}
5934 interface(MEMORY_INTER) %{
5935 base($reg);
5936 index($idx);
5937 scale(0x0);
5938 disp($off);
5939 %}
5940 %}
5941
5942 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5943 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5944 %{
5945 constraint(ALLOC_IN_RC(ptr_reg));
5946 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5947 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5948
5949 op_cost(10);
5950 format %{"[$reg + $off + $idx << $scale]" %}
5951 interface(MEMORY_INTER) %{
5952 base($reg);
5953 index($idx);
5954 scale($scale);
5955 disp($off);
5956 %}
5957 %}
5958
5959 //----------Special Memory Operands--------------------------------------------
5960 // Stack Slot Operand - This operand is used for loading and storing temporary
5961 // values on the stack where a match requires a value to
5962 // flow through memory.
5963 operand stackSlotP(sRegP reg)
5964 %{
5965 constraint(ALLOC_IN_RC(stack_slots));
5966 // No match rule because this operand is only generated in matching
5967
5968 format %{ "[$reg]" %}
5969 interface(MEMORY_INTER) %{
5970 base(0x4); // RSP
5971 index(0x4); // No Index
5972 scale(0x0); // No Scale
5973 disp($reg); // Stack Offset
5974 %}
5975 %}
5976
5977 operand stackSlotI(sRegI reg)
5978 %{
5979 constraint(ALLOC_IN_RC(stack_slots));
5980 // No match rule because this operand is only generated in matching
5981
5982 format %{ "[$reg]" %}
5983 interface(MEMORY_INTER) %{
5984 base(0x4); // RSP
5985 index(0x4); // No Index
5986 scale(0x0); // No Scale
5987 disp($reg); // Stack Offset
5988 %}
5989 %}
5990
5991 operand stackSlotF(sRegF reg)
5992 %{
5993 constraint(ALLOC_IN_RC(stack_slots));
5994 // No match rule because this operand is only generated in matching
5995
5996 format %{ "[$reg]" %}
5997 interface(MEMORY_INTER) %{
5998 base(0x4); // RSP
5999 index(0x4); // No Index
6000 scale(0x0); // No Scale
6001 disp($reg); // Stack Offset
6002 %}
6003 %}
6004
6005 operand stackSlotD(sRegD reg)
6006 %{
6007 constraint(ALLOC_IN_RC(stack_slots));
6008 // No match rule because this operand is only generated in matching
6009
6010 format %{ "[$reg]" %}
6011 interface(MEMORY_INTER) %{
6012 base(0x4); // RSP
6013 index(0x4); // No Index
6014 scale(0x0); // No Scale
6015 disp($reg); // Stack Offset
6016 %}
6017 %}
6018 operand stackSlotL(sRegL reg)
6019 %{
6020 constraint(ALLOC_IN_RC(stack_slots));
6021 // No match rule because this operand is only generated in matching
6022
6023 format %{ "[$reg]" %}
6024 interface(MEMORY_INTER) %{
6025 base(0x4); // RSP
6026 index(0x4); // No Index
6027 scale(0x0); // No Scale
6028 disp($reg); // Stack Offset
6029 %}
6030 %}
6031
6032 //----------Conditional Branch Operands----------------------------------------
6033 // Comparison Op - This is the operation of the comparison, and is limited to
6034 // the following set of codes:
6035 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6036 //
6037 // Other attributes of the comparison, such as unsignedness, are specified
6038 // by the comparison instruction that sets a condition code flags register.
6039 // That result is represented by a flags operand whose subtype is appropriate
6040 // to the unsignedness (etc.) of the comparison.
6041 //
6042 // Later, the instruction which matches both the Comparison Op (a Bool) and
6043 // the flags (produced by the Cmp) specifies the coding of the comparison op
6044 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6045
6046 // Comparison Code
6047 operand cmpOp()
6048 %{
6049 match(Bool);
6050
6051 format %{ "" %}
6052 interface(COND_INTER) %{
6053 equal(0x4, "e");
6054 not_equal(0x5, "ne");
6055 less(0xc, "l");
6056 greater_equal(0xd, "ge");
6057 less_equal(0xe, "le");
6058 greater(0xf, "g");
6059 overflow(0x0, "o");
6060 no_overflow(0x1, "no");
6061 %}
6062 %}
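// The numeric codes above are the x86 condition-code nibbles used directly in the
// opcode; e.g. "e" (0x4) gives 0x74 for a short jcc and 0x0F 0x84 for the near form,
// and in cmpOpU below "b" (0x2) gives 0x72 / 0x0F 0x82.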
6063
6064 // Comparison Code, unsigned compare. Used by FP also, with
6065 // C2 (unordered) turned into GT or LT already. The other bits
6066 // C0 and C3 are turned into Carry & Zero flags.
6067 operand cmpOpU()
6068 %{
6069 match(Bool);
6070
6071 format %{ "" %}
6072 interface(COND_INTER) %{
6073 equal(0x4, "e");
6074 not_equal(0x5, "ne");
6075 less(0x2, "b");
6076 greater_equal(0x3, "ae");
6077 less_equal(0x6, "be");
6078 greater(0x7, "a");
6079 overflow(0x0, "o");
6080 no_overflow(0x1, "no");
6081 %}
6082 %}
6083
6084
6085 // Floating comparisons that don't require any fixup for the unordered case.
6086 // If both inputs of the comparison are the same, ZF is always set so we
6087 // don't need to use cmpOpUCF2 for eq/ne.
6088 operand cmpOpUCF() %{
6089 match(Bool);
6090 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6091 (n->as_Bool()->_test._test == BoolTest::lt ||
6092 n->as_Bool()->_test._test == BoolTest::ge ||
6093 n->as_Bool()->_test._test == BoolTest::le ||
6094 n->as_Bool()->_test._test == BoolTest::gt ||
6095 n->in(1)->in(1) == n->in(1)->in(2)));
6096 format %{ "" %}
6097 interface(COND_INTER) %{
6098 equal(0xb, "np");
6099 not_equal(0xa, "p");
6100 less(0x2, "b");
6101 greater_equal(0x3, "ae");
6102 less_equal(0x6, "be");
6103 greater(0x7, "a");
6104 overflow(0x0, "o");
6105 no_overflow(0x1, "no");
6106 %}
6107 %}
6108
6109
6110 // Floating comparisons that can be fixed up with extra conditional jumps
6111 operand cmpOpUCF2() %{
6112 match(Bool);
6113 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6114 (n->as_Bool()->_test._test == BoolTest::ne ||
6115 n->as_Bool()->_test._test == BoolTest::eq) &&
6116 n->in(1)->in(1) != n->in(1)->in(2));
6117 format %{ "" %}
6118 interface(COND_INTER) %{
6119 equal(0x4, "e");
6120 not_equal(0x5, "ne");
6121 less(0x2, "b");
6122 greater_equal(0x3, "ae");
6123 less_equal(0x6, "be");
6124 greater(0x7, "a");
6125 overflow(0x0, "o");
6126 no_overflow(0x1, "no");
6127 %}
6128 %}
6129
6130
6131 // Floating point comparisons that set condition flags to test more directly.
6132 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6133 // are used for L (<) and LE (<=) conditions. It's important to convert these
6134 // latter conditions to ones that use unsigned tests before passing into an
6135 // instruction because the preceding comparison might be based on a three-way
6136 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6137 operand cmpOpUCFE()
6138 %{
6139 match(Bool);
6140 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6141 (n->as_Bool()->_test._test == BoolTest::ne ||
6142 n->as_Bool()->_test._test == BoolTest::eq ||
6143 n->as_Bool()->_test._test == BoolTest::lt ||
6144 n->as_Bool()->_test._test == BoolTest::ge ||
6145 n->as_Bool()->_test._test == BoolTest::le ||
6146 n->as_Bool()->_test._test == BoolTest::gt));
6147
6148 format %{ "" %}
6149 interface(COND_INTER) %{
6150 equal(0x4, "e");
6151 not_equal(0x5, "ne");
6152 less(0x2, "b");
6153 greater_equal(0x3, "ae");
6154 less_equal(0x6, "be");
6155 greater(0x7, "a");
6156 overflow(0x0, "o");
6157 no_overflow(0x1, "no");
6158 %}
6159 %}
6160
6161 // Operands for bound floating point register arguments
6162 operand rxmm0() %{
6163 constraint(ALLOC_IN_RC(xmm0_reg));
6164 match(VecX);
6165 format%{%}
6166 interface(REG_INTER);
6167 %}
6168
6169 // Vectors
6170
6171 // Dummy generic vector class. Should be used for all vector operands.
6172 // Replaced with vec[SDXYZ] during post-selection pass.
6173 operand vec() %{
6174 constraint(ALLOC_IN_RC(dynamic));
6175 match(VecX);
6176 match(VecY);
6177 match(VecZ);
6178 match(VecS);
6179 match(VecD);
6180
6181 format %{ %}
6182 interface(REG_INTER);
6183 %}
6184
6185 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6186 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6187 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6188 // runtime code generation via reg_class_dynamic.
6189 operand legVec() %{
6190 constraint(ALLOC_IN_RC(dynamic));
6191 match(VecX);
6192 match(VecY);
6193 match(VecZ);
6194 match(VecS);
6195 match(VecD);
6196
6197 format %{ %}
6198 interface(REG_INTER);
6199 %}
6200
6201 // Replaces vec during post-selection cleanup. See above.
6202 operand vecS() %{
6203 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6204 match(VecS);
6205
6206 format %{ %}
6207 interface(REG_INTER);
6208 %}
6209
6210 // Replaces legVec during post-selection cleanup. See above.
6211 operand legVecS() %{
6212 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6213 match(VecS);
6214
6215 format %{ %}
6216 interface(REG_INTER);
6217 %}
6218
6219 // Replaces vec during post-selection cleanup. See above.
6220 operand vecD() %{
6221 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6222 match(VecD);
6223
6224 format %{ %}
6225 interface(REG_INTER);
6226 %}
6227
6228 // Replaces legVec during post-selection cleanup. See above.
6229 operand legVecD() %{
6230 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6231 match(VecD);
6232
6233 format %{ %}
6234 interface(REG_INTER);
6235 %}
6236
6237 // Replaces vec during post-selection cleanup. See above.
6238 operand vecX() %{
6239 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6240 match(VecX);
6241
6242 format %{ %}
6243 interface(REG_INTER);
6244 %}
6245
6246 // Replaces legVec during post-selection cleanup. See above.
6247 operand legVecX() %{
6248 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6249 match(VecX);
6250
6251 format %{ %}
6252 interface(REG_INTER);
6253 %}
6254
6255 // Replaces vec during post-selection cleanup. See above.
6256 operand vecY() %{
6257 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6258 match(VecY);
6259
6260 format %{ %}
6261 interface(REG_INTER);
6262 %}
6263
6264 // Replaces legVec during post-selection cleanup. See above.
6265 operand legVecY() %{
6266 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6267 match(VecY);
6268
6269 format %{ %}
6270 interface(REG_INTER);
6271 %}
6272
6273 // Replaces vec during post-selection cleanup. See above.
6274 operand vecZ() %{
6275 constraint(ALLOC_IN_RC(vectorz_reg));
6276 match(VecZ);
6277
6278 format %{ %}
6279 interface(REG_INTER);
6280 %}
6281
6282 // Replaces legVec during post-selection cleanup. See above.
6283 operand legVecZ() %{
6284 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6285 match(VecZ);
6286
6287 format %{ %}
6288 interface(REG_INTER);
6289 %}
6290
6291 //----------OPERAND CLASSES----------------------------------------------------
6292 // Operand Classes are groups of operands that are used to simplify
6293 // instruction definitions by not requiring the AD writer to specify separate
6294 // instructions for every form of operand when the instruction accepts
6295 // multiple operand types with the same basic encoding and format. The classic
6296 // case of this is memory operands.
6297
6298 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6299 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6300 indCompressedOopOffset,
6301 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6302 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6303 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
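// For illustration only (not a rule in this file), a single instruct definition
// written against the "memory" opclass, e.g.
//
//   instruct loadI_example(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     format %{ "movl $dst, $mem" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}
//
// is expanded by the ADLC to accept every addressing form listed above, from a
// plain [reg] through [reg + off + idx << scale] and the narrow-oop variants.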
6304
6305 //----------PIPELINE-----------------------------------------------------------
6306 // Rules which define the behavior of the target architecture's pipeline.
6307 pipeline %{
6308
6309 //----------ATTRIBUTES---------------------------------------------------------
6310 attributes %{
6311 variable_size_instructions;        // Variable-sized instructions
6312 max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
6313 instruction_unit_size = 1;         // An instruction is 1 byte long
6314 instruction_fetch_unit_size = 16; // The processor fetches one line
6315 instruction_fetch_units = 1; // of 16 bytes
6316 %}
6317
6318 //----------RESOURCES----------------------------------------------------------
6319 // Resources are the functional units available to the machine
6320
6321 // Generic P2/P3 pipeline
6322 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6323 // 3 instructions decoded per cycle.
6324 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6325 // 3 ALU ops, only ALU0 handles mul instructions.
6326 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6327 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6328 BR, FPU,
6329 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6330
6331 //----------PIPELINE DESCRIPTION-----------------------------------------------
6332 // Pipeline Description specifies the stages in the machine's pipeline
6333
6334 // Generic P2/P3 pipeline
6335 pipe_desc(S0, S1, S2, S3, S4, S5);
6336
6337 //----------PIPELINE CLASSES---------------------------------------------------
6338 // Pipeline Classes describe the stages in which input and output are
6339 // referenced by the hardware pipeline.
6340
6341 // Naming convention: ialu or fpu
6342 // Then: _reg
6343 // Then: _reg if there is a 2nd register
6344 // Then: _long if it's a pair of instructions implementing a long
6345 // Then: _fat if it requires the big decoder
6346 // Or: _mem if it requires the big decoder and a memory unit.
6347
6348 // Integer ALU reg operation
6349 pipe_class ialu_reg(rRegI dst)
6350 %{
6351 single_instruction;
6352 dst : S4(write);
6353 dst : S3(read);
6354 DECODE : S0; // any decoder
6355 ALU : S3; // any alu
6356 %}
6357
6358 // Long ALU reg operation
6359 pipe_class ialu_reg_long(rRegL dst)
6360 %{
6361 instruction_count(2);
6362 dst : S4(write);
6363 dst : S3(read);
6364 DECODE : S0(2); // any 2 decoders
6365 ALU : S3(2); // both alus
6366 %}
6367
6368 // Integer ALU reg operation using big decoder
6369 pipe_class ialu_reg_fat(rRegI dst)
6370 %{
6371 single_instruction;
6372 dst : S4(write);
6373 dst : S3(read);
6374 D0 : S0; // big decoder only
6375 ALU : S3; // any alu
6376 %}
6377
6378 // Integer ALU reg-reg operation
6379 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6380 %{
6381 single_instruction;
6382 dst : S4(write);
6383 src : S3(read);
6384 DECODE : S0; // any decoder
6385 ALU : S3; // any alu
6386 %}
6387
6388 // Integer ALU reg-reg operation
6389 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6390 %{
6391 single_instruction;
6392 dst : S4(write);
6393 src : S3(read);
6394 D0 : S0; // big decoder only
6395 ALU : S3; // any alu
6396 %}
6397
6398 // Integer ALU reg-mem operation
6399 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6400 %{
6401 single_instruction;
6402 dst : S5(write);
6403 mem : S3(read);
6404 D0 : S0; // big decoder only
6405 ALU : S4; // any alu
6406 MEM : S3; // any mem
6407 %}
6408
6409 // Integer mem operation (prefetch)
6410 pipe_class ialu_mem(memory mem)
6411 %{
6412 single_instruction;
6413 mem : S3(read);
6414 D0 : S0; // big decoder only
6415 MEM : S3; // any mem
6416 %}
6417
6418 // Integer Store to Memory
6419 pipe_class ialu_mem_reg(memory mem, rRegI src)
6420 %{
6421 single_instruction;
6422 mem : S3(read);
6423 src : S5(read);
6424 D0 : S0; // big decoder only
6425 ALU : S4; // any alu
6426 MEM : S3;
6427 %}
6428
6429 // // Long Store to Memory
6430 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6431 // %{
6432 // instruction_count(2);
6433 // mem : S3(read);
6434 // src : S5(read);
6435 // D0 : S0(2); // big decoder only; twice
6436 // ALU : S4(2); // any 2 alus
6437 // MEM : S3(2); // Both mems
6438 // %}
6439
6440 // Integer Store to Memory
6441 pipe_class ialu_mem_imm(memory mem)
6442 %{
6443 single_instruction;
6444 mem : S3(read);
6445 D0 : S0; // big decoder only
6446 ALU : S4; // any alu
6447 MEM : S3;
6448 %}
6449
6450 // Integer ALU0 reg-reg operation
6451 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6452 %{
6453 single_instruction;
6454 dst : S4(write);
6455 src : S3(read);
6456 D0 : S0; // Big decoder only
6457 ALU0 : S3; // only alu0
6458 %}
6459
6460 // Integer ALU0 reg-mem operation
6461 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6462 %{
6463 single_instruction;
6464 dst : S5(write);
6465 mem : S3(read);
6466 D0 : S0; // big decoder only
6467 ALU0 : S4; // ALU0 only
6468 MEM : S3; // any mem
6469 %}
6470
6471 // Integer ALU reg-reg operation
6472 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6473 %{
6474 single_instruction;
6475 cr : S4(write);
6476 src1 : S3(read);
6477 src2 : S3(read);
6478 DECODE : S0; // any decoder
6479 ALU : S3; // any alu
6480 %}
6481
6482 // Integer ALU reg-imm operation
6483 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6484 %{
6485 single_instruction;
6486 cr : S4(write);
6487 src1 : S3(read);
6488 DECODE : S0; // any decoder
6489 ALU : S3; // any alu
6490 %}
6491
6492 // Integer ALU reg-mem operation
6493 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6494 %{
6495 single_instruction;
6496 cr : S4(write);
6497 src1 : S3(read);
6498 src2 : S3(read);
6499 D0 : S0; // big decoder only
6500 ALU : S4; // any alu
6501 MEM : S3;
6502 %}
6503
6504 // Conditional move reg-reg
6505 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6506 %{
6507 instruction_count(4);
6508 y : S4(read);
6509 q : S3(read);
6510 p : S3(read);
6511 DECODE : S0(4); // any decoder
6512 %}
6513
6514 // Conditional move reg-reg
6515 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6516 %{
6517 single_instruction;
6518 dst : S4(write);
6519 src : S3(read);
6520 cr : S3(read);
6521 DECODE : S0; // any decoder
6522 %}
6523
6524 // Conditional move reg-mem
6525 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6526 %{
6527 single_instruction;
6528 dst : S4(write);
6529 src : S3(read);
6530 cr : S3(read);
6531 DECODE : S0; // any decoder
6532 MEM : S3;
6533 %}
6534
6535 // Conditional move reg-reg long
6536 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6537 %{
6538 single_instruction;
6539 dst : S4(write);
6540 src : S3(read);
6541 cr : S3(read);
6542 DECODE : S0(2); // any 2 decoders
6543 %}
6544
6545 // Float reg-reg operation
6546 pipe_class fpu_reg(regD dst)
6547 %{
6548 instruction_count(2);
6549 dst : S3(read);
6550 DECODE : S0(2); // any 2 decoders
6551 FPU : S3;
6552 %}
6553
6554 // Float reg-reg operation
6555 pipe_class fpu_reg_reg(regD dst, regD src)
6556 %{
6557 instruction_count(2);
6558 dst : S4(write);
6559 src : S3(read);
6560 DECODE : S0(2); // any 2 decoders
6561 FPU : S3;
6562 %}
6563
6564 // Float reg-reg operation
6565 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6566 %{
6567 instruction_count(3);
6568 dst : S4(write);
6569 src1 : S3(read);
6570 src2 : S3(read);
6571 DECODE : S0(3); // any 3 decoders
6572 FPU : S3(2);
6573 %}
6574
6575 // Float reg-reg operation
6576 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6577 %{
6578 instruction_count(4);
6579 dst : S4(write);
6580 src1 : S3(read);
6581 src2 : S3(read);
6582 src3 : S3(read);
6583 DECODE : S0(4); // any 4 decoders
6584 FPU : S3(2);
6585 %}
6586
6587 // Float reg-reg operation
6588 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6589 %{
6590 instruction_count(4);
6591 dst : S4(write);
6592 src1 : S3(read);
6593 src2 : S3(read);
6594 src3 : S3(read);
6595 DECODE : S1(3); // any 3 decoders
6596 D0 : S0; // Big decoder only
6597 FPU : S3(2);
6598 MEM : S3;
6599 %}
6600
6601 // Float reg-mem operation
6602 pipe_class fpu_reg_mem(regD dst, memory mem)
6603 %{
6604 instruction_count(2);
6605 dst : S5(write);
6606 mem : S3(read);
6607 D0 : S0; // big decoder only
6608 DECODE : S1; // any decoder for FPU POP
6609 FPU : S4;
6610 MEM : S3; // any mem
6611 %}
6612
6613 // Float reg-mem operation
6614 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6615 %{
6616 instruction_count(3);
6617 dst : S5(write);
6618 src1 : S3(read);
6619 mem : S3(read);
6620 D0 : S0; // big decoder only
6621 DECODE : S1(2); // any decoder for FPU POP
6622 FPU : S4;
6623 MEM : S3; // any mem
6624 %}
6625
6626 // Float mem-reg operation
6627 pipe_class fpu_mem_reg(memory mem, regD src)
6628 %{
6629 instruction_count(2);
6630 src : S5(read);
6631 mem : S3(read);
6632 DECODE : S0; // any decoder for FPU PUSH
6633 D0 : S1; // big decoder only
6634 FPU : S4;
6635 MEM : S3; // any mem
6636 %}
6637
6638 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6639 %{
6640 instruction_count(3);
6641 src1 : S3(read);
6642 src2 : S3(read);
6643 mem : S3(read);
6644 DECODE : S0(2); // any decoder for FPU PUSH
6645 D0 : S1; // big decoder only
6646 FPU : S4;
6647 MEM : S3; // any mem
6648 %}
6649
6650 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6651 %{
6652 instruction_count(3);
6653 src1 : S3(read);
6654 src2 : S3(read);
6655 mem : S4(read);
6656 DECODE : S0; // any decoder for FPU PUSH
6657 D0 : S0(2); // big decoder only
6658 FPU : S4;
6659 MEM : S3(2); // any mem
6660 %}
6661
6662 pipe_class fpu_mem_mem(memory dst, memory src1)
6663 %{
6664 instruction_count(2);
6665 src1 : S3(read);
6666 dst : S4(read);
6667 D0 : S0(2); // big decoder only
6668 MEM : S3(2); // any mem
6669 %}
6670
6671 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6672 %{
6673 instruction_count(3);
6674 src1 : S3(read);
6675 src2 : S3(read);
6676 dst : S4(read);
6677 D0 : S0(3); // big decoder only
6678 FPU : S4;
6679 MEM : S3(3); // any mem
6680 %}
6681
6682 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6683 %{
6684 instruction_count(3);
6685 src1 : S4(read);
6686 mem : S4(read);
6687 DECODE : S0; // any decoder for FPU PUSH
6688 D0 : S0(2); // big decoder only
6689 FPU : S4;
6690 MEM : S3(2); // any mem
6691 %}
6692
6693 // Float load constant
6694 pipe_class fpu_reg_con(regD dst)
6695 %{
6696 instruction_count(2);
6697 dst : S5(write);
6698 D0 : S0; // big decoder only for the load
6699 DECODE : S1; // any decoder for FPU POP
6700 FPU : S4;
6701 MEM : S3; // any mem
6702 %}
6703
6704 // Float load constant
6705 pipe_class fpu_reg_reg_con(regD dst, regD src)
6706 %{
6707 instruction_count(3);
6708 dst : S5(write);
6709 src : S3(read);
6710 D0 : S0; // big decoder only for the load
6711 DECODE : S1(2); // any decoder for FPU POP
6712 FPU : S4;
6713 MEM : S3; // any mem
6714 %}
6715
6716 // UnConditional branch
6717 pipe_class pipe_jmp(label labl)
6718 %{
6719 single_instruction;
6720 BR : S3;
6721 %}
6722
6723 // Conditional branch
6724 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6725 %{
6726 single_instruction;
6727 cr : S1(read);
6728 BR : S3;
6729 %}
6730
6731 // Allocation idiom
6732 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6733 %{
6734 instruction_count(1); force_serialization;
6735 fixed_latency(6);
6736 heap_ptr : S3(read);
6737 DECODE : S0(3);
6738 D0 : S2;
6739 MEM : S3;
6740 ALU : S3(2);
6741 dst : S5(write);
6742 BR : S5;
6743 %}
6744
6745 // Generic big/slow expanded idiom
6746 pipe_class pipe_slow()
6747 %{
6748 instruction_count(10); multiple_bundles; force_serialization;
6749 fixed_latency(100);
6750 D0 : S0(2);
6751 MEM : S3(2);
6752 %}
6753
6754 // The real do-nothing guy
6755 pipe_class empty()
6756 %{
6757 instruction_count(0);
6758 %}
6759
6760 // Define the class for the Nop node
6761 define
6762 %{
6763 MachNop = empty;
6764 %}
6765
6766 %}
6767
6768 //----------INSTRUCTIONS-------------------------------------------------------
6769 //
6770 // match -- States which machine-independent subtree may be replaced
6771 // by this instruction.
6772 // ins_cost -- The estimated cost of this instruction is used by instruction
6773 // selection to identify a minimum cost tree of machine
6774 // instructions that matches a tree of machine-independent
6775 // instructions.
6776 // format -- A string providing the disassembly for this instruction.
6777 // The value of an instruction's operand may be inserted
6778 // by referring to it with a '$' prefix.
6779 // opcode -- Three instruction opcodes may be provided. These are referred
6780 // to within an encode class as $primary, $secondary, and $tertiary
6781 // respectively. The primary opcode is commonly used to
6782 // indicate the type of machine instruction, while secondary
6783 // and tertiary are often used for prefix options or addressing
6784 // modes.
6785 // ins_encode -- A list of encode classes with parameters. The encode class
6786 // name must have been defined in an 'enc_class' specification
6787 // in the encode section of the architecture description.
6788
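// As a quick illustration of how these attributes fit together (a sketch for
// reference only, not one of the matcher's live rules; the operand names and
// the encoding are chosen to mirror the two-address integer-add style used by
// rules later in this file, and the pipe class name is assumed to be one of
// those defined above):
//
// instruct exampleAddI(rRegI dst, rRegI src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));   // ideal subtree replaced by this rule
//   effect(KILL cr);                 // addl clobbers the condition codes
//   ins_cost(150);                   // relative cost seen by instruction selection
//   format %{ "addl $dst, $src\t# example int add" %}
//   ins_encode %{
//     __ addl($dst$$Register, $src$$Register);
//   %}
//   ins_pipe(ialu_reg_reg);          // pipeline class assumed to be defined earlier
// %}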
6789 // ============================================================================
6790
6791 instruct ShouldNotReachHere() %{
6792 match(Halt);
6793 format %{ "stop\t# ShouldNotReachHere" %}
6794 ins_encode %{
6795 if (is_reachable()) {
6796 const char* str = __ code_string(_halt_reason);
6797 __ stop(str);
6798 }
6799 %}
6800 ins_pipe(pipe_slow);
6801 %}
6802
6803 // ============================================================================
6804
6805 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6806 // Load Float
6807 instruct MoveF2VL(vlRegF dst, regF src) %{
6808 match(Set dst src);
6809 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6810 ins_encode %{
6811 ShouldNotReachHere();
6812 %}
6813 ins_pipe( fpu_reg_reg );
6814 %}
6815
6816 // Load Float
6817 instruct MoveF2LEG(legRegF dst, regF src) %{
6818 match(Set dst src);
6819 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6820 ins_encode %{
6821 ShouldNotReachHere();
6822 %}
6823 ins_pipe( fpu_reg_reg );
6824 %}
6825
6826 // Load Float
6827 instruct MoveVL2F(regF dst, vlRegF src) %{
6828 match(Set dst src);
6829 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6830 ins_encode %{
6831 ShouldNotReachHere();
6832 %}
6833 ins_pipe( fpu_reg_reg );
6834 %}
6835
6836 // Load Float
6837 instruct MoveLEG2F(regF dst, legRegF src) %{
6838 match(Set dst src);
6839 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6840 ins_encode %{
6841 ShouldNotReachHere();
6842 %}
6843 ins_pipe( fpu_reg_reg );
6844 %}
6845
6846 // Load Double
6847 instruct MoveD2VL(vlRegD dst, regD src) %{
6848 match(Set dst src);
6849 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6850 ins_encode %{
6851 ShouldNotReachHere();
6852 %}
6853 ins_pipe( fpu_reg_reg );
6854 %}
6855
6856 // Load Double
6857 instruct MoveD2LEG(legRegD dst, regD src) %{
6858 match(Set dst src);
6859 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6860 ins_encode %{
6861 ShouldNotReachHere();
6862 %}
6863 ins_pipe( fpu_reg_reg );
6864 %}
6865
6866 // Load Double
6867 instruct MoveVL2D(regD dst, vlRegD src) %{
6868 match(Set dst src);
6869 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6870 ins_encode %{
6871 ShouldNotReachHere();
6872 %}
6873 ins_pipe( fpu_reg_reg );
6874 %}
6875
6876 // Load Double
6877 instruct MoveLEG2D(regD dst, legRegD src) %{
6878 match(Set dst src);
6879 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6880 ins_encode %{
6881 ShouldNotReachHere();
6882 %}
6883 ins_pipe( fpu_reg_reg );
6884 %}
6885
6886 //----------Load/Store/Move Instructions---------------------------------------
6887 //----------Load Instructions--------------------------------------------------
6888
6889 // Load Byte (8 bit signed)
6890 instruct loadB(rRegI dst, memory mem)
6891 %{
6892 match(Set dst (LoadB mem));
6893
6894 ins_cost(125);
6895 format %{ "movsbl $dst, $mem\t# byte" %}
6896
6897 ins_encode %{
6898 __ movsbl($dst$$Register, $mem$$Address);
6899 %}
6900
6901 ins_pipe(ialu_reg_mem);
6902 %}
6903
6904 // Load Byte (8 bit signed) into Long Register
6905 instruct loadB2L(rRegL dst, memory mem)
6906 %{
6907 match(Set dst (ConvI2L (LoadB mem)));
6908
6909 ins_cost(125);
6910 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6911
6912 ins_encode %{
6913 __ movsbq($dst$$Register, $mem$$Address);
6914 %}
6915
6916 ins_pipe(ialu_reg_mem);
6917 %}
6918
6919 // Load Unsigned Byte (8 bit UNsigned)
6920 instruct loadUB(rRegI dst, memory mem)
6921 %{
6922 match(Set dst (LoadUB mem));
6923
6924 ins_cost(125);
6925 format %{ "movzbl $dst, $mem\t# ubyte" %}
6926
6927 ins_encode %{
6928 __ movzbl($dst$$Register, $mem$$Address);
6929 %}
6930
6931 ins_pipe(ialu_reg_mem);
6932 %}
6933
6934 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6935 instruct loadUB2L(rRegL dst, memory mem)
6936 %{
6937 match(Set dst (ConvI2L (LoadUB mem)));
6938
6939 ins_cost(125);
6940 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6941
6942 ins_encode %{
6943 __ movzbq($dst$$Register, $mem$$Address);
6944 %}
6945
6946 ins_pipe(ialu_reg_mem);
6947 %}
6948
6949 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6950 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6951 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6952 effect(KILL cr);
6953
6954 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6955 "andl $dst, right_n_bits($mask, 8)" %}
6956 ins_encode %{
6957 Register Rdst = $dst$$Register;
6958 __ movzbq(Rdst, $mem$$Address);
6959 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6960 %}
6961 ins_pipe(ialu_reg_mem);
6962 %}
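// A worked example of the masking above (illustrative values only): with
// $mask == 0x1FF, right_n_bits(8) == 0xFF, so the emitted andl uses
// 0x1FF & 0xFF == 0xFF; clipping the immediate to 8 bits is safe because
// movzbq has already zero-extended the loaded byte. The 16-bit variants
// further down use the same idiom with right_n_bits(16) == 0xFFFF.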
6963
6964 // Load Short (16 bit signed)
6965 instruct loadS(rRegI dst, memory mem)
6966 %{
6967 match(Set dst (LoadS mem));
6968
6969 ins_cost(125);
6970 format %{ "movswl $dst, $mem\t# short" %}
6971
6972 ins_encode %{
6973 __ movswl($dst$$Register, $mem$$Address);
6974 %}
6975
6976 ins_pipe(ialu_reg_mem);
6977 %}
6978
6979 // Load Short (16 bit signed) to Byte (8 bit signed)
6980 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6981 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6982
6983 ins_cost(125);
6984 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6985 ins_encode %{
6986 __ movsbl($dst$$Register, $mem$$Address);
6987 %}
6988 ins_pipe(ialu_reg_mem);
6989 %}
6990
6991 // Load Short (16 bit signed) into Long Register
6992 instruct loadS2L(rRegL dst, memory mem)
6993 %{
6994 match(Set dst (ConvI2L (LoadS mem)));
6995
6996 ins_cost(125);
6997 format %{ "movswq $dst, $mem\t# short -> long" %}
6998
6999 ins_encode %{
7000 __ movswq($dst$$Register, $mem$$Address);
7001 %}
7002
7003 ins_pipe(ialu_reg_mem);
7004 %}
7005
7006 // Load Unsigned Short/Char (16 bit UNsigned)
7007 instruct loadUS(rRegI dst, memory mem)
7008 %{
7009 match(Set dst (LoadUS mem));
7010
7011 ins_cost(125);
7012 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7013
7014 ins_encode %{
7015 __ movzwl($dst$$Register, $mem$$Address);
7016 %}
7017
7018 ins_pipe(ialu_reg_mem);
7019 %}
7020
7021 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7022 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7023 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7024
7025 ins_cost(125);
7026 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7027 ins_encode %{
7028 __ movsbl($dst$$Register, $mem$$Address);
7029 %}
7030 ins_pipe(ialu_reg_mem);
7031 %}
7032
7033 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7034 instruct loadUS2L(rRegL dst, memory mem)
7035 %{
7036 match(Set dst (ConvI2L (LoadUS mem)));
7037
7038 ins_cost(125);
7039 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7040
7041 ins_encode %{
7042 __ movzwq($dst$$Register, $mem$$Address);
7043 %}
7044
7045 ins_pipe(ialu_reg_mem);
7046 %}
7047
7048 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7049 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7050 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7051
7052 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7053 ins_encode %{
7054 __ movzbq($dst$$Register, $mem$$Address);
7055 %}
7056 ins_pipe(ialu_reg_mem);
7057 %}
7058
7059 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7060 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7061 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7062 effect(KILL cr);
7063
7064 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7065 "andl $dst, right_n_bits($mask, 16)" %}
7066 ins_encode %{
7067 Register Rdst = $dst$$Register;
7068 __ movzwq(Rdst, $mem$$Address);
7069 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7070 %}
7071 ins_pipe(ialu_reg_mem);
7072 %}
7073
7074 // Load Integer
7075 instruct loadI(rRegI dst, memory mem)
7076 %{
7077 match(Set dst (LoadI mem));
7078
7079 ins_cost(125);
7080 format %{ "movl $dst, $mem\t# int" %}
7081
7082 ins_encode %{
7083 __ movl($dst$$Register, $mem$$Address);
7084 %}
7085
7086 ins_pipe(ialu_reg_mem);
7087 %}
7088
7089 // Load Integer (32 bit signed) to Byte (8 bit signed)
7090 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7091 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7092
7093 ins_cost(125);
7094 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7095 ins_encode %{
7096 __ movsbl($dst$$Register, $mem$$Address);
7097 %}
7098 ins_pipe(ialu_reg_mem);
7099 %}
7100
7101 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7102 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7103 match(Set dst (AndI (LoadI mem) mask));
7104
7105 ins_cost(125);
7106 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7107 ins_encode %{
7108 __ movzbl($dst$$Register, $mem$$Address);
7109 %}
7110 ins_pipe(ialu_reg_mem);
7111 %}
7112
7113 // Load Integer (32 bit signed) to Short (16 bit signed)
7114 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7115 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7116
7117 ins_cost(125);
7118 format %{ "movswl $dst, $mem\t# int -> short" %}
7119 ins_encode %{
7120 __ movswl($dst$$Register, $mem$$Address);
7121 %}
7122 ins_pipe(ialu_reg_mem);
7123 %}
7124
7125 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7126 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7127 match(Set dst (AndI (LoadI mem) mask));
7128
7129 ins_cost(125);
7130 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7131 ins_encode %{
7132 __ movzwl($dst$$Register, $mem$$Address);
7133 %}
7134 ins_pipe(ialu_reg_mem);
7135 %}
7136
7137 // Load Integer into Long Register
7138 instruct loadI2L(rRegL dst, memory mem)
7139 %{
7140 match(Set dst (ConvI2L (LoadI mem)));
7141
7142 ins_cost(125);
7143 format %{ "movslq $dst, $mem\t# int -> long" %}
7144
7145 ins_encode %{
7146 __ movslq($dst$$Register, $mem$$Address);
7147 %}
7148
7149 ins_pipe(ialu_reg_mem);
7150 %}
7151
7152 // Load Integer with mask 0xFF into Long Register
7153 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7154 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7155
7156 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7157 ins_encode %{
7158 __ movzbq($dst$$Register, $mem$$Address);
7159 %}
7160 ins_pipe(ialu_reg_mem);
7161 %}
7162
7163 // Load Integer with mask 0xFFFF into Long Register
7164 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7165 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7166
7167 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7168 ins_encode %{
7169 __ movzwq($dst$$Register, $mem$$Address);
7170 %}
7171 ins_pipe(ialu_reg_mem);
7172 %}
7173
7174 // Load Integer with a 31-bit mask into Long Register
7175 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7176 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7177 effect(KILL cr);
7178
7179 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7180 "andl $dst, $mask" %}
7181 ins_encode %{
7182 Register Rdst = $dst$$Register;
7183 __ movl(Rdst, $mem$$Address);
7184 __ andl(Rdst, $mask$$constant);
7185 %}
7186 ins_pipe(ialu_reg_mem);
7187 %}
7188
7189 // Load Unsigned Integer into Long Register
7190 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7191 %{
7192 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7193
7194 ins_cost(125);
7195 format %{ "movl $dst, $mem\t# uint -> long" %}
7196
7197 ins_encode %{
7198 __ movl($dst$$Register, $mem$$Address);
7199 %}
7200
7201 ins_pipe(ialu_reg_mem);
7202 %}
7203
7204 // Load Long
7205 instruct loadL(rRegL dst, memory mem)
7206 %{
7207 match(Set dst (LoadL mem));
7208
7209 ins_cost(125);
7210 format %{ "movq $dst, $mem\t# long" %}
7211
7212 ins_encode %{
7213 __ movq($dst$$Register, $mem$$Address);
7214 %}
7215
7216 ins_pipe(ialu_reg_mem); // XXX
7217 %}
7218
7219 // Load Range
7220 instruct loadRange(rRegI dst, memory mem)
7221 %{
7222 match(Set dst (LoadRange mem));
7223
7224 ins_cost(125); // XXX
7225 format %{ "movl $dst, $mem\t# range" %}
7226 ins_encode %{
7227 __ movl($dst$$Register, $mem$$Address);
7228 %}
7229 ins_pipe(ialu_reg_mem);
7230 %}
7231
7232 // Load Pointer
7233 instruct loadP(rRegP dst, memory mem)
7234 %{
7235 match(Set dst (LoadP mem));
7236 predicate(n->as_Load()->barrier_data() == 0);
7237
7238 ins_cost(125); // XXX
7239 format %{ "movq $dst, $mem\t# ptr" %}
7240 ins_encode %{
7241 __ movq($dst$$Register, $mem$$Address);
7242 %}
7243 ins_pipe(ialu_reg_mem); // XXX
7244 %}
7245
7246 // Load Compressed Pointer
7247 instruct loadN(rRegN dst, memory mem)
7248 %{
7249 predicate(n->as_Load()->barrier_data() == 0);
7250 match(Set dst (LoadN mem));
7251
7252 ins_cost(125); // XXX
7253 format %{ "movl $dst, $mem\t# compressed ptr" %}
7254 ins_encode %{
7255 __ movl($dst$$Register, $mem$$Address);
7256 %}
7257 ins_pipe(ialu_reg_mem); // XXX
7258 %}
7259
7260
7261 // Load Klass Pointer
7262 instruct loadKlass(rRegP dst, memory mem)
7263 %{
7264 match(Set dst (LoadKlass mem));
7265
7266 ins_cost(125); // XXX
7267 format %{ "movq $dst, $mem\t# class" %}
7268 ins_encode %{
7269 __ movq($dst$$Register, $mem$$Address);
7270 %}
7271 ins_pipe(ialu_reg_mem); // XXX
7272 %}
7273
7274 // Load narrow Klass Pointer
7275 instruct loadNKlass(rRegN dst, memory mem)
7276 %{
7277 predicate(!UseCompactObjectHeaders);
7278 match(Set dst (LoadNKlass mem));
7279
7280 ins_cost(125); // XXX
7281 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7282 ins_encode %{
7283 __ movl($dst$$Register, $mem$$Address);
7284 %}
7285 ins_pipe(ialu_reg_mem); // XXX
7286 %}
7287
7288 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7289 %{
7290 predicate(UseCompactObjectHeaders);
7291 match(Set dst (LoadNKlass mem));
7292 effect(KILL cr);
7293 ins_cost(125);
7294 format %{
7295 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7296 "shrl $dst, markWord::klass_shift_at_offset"
7297 %}
7298 ins_encode %{
7299 if (UseAPX) {
7300 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
7301 }
7302 else {
7303 __ movl($dst$$Register, $mem$$Address);
7304 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7305 }
7306 %}
7307 ins_pipe(ialu_reg_mem);
7308 %}
7309
7310 // Load Float
7311 instruct loadF(regF dst, memory mem)
7312 %{
7313 match(Set dst (LoadF mem));
7314
7315 ins_cost(145); // XXX
7316 format %{ "movss $dst, $mem\t# float" %}
7317 ins_encode %{
7318 __ movflt($dst$$XMMRegister, $mem$$Address);
7319 %}
7320 ins_pipe(pipe_slow); // XXX
7321 %}
7322
7323 // Load Double
7324 instruct loadD_partial(regD dst, memory mem)
7325 %{
7326 predicate(!UseXmmLoadAndClearUpper);
7327 match(Set dst (LoadD mem));
7328
7329 ins_cost(145); // XXX
7330 format %{ "movlpd $dst, $mem\t# double" %}
7331 ins_encode %{
7332 __ movdbl($dst$$XMMRegister, $mem$$Address);
7333 %}
7334 ins_pipe(pipe_slow); // XXX
7335 %}
7336
7337 instruct loadD(regD dst, memory mem)
7338 %{
7339 predicate(UseXmmLoadAndClearUpper);
7340 match(Set dst (LoadD mem));
7341
7342 ins_cost(145); // XXX
7343 format %{ "movsd $dst, $mem\t# double" %}
7344 ins_encode %{
7345 __ movdbl($dst$$XMMRegister, $mem$$Address);
7346 %}
7347 ins_pipe(pipe_slow); // XXX
7348 %}
7349
7350 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7351 %{
7352 match(Set dst con);
7353
7354 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7355
7356 ins_encode %{
7357 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7358 %}
7359
7360 ins_pipe(ialu_reg_fat);
7361 %}
7362
7363 // min = java.lang.Math.min(float a, float b)
7364 // max = java.lang.Math.max(float a, float b)
7365 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
7366 %{
7367 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7368 match(Set dst (MaxF a b));
7369 match(Set dst (MinF a b));
7370
7371 format %{ "minmaxF $dst, $a, $b" %}
7372 ins_encode %{
7373 int opcode = this->ideal_Opcode();
7374 __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7375 %}
7376 ins_pipe( pipe_slow );
7377 %}
7378
7379 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
7380 %{
7381 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7382 match(Set dst (MaxF a b));
7383 match(Set dst (MinF a b));
7384 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7385
7386 format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7387 ins_encode %{
7388 int opcode = this->ideal_Opcode();
7389 bool min = (opcode == Op_MinF) ? true : false;
7390 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7391 min, fp_prec_flt /*pt*/);
7392 %}
7393 ins_pipe( pipe_slow );
7394 %}
7395
7396 // min = java.lang.Math.min(float a, float b)
7397 // max = java.lang.Math.max(float a, float b)
7398 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
7399 %{
7400 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7401 match(Set dst (MaxF a b));
7402 match(Set dst (MinF a b));
7403 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7404
7405 format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7406 ins_encode %{
7407 int opcode = this->ideal_Opcode();
7408 int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
7409 __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7410 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7411 %}
7412 ins_pipe( pipe_slow );
7413 %}
7414
7415 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
7416 %{
7417 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7418 match(Set dst (MaxF a b));
7419 match(Set dst (MinF a b));
7420 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7421
7422 format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7423 ins_encode %{
7424 int opcode = this->ideal_Opcode();
7425 bool min = (opcode == Op_MinF) ? true : false;
7426 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7427 min, fp_prec_flt /*pt*/);
7428 %}
7429 ins_pipe( pipe_slow );
7430 %}
7431
7432 // min = java.lang.Math.min(double a, double b)
7433 // max = java.lang.Math.max(double a, double b)
7434 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
7435 %{
7436 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7437 match(Set dst (MaxD a b));
7438 match(Set dst (MinD a b));
7439
7440 format %{ "minmaxD $dst, $a, $b" %}
7441 ins_encode %{
7442 int opcode = this->ideal_Opcode();
7443 __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7444 %}
7445 ins_pipe( pipe_slow );
7446 %}
7447
7448 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
7449 %{
7450 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7451 match(Set dst (MaxD a b));
7452 match(Set dst (MinD a b));
7453 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7454
7455 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7456 ins_encode %{
7457 int opcode = this->ideal_Opcode();
7458 bool min = (opcode == Op_MinD) ? true : false;
7459 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7460 min, fp_prec_dbl /*pt*/);
7461 %}
7462 ins_pipe( pipe_slow );
7463 %}
7464
7465 // min = java.lang.Math.min(double a, double b)
7466 // max = java.lang.Math.max(double a, double b)
7467 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
7468 %{
7469 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7470 match(Set dst (MaxD a b));
7471 match(Set dst (MinD a b));
7472 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7473
7474 format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7475 ins_encode %{
7476 int opcode = this->ideal_Opcode();
7477 int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
7478 __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7479 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7480 %}
7481 ins_pipe( pipe_slow );
7482 %}
7483
7484 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
7485 %{
7486 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7487 match(Set dst (MaxD a b));
7488 match(Set dst (MinD a b));
7489 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7490
7491 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7492 ins_encode %{
7493 int opcode = this->ideal_Opcode();
7494 bool min = (opcode == Op_MinD) ? true : false;
7495 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7496 min, fp_prec_dbl /*pt*/);
7497 %}
7498 ins_pipe( pipe_slow );
7499 %}
7500
7501 // Load Effective Address
7502 instruct leaP8(rRegP dst, indOffset8 mem)
7503 %{
7504 match(Set dst mem);
7505
7506 ins_cost(110); // XXX
7507 format %{ "leaq $dst, $mem\t# ptr 8" %}
7508 ins_encode %{
7509 __ leaq($dst$$Register, $mem$$Address);
7510 %}
7511 ins_pipe(ialu_reg_reg_fat);
7512 %}
7513
7514 instruct leaP32(rRegP dst, indOffset32 mem)
7515 %{
7516 match(Set dst mem);
7517
7518 ins_cost(110);
7519 format %{ "leaq $dst, $mem\t# ptr 32" %}
7520 ins_encode %{
7521 __ leaq($dst$$Register, $mem$$Address);
7522 %}
7523 ins_pipe(ialu_reg_reg_fat);
7524 %}
7525
7526 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7527 %{
7528 match(Set dst mem);
7529
7530 ins_cost(110);
7531 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7532 ins_encode %{
7533 __ leaq($dst$$Register, $mem$$Address);
7534 %}
7535 ins_pipe(ialu_reg_reg_fat);
7536 %}
7537
7538 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7539 %{
7540 match(Set dst mem);
7541
7542 ins_cost(110);
7543 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7544 ins_encode %{
7545 __ leaq($dst$$Register, $mem$$Address);
7546 %}
7547 ins_pipe(ialu_reg_reg_fat);
7548 %}
7549
7550 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7551 %{
7552 match(Set dst mem);
7553
7554 ins_cost(110);
7555 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7556 ins_encode %{
7557 __ leaq($dst$$Register, $mem$$Address);
7558 %}
7559 ins_pipe(ialu_reg_reg_fat);
7560 %}
7561
7562 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7563 %{
7564 match(Set dst mem);
7565
7566 ins_cost(110);
7567 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7568 ins_encode %{
7569 __ leaq($dst$$Register, $mem$$Address);
7570 %}
7571 ins_pipe(ialu_reg_reg_fat);
7572 %}
7573
7574 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7575 %{
7576 match(Set dst mem);
7577
7578 ins_cost(110);
7579 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7580 ins_encode %{
7581 __ leaq($dst$$Register, $mem$$Address);
7582 %}
7583 ins_pipe(ialu_reg_reg_fat);
7584 %}
7585
7586 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7587 %{
7588 match(Set dst mem);
7589
7590 ins_cost(110);
7591 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7592 ins_encode %{
7593 __ leaq($dst$$Register, $mem$$Address);
7594 %}
7595 ins_pipe(ialu_reg_reg_fat);
7596 %}
7597
7598 // Load Effective Address which uses Narrow (32-bits) oop
7599 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7600 %{
7601 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7602 match(Set dst mem);
7603
7604 ins_cost(110);
7605 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7606 ins_encode %{
7607 __ leaq($dst$$Register, $mem$$Address);
7608 %}
7609 ins_pipe(ialu_reg_reg_fat);
7610 %}
7611
7612 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7613 %{
7614 predicate(CompressedOops::shift() == 0);
7615 match(Set dst mem);
7616
7617 ins_cost(110); // XXX
7618 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7619 ins_encode %{
7620 __ leaq($dst$$Register, $mem$$Address);
7621 %}
7622 ins_pipe(ialu_reg_reg_fat);
7623 %}
7624
7625 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7626 %{
7627 predicate(CompressedOops::shift() == 0);
7628 match(Set dst mem);
7629
7630 ins_cost(110);
7631 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7632 ins_encode %{
7633 __ leaq($dst$$Register, $mem$$Address);
7634 %}
7635 ins_pipe(ialu_reg_reg_fat);
7636 %}
7637
7638 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7639 %{
7640 predicate(CompressedOops::shift() == 0);
7641 match(Set dst mem);
7642
7643 ins_cost(110);
7644 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7645 ins_encode %{
7646 __ leaq($dst$$Register, $mem$$Address);
7647 %}
7648 ins_pipe(ialu_reg_reg_fat);
7649 %}
7650
7651 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7652 %{
7653 predicate(CompressedOops::shift() == 0);
7654 match(Set dst mem);
7655
7656 ins_cost(110);
7657 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7658 ins_encode %{
7659 __ leaq($dst$$Register, $mem$$Address);
7660 %}
7661 ins_pipe(ialu_reg_reg_fat);
7662 %}
7663
7664 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7665 %{
7666 predicate(CompressedOops::shift() == 0);
7667 match(Set dst mem);
7668
7669 ins_cost(110);
7670 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7671 ins_encode %{
7672 __ leaq($dst$$Register, $mem$$Address);
7673 %}
7674 ins_pipe(ialu_reg_reg_fat);
7675 %}
7676
7677 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7678 %{
7679 predicate(CompressedOops::shift() == 0);
7680 match(Set dst mem);
7681
7682 ins_cost(110);
7683 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7684 ins_encode %{
7685 __ leaq($dst$$Register, $mem$$Address);
7686 %}
7687 ins_pipe(ialu_reg_reg_fat);
7688 %}
7689
7690 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7691 %{
7692 predicate(CompressedOops::shift() == 0);
7693 match(Set dst mem);
7694
7695 ins_cost(110);
7696 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7697 ins_encode %{
7698 __ leaq($dst$$Register, $mem$$Address);
7699 %}
7700 ins_pipe(ialu_reg_reg_fat);
7701 %}
7702
7703 instruct loadConI(rRegI dst, immI src)
7704 %{
7705 match(Set dst src);
7706
7707 format %{ "movl $dst, $src\t# int" %}
7708 ins_encode %{
7709 __ movl($dst$$Register, $src$$constant);
7710 %}
7711 ins_pipe(ialu_reg_fat); // XXX
7712 %}
7713
7714 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7715 %{
7716 match(Set dst src);
7717 effect(KILL cr);
7718
7719 ins_cost(50);
7720 format %{ "xorl $dst, $dst\t# int" %}
7721 ins_encode %{
7722 __ xorl($dst$$Register, $dst$$Register);
7723 %}
7724 ins_pipe(ialu_reg);
7725 %}
7726
7727 instruct loadConL(rRegL dst, immL src)
7728 %{
7729 match(Set dst src);
7730
7731 ins_cost(150);
7732 format %{ "movq $dst, $src\t# long" %}
7733 ins_encode %{
7734 __ mov64($dst$$Register, $src$$constant);
7735 %}
7736 ins_pipe(ialu_reg);
7737 %}
7738
7739 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7740 %{
7741 match(Set dst src);
7742 effect(KILL cr);
7743
7744 ins_cost(50);
7745 format %{ "xorl $dst, $dst\t# long" %}
7746 ins_encode %{
7747 __ xorl($dst$$Register, $dst$$Register);
7748 %}
7749 ins_pipe(ialu_reg); // XXX
7750 %}
7751
7752 instruct loadConUL32(rRegL dst, immUL32 src)
7753 %{
7754 match(Set dst src);
7755
7756 ins_cost(60);
7757 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7758 ins_encode %{
7759 __ movl($dst$$Register, $src$$constant);
7760 %}
7761 ins_pipe(ialu_reg);
7762 %}
7763
7764 instruct loadConL32(rRegL dst, immL32 src)
7765 %{
7766 match(Set dst src);
7767
7768 ins_cost(70);
7769 format %{ "movq $dst, $src\t# long (32-bit)" %}
7770 ins_encode %{
7771 __ movq($dst$$Register, $src$$constant);
7772 %}
7773 ins_pipe(ialu_reg);
7774 %}
7775
7776 instruct loadConP(rRegP dst, immP con) %{
7777 match(Set dst con);
7778
7779 format %{ "movq $dst, $con\t# ptr" %}
7780 ins_encode %{
7781 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7782 %}
7783 ins_pipe(ialu_reg_fat); // XXX
7784 %}
7785
7786 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7787 %{
7788 match(Set dst src);
7789 effect(KILL cr);
7790
7791 ins_cost(50);
7792 format %{ "xorl $dst, $dst\t# ptr" %}
7793 ins_encode %{
7794 __ xorl($dst$$Register, $dst$$Register);
7795 %}
7796 ins_pipe(ialu_reg);
7797 %}
7798
7799 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7800 %{
7801 match(Set dst src);
7802 effect(KILL cr);
7803
7804 ins_cost(60);
7805 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7806 ins_encode %{
7807 __ movl($dst$$Register, $src$$constant);
7808 %}
7809 ins_pipe(ialu_reg);
7810 %}
7811
7812 instruct loadConF(regF dst, immF con) %{
7813 match(Set dst con);
7814 ins_cost(125);
7815 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7816 ins_encode %{
7817 __ movflt($dst$$XMMRegister, $constantaddress($con));
7818 %}
7819 ins_pipe(pipe_slow);
7820 %}
7821
7822 instruct loadConH(regF dst, immH con) %{
7823 match(Set dst con);
7824 ins_cost(125);
7825 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7826 ins_encode %{
7827 __ movflt($dst$$XMMRegister, $constantaddress($con));
7828 %}
7829 ins_pipe(pipe_slow);
7830 %}
7831
7832 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7833 match(Set dst src);
7834 effect(KILL cr);
7835 format %{ "xorq $dst, $src\t# compressed null pointer" %}
7836 ins_encode %{
7837 __ xorq($dst$$Register, $dst$$Register);
7838 %}
7839 ins_pipe(ialu_reg);
7840 %}
7841
7842 instruct loadConN(rRegN dst, immN src) %{
7843 match(Set dst src);
7844
7845 ins_cost(125);
7846 format %{ "movl $dst, $src\t# compressed ptr" %}
7847 ins_encode %{
7848 address con = (address)$src$$constant;
7849 if (con == nullptr) {
7850 ShouldNotReachHere();
7851 } else {
7852 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7853 }
7854 %}
7855 ins_pipe(ialu_reg_fat); // XXX
7856 %}
7857
7858 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7859 match(Set dst src);
7860
7861 ins_cost(125);
7862 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7863 ins_encode %{
7864 address con = (address)$src$$constant;
7865 if (con == nullptr) {
7866 ShouldNotReachHere();
7867 } else {
7868 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7869 }
7870 %}
7871 ins_pipe(ialu_reg_fat); // XXX
7872 %}
7873
7874 instruct loadConF0(regF dst, immF0 src)
7875 %{
7876 match(Set dst src);
7877 ins_cost(100);
7878
7879 format %{ "xorps $dst, $dst\t# float 0.0" %}
7880 ins_encode %{
7881 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7882 %}
7883 ins_pipe(pipe_slow);
7884 %}
7885
7886 // Use the same format since predicate() can not be used here.
7887 instruct loadConD(regD dst, immD con) %{
7888 match(Set dst con);
7889 ins_cost(125);
7890 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7891 ins_encode %{
7892 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7893 %}
7894 ins_pipe(pipe_slow);
7895 %}
7896
7897 instruct loadConD0(regD dst, immD0 src)
7898 %{
7899 match(Set dst src);
7900 ins_cost(100);
7901
7902 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7903 ins_encode %{
7904 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7905 %}
7906 ins_pipe(pipe_slow);
7907 %}
7908
7909 instruct loadSSI(rRegI dst, stackSlotI src)
7910 %{
7911 match(Set dst src);
7912
7913 ins_cost(125);
7914 format %{ "movl $dst, $src\t# int stk" %}
7915 ins_encode %{
7916 __ movl($dst$$Register, $src$$Address);
7917 %}
7918 ins_pipe(ialu_reg_mem);
7919 %}
7920
7921 instruct loadSSL(rRegL dst, stackSlotL src)
7922 %{
7923 match(Set dst src);
7924
7925 ins_cost(125);
7926 format %{ "movq $dst, $src\t# long stk" %}
7927 ins_encode %{
7928 __ movq($dst$$Register, $src$$Address);
7929 %}
7930 ins_pipe(ialu_reg_mem);
7931 %}
7932
7933 instruct loadSSP(rRegP dst, stackSlotP src)
7934 %{
7935 match(Set dst src);
7936
7937 ins_cost(125);
7938 format %{ "movq $dst, $src\t# ptr stk" %}
7939 ins_encode %{
7940 __ movq($dst$$Register, $src$$Address);
7941 %}
7942 ins_pipe(ialu_reg_mem);
7943 %}
7944
7945 instruct loadSSF(regF dst, stackSlotF src)
7946 %{
7947 match(Set dst src);
7948
7949 ins_cost(125);
7950 format %{ "movss $dst, $src\t# float stk" %}
7951 ins_encode %{
7952 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7953 %}
7954 ins_pipe(pipe_slow); // XXX
7955 %}
7956
7957 // Use the same format since predicate() can not be used here.
7958 instruct loadSSD(regD dst, stackSlotD src)
7959 %{
7960 match(Set dst src);
7961
7962 ins_cost(125);
7963 format %{ "movsd $dst, $src\t# double stk" %}
7964 ins_encode %{
7965 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7966 %}
7967 ins_pipe(pipe_slow); // XXX
7968 %}
7969
7970 // Prefetch instructions for allocation.
7971 // Must be safe to execute with invalid address (cannot fault).
7972
7973 instruct prefetchAlloc( memory mem ) %{
7974 predicate(AllocatePrefetchInstr==3);
7975 match(PrefetchAllocation mem);
7976 ins_cost(125);
7977
7978 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7979 ins_encode %{
7980 __ prefetchw($mem$$Address);
7981 %}
7982 ins_pipe(ialu_mem);
7983 %}
7984
7985 instruct prefetchAllocNTA( memory mem ) %{
7986 predicate(AllocatePrefetchInstr==0);
7987 match(PrefetchAllocation mem);
7988 ins_cost(125);
7989
7990 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7991 ins_encode %{
7992 __ prefetchnta($mem$$Address);
7993 %}
7994 ins_pipe(ialu_mem);
7995 %}
7996
7997 instruct prefetchAllocT0( memory mem ) %{
7998 predicate(AllocatePrefetchInstr==1);
7999 match(PrefetchAllocation mem);
8000 ins_cost(125);
8001
8002 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
8003 ins_encode %{
8004 __ prefetcht0($mem$$Address);
8005 %}
8006 ins_pipe(ialu_mem);
8007 %}
8008
8009 instruct prefetchAllocT2( memory mem ) %{
8010 predicate(AllocatePrefetchInstr==2);
8011 match(PrefetchAllocation mem);
8012 ins_cost(125);
8013
8014 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8015 ins_encode %{
8016 __ prefetcht2($mem$$Address);
8017 %}
8018 ins_pipe(ialu_mem);
8019 %}
8020
8021 //----------Store Instructions-------------------------------------------------
8022
8023 // Store Byte
8024 instruct storeB(memory mem, rRegI src)
8025 %{
8026 match(Set mem (StoreB mem src));
8027
8028 ins_cost(125); // XXX
8029 format %{ "movb $mem, $src\t# byte" %}
8030 ins_encode %{
8031 __ movb($mem$$Address, $src$$Register);
8032 %}
8033 ins_pipe(ialu_mem_reg);
8034 %}
8035
8036 // Store Char/Short
8037 instruct storeC(memory mem, rRegI src)
8038 %{
8039 match(Set mem (StoreC mem src));
8040
8041 ins_cost(125); // XXX
8042 format %{ "movw $mem, $src\t# char/short" %}
8043 ins_encode %{
8044 __ movw($mem$$Address, $src$$Register);
8045 %}
8046 ins_pipe(ialu_mem_reg);
8047 %}
8048
8049 // Store Integer
8050 instruct storeI(memory mem, rRegI src)
8051 %{
8052 match(Set mem (StoreI mem src));
8053
8054 ins_cost(125); // XXX
8055 format %{ "movl $mem, $src\t# int" %}
8056 ins_encode %{
8057 __ movl($mem$$Address, $src$$Register);
8058 %}
8059 ins_pipe(ialu_mem_reg);
8060 %}
8061
8062 // Store Long
8063 instruct storeL(memory mem, rRegL src)
8064 %{
8065 match(Set mem (StoreL mem src));
8066
8067 ins_cost(125); // XXX
8068 format %{ "movq $mem, $src\t# long" %}
8069 ins_encode %{
8070 __ movq($mem$$Address, $src$$Register);
8071 %}
8072 ins_pipe(ialu_mem_reg); // XXX
8073 %}
8074
8075 // Store Pointer
8076 instruct storeP(memory mem, any_RegP src)
8077 %{
8078 predicate(n->as_Store()->barrier_data() == 0);
8079 match(Set mem (StoreP mem src));
8080
8081 ins_cost(125); // XXX
8082 format %{ "movq $mem, $src\t# ptr" %}
8083 ins_encode %{
8084 __ movq($mem$$Address, $src$$Register);
8085 %}
8086 ins_pipe(ialu_mem_reg);
8087 %}
8088
8089 instruct storeImmP0(memory mem, immP0 zero)
8090 %{
8091 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8092 match(Set mem (StoreP mem zero));
8093
8094 ins_cost(125); // XXX
8095 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8096 ins_encode %{
8097 __ movq($mem$$Address, r12);
8098 %}
8099 ins_pipe(ialu_mem_reg);
8100 %}
8101
8102 // Store Null Pointer, mark word, or other simple pointer constant.
8103 instruct storeImmP(memory mem, immP31 src)
8104 %{
8105 predicate(n->as_Store()->barrier_data() == 0);
8106 match(Set mem (StoreP mem src));
8107
8108 ins_cost(150); // XXX
8109 format %{ "movq $mem, $src\t# ptr" %}
8110 ins_encode %{
8111 __ movq($mem$$Address, $src$$constant);
8112 %}
8113 ins_pipe(ialu_mem_imm);
8114 %}
8115
8116 // Store Compressed Pointer
8117 instruct storeN(memory mem, rRegN src)
8118 %{
8119 predicate(n->as_Store()->barrier_data() == 0);
8120 match(Set mem (StoreN mem src));
8121
8122 ins_cost(125); // XXX
8123 format %{ "movl $mem, $src\t# compressed ptr" %}
8124 ins_encode %{
8125 __ movl($mem$$Address, $src$$Register);
8126 %}
8127 ins_pipe(ialu_mem_reg);
8128 %}
8129
8130 instruct storeNKlass(memory mem, rRegN src)
8131 %{
8132 match(Set mem (StoreNKlass mem src));
8133
8134 ins_cost(125); // XXX
8135 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8136 ins_encode %{
8137 __ movl($mem$$Address, $src$$Register);
8138 %}
8139 ins_pipe(ialu_mem_reg);
8140 %}
8141
8142 instruct storeImmN0(memory mem, immN0 zero)
8143 %{
8144 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8145 match(Set mem (StoreN mem zero));
8146
8147 ins_cost(125); // XXX
8148 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8149 ins_encode %{
8150 __ movl($mem$$Address, r12);
8151 %}
8152 ins_pipe(ialu_mem_reg);
8153 %}
8154
8155 instruct storeImmN(memory mem, immN src)
8156 %{
8157 predicate(n->as_Store()->barrier_data() == 0);
8158 match(Set mem (StoreN mem src));
8159
8160 ins_cost(150); // XXX
8161 format %{ "movl $mem, $src\t# compressed ptr" %}
8162 ins_encode %{
8163 address con = (address)$src$$constant;
8164 if (con == nullptr) {
8165 __ movl($mem$$Address, 0);
8166 } else {
8167 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8168 }
8169 %}
8170 ins_pipe(ialu_mem_imm);
8171 %}
8172
8173 instruct storeImmNKlass(memory mem, immNKlass src)
8174 %{
8175 match(Set mem (StoreNKlass mem src));
8176
8177 ins_cost(150); // XXX
8178 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8179 ins_encode %{
8180 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8181 %}
8182 ins_pipe(ialu_mem_imm);
8183 %}
8184
8185 // Store Integer Immediate
8186 instruct storeImmI0(memory mem, immI_0 zero)
8187 %{
8188 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8189 match(Set mem (StoreI mem zero));
8190
8191 ins_cost(125); // XXX
8192 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8193 ins_encode %{
8194 __ movl($mem$$Address, r12);
8195 %}
8196 ins_pipe(ialu_mem_reg);
8197 %}
8198
8199 instruct storeImmI(memory mem, immI src)
8200 %{
8201 match(Set mem (StoreI mem src));
8202
8203 ins_cost(150);
8204 format %{ "movl $mem, $src\t# int" %}
8205 ins_encode %{
8206 __ movl($mem$$Address, $src$$constant);
8207 %}
8208 ins_pipe(ialu_mem_imm);
8209 %}
8210
8211 // Store Long Immediate
8212 instruct storeImmL0(memory mem, immL0 zero)
8213 %{
8214 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8215 match(Set mem (StoreL mem zero));
8216
8217 ins_cost(125); // XXX
8218 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8219 ins_encode %{
8220 __ movq($mem$$Address, r12);
8221 %}
8222 ins_pipe(ialu_mem_reg);
8223 %}
8224
8225 instruct storeImmL(memory mem, immL32 src)
8226 %{
8227 match(Set mem (StoreL mem src));
8228
8229 ins_cost(150);
8230 format %{ "movq $mem, $src\t# long" %}
8231 ins_encode %{
8232 __ movq($mem$$Address, $src$$constant);
8233 %}
8234 ins_pipe(ialu_mem_imm);
8235 %}
8236
8237 // Store Short/Char Immediate
8238 instruct storeImmC0(memory mem, immI_0 zero)
8239 %{
8240 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8241 match(Set mem (StoreC mem zero));
8242
8243 ins_cost(125); // XXX
8244 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8245 ins_encode %{
8246 __ movw($mem$$Address, r12);
8247 %}
8248 ins_pipe(ialu_mem_reg);
8249 %}
8250
8251 instruct storeImmI16(memory mem, immI16 src)
8252 %{
8253 predicate(UseStoreImmI16);
8254 match(Set mem (StoreC mem src));
8255
8256 ins_cost(150);
8257 format %{ "movw $mem, $src\t# short/char" %}
8258 ins_encode %{
8259 __ movw($mem$$Address, $src$$constant);
8260 %}
8261 ins_pipe(ialu_mem_imm);
8262 %}
8263
8264 // Store Byte Immediate
8265 instruct storeImmB0(memory mem, immI_0 zero)
8266 %{
8267 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8268 match(Set mem (StoreB mem zero));
8269
8270 ins_cost(125); // XXX
8271 format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8272 ins_encode %{
8273 __ movb($mem$$Address, r12);
8274 %}
8275 ins_pipe(ialu_mem_reg);
8276 %}
8277
8278 instruct storeImmB(memory mem, immI8 src)
8279 %{
8280 match(Set mem (StoreB mem src));
8281
8282 ins_cost(150); // XXX
8283 format %{ "movb $mem, $src\t# byte" %}
8284 ins_encode %{
8285 __ movb($mem$$Address, $src$$constant);
8286 %}
8287 ins_pipe(ialu_mem_imm);
8288 %}
8289
8290 // Store Float
8291 instruct storeF(memory mem, regF src)
8292 %{
8293 match(Set mem (StoreF mem src));
8294
8295 ins_cost(95); // XXX
8296 format %{ "movss $mem, $src\t# float" %}
8297 ins_encode %{
8298 __ movflt($mem$$Address, $src$$XMMRegister);
8299 %}
8300 ins_pipe(pipe_slow); // XXX
8301 %}
8302
8303 // Store immediate Float value (it is faster than store from XMM register)
8304 instruct storeF0(memory mem, immF0 zero)
8305 %{
8306 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8307 match(Set mem (StoreF mem zero));
8308
8309 ins_cost(25); // XXX
8310 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8311 ins_encode %{
8312 __ movl($mem$$Address, r12);
8313 %}
8314 ins_pipe(ialu_mem_reg);
8315 %}
8316
8317 instruct storeF_imm(memory mem, immF src)
8318 %{
8319 match(Set mem (StoreF mem src));
8320
8321 ins_cost(50);
8322 format %{ "movl $mem, $src\t# float" %}
8323 ins_encode %{
8324 __ movl($mem$$Address, jint_cast($src$$constant));
8325 %}
8326 ins_pipe(ialu_mem_imm);
8327 %}
8328
8329 // Store Double
8330 instruct storeD(memory mem, regD src)
8331 %{
8332 match(Set mem (StoreD mem src));
8333
8334 ins_cost(95); // XXX
8335 format %{ "movsd $mem, $src\t# double" %}
8336 ins_encode %{
8337 __ movdbl($mem$$Address, $src$$XMMRegister);
8338 %}
8339 ins_pipe(pipe_slow); // XXX
8340 %}
8341
8342 // Store immediate double 0.0 (it is faster than store from XMM register)
8343 instruct storeD0_imm(memory mem, immD0 src)
8344 %{
8345 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8346 match(Set mem (StoreD mem src));
8347
8348 ins_cost(50);
8349 format %{ "movq $mem, $src\t# double 0." %}
8350 ins_encode %{
8351 __ movq($mem$$Address, $src$$constant);
8352 %}
8353 ins_pipe(ialu_mem_imm);
8354 %}
8355
8356 instruct storeD0(memory mem, immD0 zero)
8357 %{
8358 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8359 match(Set mem (StoreD mem zero));
8360
8361 ins_cost(25); // XXX
8362 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8363 ins_encode %{
8364 __ movq($mem$$Address, r12);
8365 %}
8366 ins_pipe(ialu_mem_reg);
8367 %}
8368
8369 instruct storeSSI(stackSlotI dst, rRegI src)
8370 %{
8371 match(Set dst src);
8372
8373 ins_cost(100);
8374 format %{ "movl $dst, $src\t# int stk" %}
8375 ins_encode %{
8376 __ movl($dst$$Address, $src$$Register);
8377 %}
8378 ins_pipe( ialu_mem_reg );
8379 %}
8380
8381 instruct storeSSL(stackSlotL dst, rRegL src)
8382 %{
8383 match(Set dst src);
8384
8385 ins_cost(100);
8386 format %{ "movq $dst, $src\t# long stk" %}
8387 ins_encode %{
8388 __ movq($dst$$Address, $src$$Register);
8389 %}
8390 ins_pipe(ialu_mem_reg);
8391 %}
8392
8393 instruct storeSSP(stackSlotP dst, rRegP src)
8394 %{
8395 match(Set dst src);
8396
8397 ins_cost(100);
8398 format %{ "movq $dst, $src\t# ptr stk" %}
8399 ins_encode %{
8400 __ movq($dst$$Address, $src$$Register);
8401 %}
8402 ins_pipe(ialu_mem_reg);
8403 %}
8404
8405 instruct storeSSF(stackSlotF dst, regF src)
8406 %{
8407 match(Set dst src);
8408
8409 ins_cost(95); // XXX
8410 format %{ "movss $dst, $src\t# float stk" %}
8411 ins_encode %{
8412 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8413 %}
8414 ins_pipe(pipe_slow); // XXX
8415 %}
8416
8417 instruct storeSSD(stackSlotD dst, regD src)
8418 %{
8419 match(Set dst src);
8420
8421 ins_cost(95); // XXX
8422 format %{ "movsd $dst, $src\t# double stk" %}
8423 ins_encode %{
8424 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8425 %}
8426 ins_pipe(pipe_slow); // XXX
8427 %}
8428
8429 instruct cacheWB(indirect addr)
8430 %{
8431 predicate(VM_Version::supports_data_cache_line_flush());
8432 match(CacheWB addr);
8433
8434 ins_cost(100);
8435 format %{"cache wb $addr" %}
8436 ins_encode %{
8437 assert($addr->index_position() < 0, "should be");
8438 assert($addr$$disp == 0, "should be");
8439 __ cache_wb(Address($addr$$base$$Register, 0));
8440 %}
8441 ins_pipe(pipe_slow); // XXX
8442 %}
8443
8444 instruct cacheWBPreSync()
8445 %{
8446 predicate(VM_Version::supports_data_cache_line_flush());
8447 match(CacheWBPreSync);
8448
8449 ins_cost(100);
8450 format %{"cache wb presync" %}
8451 ins_encode %{
8452 __ cache_wbsync(true);
8453 %}
8454 ins_pipe(pipe_slow); // XXX
8455 %}
8456
8457 instruct cacheWBPostSync()
8458 %{
8459 predicate(VM_Version::supports_data_cache_line_flush());
8460 match(CacheWBPostSync);
8461
8462 ins_cost(100);
8463 format %{"cache wb postsync" %}
8464 ins_encode %{
8465 __ cache_wbsync(false);
8466 %}
8467 ins_pipe(pipe_slow); // XXX
8468 %}
8469
8470 //----------BSWAP Instructions-------------------------------------------------
8471 instruct bytes_reverse_int(rRegI dst) %{
8472 match(Set dst (ReverseBytesI dst));
8473
8474 format %{ "bswapl $dst" %}
8475 ins_encode %{
8476 __ bswapl($dst$$Register);
8477 %}
8478 ins_pipe( ialu_reg );
8479 %}
8480
8481 instruct bytes_reverse_long(rRegL dst) %{
8482 match(Set dst (ReverseBytesL dst));
8483
8484 format %{ "bswapq $dst" %}
8485 ins_encode %{
8486 __ bswapq($dst$$Register);
8487 %}
8488 ins_pipe( ialu_reg);
8489 %}
8490
8491 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8492 match(Set dst (ReverseBytesUS dst));
8493 effect(KILL cr);
8494
8495 format %{ "bswapl $dst\n\t"
8496 "shrl $dst,16\n\t" %}
8497 ins_encode %{
8498 __ bswapl($dst$$Register);
8499 __ shrl($dst$$Register, 16);
8500 %}
8501 ins_pipe( ialu_reg );
8502 %}
8503
8504 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8505 match(Set dst (ReverseBytesS dst));
8506 effect(KILL cr);
8507
8508 format %{ "bswapl $dst\n\t"
8509 "sarl $dst,16\n\t" %}
8510 ins_encode %{
8511 __ bswapl($dst$$Register);
8512 __ sarl($dst$$Register, 16);
8513 %}
8514 ins_pipe( ialu_reg );
8515 %}
8516
8517 //---------- Zeros Count Instructions ------------------------------------------
8518
8519 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8520 predicate(UseCountLeadingZerosInstruction);
8521 match(Set dst (CountLeadingZerosI src));
8522 effect(KILL cr);
8523
8524 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8525 ins_encode %{
8526 __ lzcntl($dst$$Register, $src$$Register);
8527 %}
8528 ins_pipe(ialu_reg);
8529 %}
8530
8531 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8532 predicate(UseCountLeadingZerosInstruction);
8533 match(Set dst (CountLeadingZerosI (LoadI src)));
8534 effect(KILL cr);
8535 ins_cost(175);
8536 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8537 ins_encode %{
8538 __ lzcntl($dst$$Register, $src$$Address);
8539 %}
8540 ins_pipe(ialu_reg_mem);
8541 %}
8542
8543 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8544 predicate(!UseCountLeadingZerosInstruction);
8545 match(Set dst (CountLeadingZerosI src));
8546 effect(KILL cr);
8547
8548 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8549 "jnz skip\n\t"
8550 "movl $dst, -1\n"
8551 "skip:\n\t"
8552 "negl $dst\n\t"
8553 "addl $dst, 31" %}
8554 ins_encode %{
8555 Register Rdst = $dst$$Register;
8556 Register Rsrc = $src$$Register;
8557 Label skip;
8558 __ bsrl(Rdst, Rsrc);
8559 __ jccb(Assembler::notZero, skip);
8560 __ movl(Rdst, -1);
8561 __ bind(skip);
8562 __ negl(Rdst);
8563 __ addl(Rdst, BitsPerInt - 1);
8564 %}
8565 ins_pipe(ialu_reg);
8566 %}
8567
8568 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8569 predicate(UseCountLeadingZerosInstruction);
8570 match(Set dst (CountLeadingZerosL src));
8571 effect(KILL cr);
8572
8573 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8574 ins_encode %{
8575 __ lzcntq($dst$$Register, $src$$Register);
8576 %}
8577 ins_pipe(ialu_reg);
8578 %}
8579
8580 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8581 predicate(UseCountLeadingZerosInstruction);
8582 match(Set dst (CountLeadingZerosL (LoadL src)));
8583 effect(KILL cr);
8584 ins_cost(175);
8585 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8586 ins_encode %{
8587 __ lzcntq($dst$$Register, $src$$Address);
8588 %}
8589 ins_pipe(ialu_reg_mem);
8590 %}
8591
8592 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8593 predicate(!UseCountLeadingZerosInstruction);
8594 match(Set dst (CountLeadingZerosL src));
8595 effect(KILL cr);
8596
8597 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8598 "jnz skip\n\t"
8599 "movl $dst, -1\n"
8600 "skip:\n\t"
8601 "negl $dst\n\t"
8602 "addl $dst, 63" %}
8603 ins_encode %{
8604 Register Rdst = $dst$$Register;
8605 Register Rsrc = $src$$Register;
8606 Label skip;
8607 __ bsrq(Rdst, Rsrc);
8608 __ jccb(Assembler::notZero, skip);
8609 __ movl(Rdst, -1);
8610 __ bind(skip);
8611 __ negl(Rdst);
8612 __ addl(Rdst, BitsPerLong - 1);
8613 %}
8614 ins_pipe(ialu_reg);
8615 %}
8616
8617 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8618 predicate(UseCountTrailingZerosInstruction);
8619 match(Set dst (CountTrailingZerosI src));
8620 effect(KILL cr);
8621
8622 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8623 ins_encode %{
8624 __ tzcntl($dst$$Register, $src$$Register);
8625 %}
8626 ins_pipe(ialu_reg);
8627 %}
8628
8629 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8630 predicate(UseCountTrailingZerosInstruction);
8631 match(Set dst (CountTrailingZerosI (LoadI src)));
8632 effect(KILL cr);
8633 ins_cost(175);
8634 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8635 ins_encode %{
8636 __ tzcntl($dst$$Register, $src$$Address);
8637 %}
8638 ins_pipe(ialu_reg_mem);
8639 %}
8640
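// Fallback when tzcnt is not available: for a non-zero source bsf already
// yields the trailing-zero count (the index of the lowest set bit); a zero
// source sets ZF, in which case dst is loaded with BitsPerInt (32) directly.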
8641 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8642 predicate(!UseCountTrailingZerosInstruction);
8643 match(Set dst (CountTrailingZerosI src));
8644 effect(KILL cr);
8645
8646 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8647 "jnz done\n\t"
8648 "movl $dst, 32\n"
8649 "done:" %}
8650 ins_encode %{
8651 Register Rdst = $dst$$Register;
8652 Label done;
8653 __ bsfl(Rdst, $src$$Register);
8654 __ jccb(Assembler::notZero, done);
8655 __ movl(Rdst, BitsPerInt);
8656 __ bind(done);
8657 %}
8658 ins_pipe(ialu_reg);
8659 %}
8660
8661 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8662 predicate(UseCountTrailingZerosInstruction);
8663 match(Set dst (CountTrailingZerosL src));
8664 effect(KILL cr);
8665
8666 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8667 ins_encode %{
8668 __ tzcntq($dst$$Register, $src$$Register);
8669 %}
8670 ins_pipe(ialu_reg);
8671 %}
8672
8673 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8674 predicate(UseCountTrailingZerosInstruction);
8675 match(Set dst (CountTrailingZerosL (LoadL src)));
8676 effect(KILL cr);
8677 ins_cost(175);
8678 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8679 ins_encode %{
8680 __ tzcntq($dst$$Register, $src$$Address);
8681 %}
8682 ins_pipe(ialu_reg_mem);
8683 %}
8684
8685 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8686 predicate(!UseCountTrailingZerosInstruction);
8687 match(Set dst (CountTrailingZerosL src));
8688 effect(KILL cr);
8689
8690 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8691 "jnz done\n\t"
8692 "movl $dst, 64\n"
8693 "done:" %}
8694 ins_encode %{
8695 Register Rdst = $dst$$Register;
8696 Label done;
8697 __ bsfq(Rdst, $src$$Register);
8698 __ jccb(Assembler::notZero, done);
8699 __ movl(Rdst, BitsPerLong);
8700 __ bind(done);
8701 %}
8702 ins_pipe(ialu_reg);
8703 %}
8704
8705 //--------------- Reverse Operation Instructions ----------------
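// Bit reversal is delegated to MacroAssembler::reverseI/reverseL. When GFNI is
// available the helper is handed two XMM temporaries (presumably so it can use
// the GF2P8AFFINEQB byte-wise bit-reversal trick); without GFNI it works with
// general purpose registers only, which is why the non-gfni flavors pass
// xnoreg for the XMM temps.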
8706 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8707 predicate(!VM_Version::supports_gfni());
8708 match(Set dst (ReverseI src));
8709 effect(TEMP dst, TEMP rtmp, KILL cr);
8710 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8711 ins_encode %{
8712 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8713 %}
8714 ins_pipe( ialu_reg );
8715 %}
8716
8717 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8718 predicate(VM_Version::supports_gfni());
8719 match(Set dst (ReverseI src));
8720 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8721 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8722 ins_encode %{
8723 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8724 %}
8725 ins_pipe( ialu_reg );
8726 %}
8727
8728 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8729 predicate(!VM_Version::supports_gfni());
8730 match(Set dst (ReverseL src));
8731 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8732 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8733 ins_encode %{
8734 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8735 %}
8736 ins_pipe( ialu_reg );
8737 %}
8738
8739 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8740 predicate(VM_Version::supports_gfni());
8741 match(Set dst (ReverseL src));
8742 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8743 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8744 ins_encode %{
8745 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8746 %}
8747 ins_pipe( ialu_reg );
8748 %}
8749
8750 //---------- Population Count Instructions -------------------------------------
8751
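// These match only when UsePopCountInstruction is set, i.e. the CPU provides a
// hardware popcnt. Note that both flavors produce an int result (the count is
// at most 64), hence the rRegI destination even for the long forms.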
8752 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8753 predicate(UsePopCountInstruction);
8754 match(Set dst (PopCountI src));
8755 effect(KILL cr);
8756
8757 format %{ "popcnt $dst, $src" %}
8758 ins_encode %{
8759 __ popcntl($dst$$Register, $src$$Register);
8760 %}
8761 ins_pipe(ialu_reg);
8762 %}
8763
8764 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8765 predicate(UsePopCountInstruction);
8766 match(Set dst (PopCountI (LoadI mem)));
8767 effect(KILL cr);
8768
8769 format %{ "popcnt $dst, $mem" %}
8770 ins_encode %{
8771 __ popcntl($dst$$Register, $mem$$Address);
8772 %}
8773 ins_pipe(ialu_reg);
8774 %}
8775
8776 // Note: Long.bitCount(long) returns an int.
8777 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8778 predicate(UsePopCountInstruction);
8779 match(Set dst (PopCountL src));
8780 effect(KILL cr);
8781
8782 format %{ "popcnt $dst, $src" %}
8783 ins_encode %{
8784 __ popcntq($dst$$Register, $src$$Register);
8785 %}
8786 ins_pipe(ialu_reg);
8787 %}
8788
8789 // Note: Long.bitCount(long) returns an int.
8790 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8791 predicate(UsePopCountInstruction);
8792 match(Set dst (PopCountL (LoadL mem)));
8793 effect(KILL cr);
8794
8795 format %{ "popcnt $dst, $mem" %}
8796 ins_encode %{
8797 __ popcntq($dst$$Register, $mem$$Address);
8798 %}
8799 ins_pipe(ialu_reg);
8800 %}
8801
8802
8803 //----------MemBar Instructions-----------------------------------------------
8804 // Memory barrier flavors
8805
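// x86-64 is TSO: loads are not reordered with other loads, stores are not
// reordered with other stores, and stores are not reordered with earlier
// loads, so the acquire, release and store-store barriers need no code at all
// (size(0), empty encoding). The only reordering the hardware may perform is a
// load passing an earlier store, so only the store-load flavors emit anything,
// and they use the "lock addl [rsp], 0" idiom, which is usually cheaper than
// mfence.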
8806 instruct membar_acquire()
8807 %{
8808 match(MemBarAcquire);
8809 match(LoadFence);
8810 ins_cost(0);
8811
8812 size(0);
8813 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8814 ins_encode();
8815 ins_pipe(empty);
8816 %}
8817
8818 instruct membar_acquire_lock()
8819 %{
8820 match(MemBarAcquireLock);
8821 ins_cost(0);
8822
8823 size(0);
8824 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8825 ins_encode();
8826 ins_pipe(empty);
8827 %}
8828
8829 instruct membar_release()
8830 %{
8831 match(MemBarRelease);
8832 match(StoreFence);
8833 ins_cost(0);
8834
8835 size(0);
8836 format %{ "MEMBAR-release ! (empty encoding)" %}
8837 ins_encode();
8838 ins_pipe(empty);
8839 %}
8840
8841 instruct membar_release_lock()
8842 %{
8843 match(MemBarReleaseLock);
8844 ins_cost(0);
8845
8846 size(0);
8847 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8848 ins_encode();
8849 ins_pipe(empty);
8850 %}
8851
8852 instruct membar_storeload(rFlagsReg cr) %{
8853 match(MemBarStoreLoad);
8854 effect(KILL cr);
8855 ins_cost(400);
8856
8857 format %{
8858 $$template
8859 $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
8860 %}
8861 ins_encode %{
8862 __ membar(Assembler::StoreLoad);
8863 %}
8864 ins_pipe(pipe_slow);
8865 %}
8866
8867 instruct membar_volatile(rFlagsReg cr) %{
8868 match(MemBarVolatile);
8869 effect(KILL cr);
8870 ins_cost(400);
8871
8872 format %{
8873 $$template
8874 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8875 %}
8876 ins_encode %{
8877 __ membar(Assembler::StoreLoad);
8878 %}
8879 ins_pipe(pipe_slow);
8880 %}
8881
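// A MemBarVolatile can be dropped entirely when the matcher can prove that the
// required store-load ordering is already provided by the surrounding code
// (see Matcher::post_store_load_barrier); this zero-cost variant then wins.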
8882 instruct unnecessary_membar_volatile()
8883 %{
8884 match(MemBarVolatile);
8885 predicate(Matcher::post_store_load_barrier(n));
8886 ins_cost(0);
8887
8888 size(0);
8889 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8890 ins_encode();
8891 ins_pipe(empty);
8892 %}
8893
8894 instruct membar_full(rFlagsReg cr) %{
8895 match(MemBarFull);
8896 effect(KILL cr);
8897 ins_cost(400);
8898
8899 format %{
8900 $$template
8901 $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
8902 %}
8903 ins_encode %{
8904 __ membar(Assembler::StoreLoad);
8905 %}
8906 ins_pipe(pipe_slow);
8907 %}
8908
8909 instruct membar_storestore() %{
8910 match(MemBarStoreStore);
8911 match(StoreStoreFence);
8912 ins_cost(0);
8913
8914 size(0);
8915 format %{ "MEMBAR-storestore (empty encoding)" %}
8916 ins_encode( );
8917 ins_pipe(empty);
8918 %}
8919
8920 //----------Move Instructions--------------------------------------------------
8921
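// CastX2P/CastP2X only reinterpret a value between the long and pointer ideal
// types; no conversion is needed, so a movq is emitted only when the register
// allocator happens to assign different registers to source and destination.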
8922 instruct castX2P(rRegP dst, rRegL src)
8923 %{
8924 match(Set dst (CastX2P src));
8925
8926 format %{ "movq $dst, $src\t# long->ptr" %}
8927 ins_encode %{
8928 if ($dst$$reg != $src$$reg) {
8929 __ movptr($dst$$Register, $src$$Register);
8930 }
8931 %}
8932 ins_pipe(ialu_reg_reg); // XXX
8933 %}
8934
8935 instruct castP2X(rRegL dst, rRegP src)
8936 %{
8937 match(Set dst (CastP2X src));
8938
8939 format %{ "movq $dst, $src\t# ptr -> long" %}
8940 ins_encode %{
8941 if ($dst$$reg != $src$$reg) {
8942 __ movptr($dst$$Register, $src$$Register);
8943 }
8944 %}
8945 ins_pipe(ialu_reg_reg); // XXX
8946 %}
8947
8948 // Convert an oop into an int for vector alignment masking
8949 instruct convP2I(rRegI dst, rRegP src)
8950 %{
8951 match(Set dst (ConvL2I (CastP2X src)));
8952
8953 format %{ "movl $dst, $src\t# ptr -> int" %}
8954 ins_encode %{
8955 __ movl($dst$$Register, $src$$Register);
8956 %}
8957 ins_pipe(ialu_reg_reg); // XXX
8958 %}
8959
8960 // Convert a compressed oop into an int for vector alignment masking
8961 // in the case of 32-bit oops (heap < 4GB).
8962 instruct convN2I(rRegI dst, rRegN src)
8963 %{
8964 predicate(CompressedOops::shift() == 0);
8965 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8966
8967 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8968 ins_encode %{
8969 __ movl($dst$$Register, $src$$Register);
8970 %}
8971 ins_pipe(ialu_reg_reg); // XXX
8972 %}
8973
8974 // Convert oop pointer into compressed form
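// (subtract the heap base, if any, and shift right by the compressed-oop
// shift). The *_not_null variants below can skip the null handling that the
// general forms must preserve.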
8975 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8976 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8977 match(Set dst (EncodeP src));
8978 effect(KILL cr);
8979 format %{ "encode_heap_oop $dst,$src" %}
8980 ins_encode %{
8981 Register s = $src$$Register;
8982 Register d = $dst$$Register;
8983 if (s != d) {
8984 __ movq(d, s);
8985 }
8986 __ encode_heap_oop(d);
8987 %}
8988 ins_pipe(ialu_reg_long);
8989 %}
8990
8991 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8992 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8993 match(Set dst (EncodeP src));
8994 effect(KILL cr);
8995 format %{ "encode_heap_oop_not_null $dst,$src" %}
8996 ins_encode %{
8997 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8998 %}
8999 ins_pipe(ialu_reg_long);
9000 %}
9001
9002 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
9003 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9004 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9005 match(Set dst (DecodeN src));
9006 effect(KILL cr);
9007 format %{ "decode_heap_oop $dst,$src" %}
9008 ins_encode %{
9009 Register s = $src$$Register;
9010 Register d = $dst$$Register;
9011 if (s != d) {
9012 __ movq(d, s);
9013 }
9014 __ decode_heap_oop(d);
9015 %}
9016 ins_pipe(ialu_reg_long);
9017 %}
9018
9019 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9020 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9021 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9022 match(Set dst (DecodeN src));
9023 effect(KILL cr);
9024 format %{ "decode_heap_oop_not_null $dst,$src" %}
9025 ins_encode %{
9026 Register s = $src$$Register;
9027 Register d = $dst$$Register;
9028 if (s != d) {
9029 __ decode_heap_oop_not_null(d, s);
9030 } else {
9031 __ decode_heap_oop_not_null(d);
9032 }
9033 %}
9034 ins_pipe(ialu_reg_long);
9035 %}
9036
9037 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9038 match(Set dst (EncodePKlass src));
9039 effect(TEMP dst, KILL cr);
9040 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9041 ins_encode %{
9042 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9043 %}
9044 ins_pipe(ialu_reg_long);
9045 %}
9046
9047 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9048 match(Set dst (DecodeNKlass src));
9049 effect(TEMP dst, KILL cr);
9050 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9051 ins_encode %{
9052 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9053 %}
9054 ins_pipe(ialu_reg_long);
9055 %}
9056
9057 //----------Conditional Move---------------------------------------------------
9058 // Jump
9059 // dummy instruction for generating temp registers
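// The switch dispatch loads the constant-table (jump table) base into $dest
// with lea and then jumps indirectly through base + (scaled) switch value;
// the TEMP register is only needed so that address can be built by hand.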
9060 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9061 match(Jump (LShiftL switch_val shift));
9062 ins_cost(350);
9063 predicate(false);
9064 effect(TEMP dest);
9065
9066 format %{ "leaq $dest, [$constantaddress]\n\t"
9067 "jmp [$dest + $switch_val << $shift]\n\t" %}
9068 ins_encode %{
9069 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9070 // to do that and the compiler is using that register as one it can allocate.
9071 // So we build it all by hand.
9072 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9073 // ArrayAddress dispatch(table, index);
9074 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9075 __ lea($dest$$Register, $constantaddress);
9076 __ jmp(dispatch);
9077 %}
9078 ins_pipe(pipe_jmp);
9079 %}
9080
9081 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9082 match(Jump (AddL (LShiftL switch_val shift) offset));
9083 ins_cost(350);
9084 effect(TEMP dest);
9085
9086 format %{ "leaq $dest, [$constantaddress]\n\t"
9087 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9088 ins_encode %{
9089 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9090 // to do that and the compiler is using that register as one it can allocate.
9091 // So we build it all by hand.
9092 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9093 // ArrayAddress dispatch(table, index);
9094 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9095 __ lea($dest$$Register, $constantaddress);
9096 __ jmp(dispatch);
9097 %}
9098 ins_pipe(pipe_jmp);
9099 %}
9100
9101 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9102 match(Jump switch_val);
9103 ins_cost(350);
9104 effect(TEMP dest);
9105
9106 format %{ "leaq $dest, [$constantaddress]\n\t"
9107 "jmp [$dest + $switch_val]\n\t" %}
9108 ins_encode %{
9109 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9110 // to do that and the compiler is using that register as one it can allocate.
9111 // So we build it all by hand.
9112 // Address index(noreg, switch_reg, Address::times_1);
9113 // ArrayAddress dispatch(table, index);
9114 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9115 __ lea($dest$$Register, $constantaddress);
9116 __ jmp(dispatch);
9117 %}
9118 ins_pipe(pipe_jmp);
9119 %}
9120
9121 // Conditional move
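// When the CMove selects the constant 1 on one side and a value the predicate
// proves to be 0 on the other, the whole conditional move collapses to a
// single setb of the negated condition, hence the
// setb(negate_condition(...)) encodings below.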
9122 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9123 %{
9124 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9125 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9126
9127 ins_cost(100); // XXX
9128 format %{ "setbn$cop $dst\t# signed, int" %}
9129 ins_encode %{
9130 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9131 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9132 %}
9133 ins_pipe(ialu_reg);
9134 %}
9135
9136 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9137 %{
9138 predicate(!UseAPX);
9139 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9140
9141 ins_cost(200); // XXX
9142 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9143 ins_encode %{
9144 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9145 %}
9146 ins_pipe(pipe_cmov_reg);
9147 %}
9148
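// The *_ndd flavors use the Intel APX "new data destination" (three-operand)
// encodings and are only selected when UseAPX is set; a separate destination
// lets the allocator keep both inputs live instead of forcing the two-operand
// read-modify-write form.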
9149 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9150 %{
9151 predicate(UseAPX);
9152 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9153
9154 ins_cost(200);
9155 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9156 ins_encode %{
9157 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9158 %}
9159 ins_pipe(pipe_cmov_reg);
9160 %}
9161
9162 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9163 %{
9164 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9165 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9166
9167 ins_cost(100); // XXX
9168 format %{ "setbn$cop $dst\t# unsigned, int" %}
9169 ins_encode %{
9170 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9171 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9172 %}
9173 ins_pipe(ialu_reg);
9174 %}
9175
9176 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9177 predicate(!UseAPX);
9178 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9179
9180 ins_cost(200); // XXX
9181 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9182 ins_encode %{
9183 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9184 %}
9185 ins_pipe(pipe_cmov_reg);
9186 %}
9187
9188 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9189 predicate(UseAPX);
9190 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9191
9192 ins_cost(200);
9193 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9194 ins_encode %{
9195 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9196 %}
9197 ins_pipe(pipe_cmov_reg);
9198 %}
9199
9200 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9201 %{
9202 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9203 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9204
9205 ins_cost(100); // XXX
9206 format %{ "setbn$cop $dst\t# unsigned, int" %}
9207 ins_encode %{
9208 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9209 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9210 %}
9211 ins_pipe(ialu_reg);
9212 %}
9213
9214 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9215 %{
9216 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9217 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9218
9219 ins_cost(100); // XXX
9220 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9221 ins_encode %{
9222 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9223 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9224 %}
9225 ins_pipe(ialu_reg);
9226 %}
9227
9228 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9229 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9230
9231 ins_cost(200);
9232 expand %{
9233 cmovI_regU(cop, cr, dst, src);
9234 %}
9235 %}
9236
9237 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9238 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9239
9240 ins_cost(200);
9241 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9242 ins_encode %{
9243 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9244 %}
9245 ins_pipe(pipe_cmov_reg);
9246 %}
9247
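// After an unordered floating point compare (the UCF flag flavors) the parity
// flag is set when either operand was NaN. For != the move must also be taken
// in that unordered case, so the cmovne is preceded by a cmovp; the _eq
// variants (and the analogous N/P/L forms below) get the same effect by
// swapping the CMove inputs, as the following comment explains.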
9248 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9249 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9250 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9251
9252 ins_cost(200); // XXX
9253 format %{ "cmovpl $dst, $src\n\t"
9254 "cmovnel $dst, $src" %}
9255 ins_encode %{
9256 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9257 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9258 %}
9259 ins_pipe(pipe_cmov_reg);
9260 %}
9261
9262 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9263 // inputs of the CMove
9264 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9265 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9266 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9267 effect(TEMP dst);
9268
9269 ins_cost(200); // XXX
9270 format %{ "cmovpl $dst, $src\n\t"
9271 "cmovnel $dst, $src" %}
9272 ins_encode %{
9273 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9274 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9275 %}
9276 ins_pipe(pipe_cmov_reg);
9277 %}
9278
9279 // Conditional move
9280 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9281 predicate(!UseAPX);
9282 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9283
9284 ins_cost(250); // XXX
9285 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9286 ins_encode %{
9287 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9288 %}
9289 ins_pipe(pipe_cmov_mem);
9290 %}
9291
9292 // Conditional move
9293 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9294 %{
9295 predicate(UseAPX);
9296 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9297
9298 ins_cost(250);
9299 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9300 ins_encode %{
9301 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9302 %}
9303 ins_pipe(pipe_cmov_mem);
9304 %}
9305
9306 // Conditional move
9307 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9308 %{
9309 predicate(!UseAPX);
9310 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9311
9312 ins_cost(250); // XXX
9313 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9314 ins_encode %{
9315 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9316 %}
9317 ins_pipe(pipe_cmov_mem);
9318 %}
9319
9320 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9321 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9322
9323 ins_cost(250);
9324 expand %{
9325 cmovI_memU(cop, cr, dst, src);
9326 %}
9327 %}
9328
9329 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9330 %{
9331 predicate(UseAPX);
9332 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9333
9334 ins_cost(250);
9335 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9336 ins_encode %{
9337 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9338 %}
9339 ins_pipe(pipe_cmov_mem);
9340 %}
9341
9342 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9343 %{
9344 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9345
9346 ins_cost(250);
9347 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9348 ins_encode %{
9349 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9350 %}
9351 ins_pipe(pipe_cmov_mem);
9352 %}
9353
9354 // Conditional move
9355 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9356 %{
9357 predicate(!UseAPX);
9358 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9359
9360 ins_cost(200); // XXX
9361 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9362 ins_encode %{
9363 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9364 %}
9365 ins_pipe(pipe_cmov_reg);
9366 %}
9367
9368 // Conditional move ndd
9369 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9370 %{
9371 predicate(UseAPX);
9372 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9373
9374 ins_cost(200);
9375 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9376 ins_encode %{
9377 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9378 %}
9379 ins_pipe(pipe_cmov_reg);
9380 %}
9381
9382 // Conditional move
9383 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9384 %{
9385 predicate(!UseAPX);
9386 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9387
9388 ins_cost(200); // XXX
9389 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9390 ins_encode %{
9391 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9392 %}
9393 ins_pipe(pipe_cmov_reg);
9394 %}
9395
9396 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9397 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9398
9399 ins_cost(200);
9400 expand %{
9401 cmovN_regU(cop, cr, dst, src);
9402 %}
9403 %}
9404
9405 // Conditional move ndd
9406 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9407 %{
9408 predicate(UseAPX);
9409 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9410
9411 ins_cost(200);
9412 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9413 ins_encode %{
9414 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9415 %}
9416 ins_pipe(pipe_cmov_reg);
9417 %}
9418
9419 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9420 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9421
9422 ins_cost(200);
9423 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9424 ins_encode %{
9425 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9426 %}
9427 ins_pipe(pipe_cmov_reg);
9428 %}
9429
9430 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9431 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9432 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9433
9434 ins_cost(200); // XXX
9435 format %{ "cmovpl $dst, $src\n\t"
9436 "cmovnel $dst, $src" %}
9437 ins_encode %{
9438 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9439 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9440 %}
9441 ins_pipe(pipe_cmov_reg);
9442 %}
9443
9444 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9445 // inputs of the CMove
9446 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9447 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9448 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9449
9450 ins_cost(200); // XXX
9451 format %{ "cmovpl $dst, $src\n\t"
9452 "cmovnel $dst, $src" %}
9453 ins_encode %{
9454 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9455 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9456 %}
9457 ins_pipe(pipe_cmov_reg);
9458 %}
9459
9460 // Conditional move
9461 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9462 %{
9463 predicate(!UseAPX);
9464 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9465
9466 ins_cost(200); // XXX
9467 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9468 ins_encode %{
9469 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9470 %}
9471 ins_pipe(pipe_cmov_reg); // XXX
9472 %}
9473
9474 // Conditional move ndd
9475 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9476 %{
9477 predicate(UseAPX);
9478 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9479
9480 ins_cost(200);
9481 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9482 ins_encode %{
9483 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9484 %}
9485 ins_pipe(pipe_cmov_reg);
9486 %}
9487
9488 // Conditional move
9489 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9490 %{
9491 predicate(!UseAPX);
9492 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9493
9494 ins_cost(200); // XXX
9495 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9496 ins_encode %{
9497 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9498 %}
9499 ins_pipe(pipe_cmov_reg); // XXX
9500 %}
9501
9502 // Conditional move ndd
9503 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9504 %{
9505 predicate(UseAPX);
9506 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9507
9508 ins_cost(200);
9509 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9510 ins_encode %{
9511 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9512 %}
9513 ins_pipe(pipe_cmov_reg);
9514 %}
9515
9516 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9517 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9518
9519 ins_cost(200);
9520 expand %{
9521 cmovP_regU(cop, cr, dst, src);
9522 %}
9523 %}
9524
9525 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9526 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9527
9528 ins_cost(200);
9529 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9530 ins_encode %{
9531 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9532 %}
9533 ins_pipe(pipe_cmov_reg);
9534 %}
9535
9536 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9537 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9538 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9539
9540 ins_cost(200); // XXX
9541 format %{ "cmovpq $dst, $src\n\t"
9542 "cmovneq $dst, $src" %}
9543 ins_encode %{
9544 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9545 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9546 %}
9547 ins_pipe(pipe_cmov_reg);
9548 %}
9549
9550 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9551 // inputs of the CMove
9552 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9553 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9554 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9555
9556 ins_cost(200); // XXX
9557 format %{ "cmovpq $dst, $src\n\t"
9558 "cmovneq $dst, $src" %}
9559 ins_encode %{
9560 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9561 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9562 %}
9563 ins_pipe(pipe_cmov_reg);
9564 %}
9565
9566 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9567 %{
9568 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9569 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9570
9571 ins_cost(100); // XXX
9572 format %{ "setbn$cop $dst\t# signed, long" %}
9573 ins_encode %{
9574 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9575 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9576 %}
9577 ins_pipe(ialu_reg);
9578 %}
9579
9580 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9581 %{
9582 predicate(!UseAPX);
9583 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9584
9585 ins_cost(200); // XXX
9586 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9587 ins_encode %{
9588 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9589 %}
9590 ins_pipe(pipe_cmov_reg); // XXX
9591 %}
9592
9593 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9594 %{
9595 predicate(UseAPX);
9596 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9597
9598 ins_cost(200);
9599 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9600 ins_encode %{
9601 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9602 %}
9603 ins_pipe(pipe_cmov_reg);
9604 %}
9605
9606 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9607 %{
9608 predicate(!UseAPX);
9609 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9610
9611 ins_cost(200); // XXX
9612 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9613 ins_encode %{
9614 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9615 %}
9616 ins_pipe(pipe_cmov_mem); // XXX
9617 %}
9618
9619 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9620 %{
9621 predicate(UseAPX);
9622 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9623
9624 ins_cost(200);
9625 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9626 ins_encode %{
9627 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9628 %}
9629 ins_pipe(pipe_cmov_mem);
9630 %}
9631
9632 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9633 %{
9634 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9635 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9636
9637 ins_cost(100); // XXX
9638 format %{ "setbn$cop $dst\t# unsigned, long" %}
9639 ins_encode %{
9640 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9641 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9642 %}
9643 ins_pipe(ialu_reg);
9644 %}
9645
9646 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9647 %{
9648 predicate(!UseAPX);
9649 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9650
9651 ins_cost(200); // XXX
9652 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9653 ins_encode %{
9654 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9655 %}
9656 ins_pipe(pipe_cmov_reg); // XXX
9657 %}
9658
9659 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9660 %{
9661 predicate(UseAPX);
9662 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9663
9664 ins_cost(200);
9665 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9666 ins_encode %{
9667 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9668 %}
9669 ins_pipe(pipe_cmov_reg);
9670 %}
9671
9672 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9673 %{
9674 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9675 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9676
9677 ins_cost(100); // XXX
9678 format %{ "setbn$cop $dst\t# unsigned, long" %}
9679 ins_encode %{
9680 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9681 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9682 %}
9683 ins_pipe(ialu_reg);
9684 %}
9685
9686 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9687 %{
9688 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9689 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9690
9691 ins_cost(100); // XXX
9692 format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
9693 ins_encode %{
9694 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9695 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9696 %}
9697 ins_pipe(ialu_reg);
9698 %}
9699
9700 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9701 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9702
9703 ins_cost(200);
9704 expand %{
9705 cmovL_regU(cop, cr, dst, src);
9706 %}
9707 %}
9708
9709 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
9710 %{
9711 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9712
9713 ins_cost(200);
9714 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9715 ins_encode %{
9716 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9717 %}
9718 ins_pipe(pipe_cmov_reg);
9719 %}
9720
9721 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9722 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9723 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9724
9725 ins_cost(200); // XXX
9726 format %{ "cmovpq $dst, $src\n\t"
9727 "cmovneq $dst, $src" %}
9728 ins_encode %{
9729 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9730 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9731 %}
9732 ins_pipe(pipe_cmov_reg);
9733 %}
9734
9735 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9736 // inputs of the CMove
9737 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9738 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9739 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9740
9741 ins_cost(200); // XXX
9742 format %{ "cmovpq $dst, $src\n\t"
9743 "cmovneq $dst, $src" %}
9744 ins_encode %{
9745 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9746 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9747 %}
9748 ins_pipe(pipe_cmov_reg);
9749 %}
9750
9751 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9752 %{
9753 predicate(!UseAPX);
9754 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9755
9756 ins_cost(200); // XXX
9757 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9758 ins_encode %{
9759 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9760 %}
9761 ins_pipe(pipe_cmov_mem); // XXX
9762 %}
9763
9764 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9765 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9766
9767 ins_cost(200);
9768 expand %{
9769 cmovL_memU(cop, cr, dst, src);
9770 %}
9771 %}
9772
9773 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9774 %{
9775 predicate(UseAPX);
9776 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9777
9778 ins_cost(200);
9779 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9780 ins_encode %{
9781 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9782 %}
9783 ins_pipe(pipe_cmov_mem);
9784 %}
9785
9786 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
9787 %{
9788 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9789
9790 ins_cost(200);
9791 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9792 ins_encode %{
9793 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9794 %}
9795 ins_pipe(pipe_cmov_mem);
9796 %}
9797
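// There is no cmov for XMM registers, so CMoveF/CMoveD are implemented as a
// short forward branch (with the condition inverted) around a
// register-to-register move.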
9798 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9799 %{
9800 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9801
9802 ins_cost(200); // XXX
9803 format %{ "jn$cop skip\t# signed cmove float\n\t"
9804 "movss $dst, $src\n"
9805 "skip:" %}
9806 ins_encode %{
9807 Label Lskip;
9808 // Invert sense of branch from sense of CMOV
9809 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9810 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9811 __ bind(Lskip);
9812 %}
9813 ins_pipe(pipe_slow);
9814 %}
9815
9816 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9817 %{
9818 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9819
9820 ins_cost(200); // XXX
9821 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9822 "movss $dst, $src\n"
9823 "skip:" %}
9824 ins_encode %{
9825 Label Lskip;
9826 // Invert sense of branch from sense of CMOV
9827 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9828 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9829 __ bind(Lskip);
9830 %}
9831 ins_pipe(pipe_slow);
9832 %}
9833
9834 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9835 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9836
9837 ins_cost(200);
9838 expand %{
9839 cmovF_regU(cop, cr, dst, src);
9840 %}
9841 %}
9842
9843 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
9844 %{
9845 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9846
9847 ins_cost(200); // XXX
9848 format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
9849 "movss $dst, $src\n"
9850 "skip:" %}
9851 ins_encode %{
9852 Label Lskip;
9853 // Invert sense of branch from sense of CMOV
9854 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9855 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9856 __ bind(Lskip);
9857 %}
9858 ins_pipe(pipe_slow);
9859 %}
9860
9861 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9862 %{
9863 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9864
9865 ins_cost(200); // XXX
9866 format %{ "jn$cop skip\t# signed cmove double\n\t"
9867 "movsd $dst, $src\n"
9868 "skip:" %}
9869 ins_encode %{
9870 Label Lskip;
9871 // Invert sense of branch from sense of CMOV
9872 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9873 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9874 __ bind(Lskip);
9875 %}
9876 ins_pipe(pipe_slow);
9877 %}
9878
9879 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9880 %{
9881 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9882
9883 ins_cost(200); // XXX
9884 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9885 "movsd $dst, $src\n"
9886 "skip:" %}
9887 ins_encode %{
9888 Label Lskip;
9889 // Invert sense of branch from sense of CMOV
9890 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9891 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9892 __ bind(Lskip);
9893 %}
9894 ins_pipe(pipe_slow);
9895 %}
9896
9897 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9898 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9899
9900 ins_cost(200);
9901 expand %{
9902 cmovD_regU(cop, cr, dst, src);
9903 %}
9904 %}
9905
9906 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
9907 %{
9908 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9909
9910 ins_cost(200); // XXX
9911 format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
9912 "movsd $dst, $src\n"
9913 "skip:" %}
9914 ins_encode %{
9915 Label Lskip;
9916 // Invert sense of branch from sense of CMOV
9917 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9918 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9919 __ bind(Lskip);
9920 %}
9921 ins_pipe(pipe_slow);
9922 %}
9923
9924 //----------Arithmetic Instructions--------------------------------------------
9925 //----------Addition Instructions----------------------------------------------
9926
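// The flag(PD::Flag_sets_*) annotations advertise which condition-code bits an
// instruction defines, presumably so later passes can tell when the flags
// produced by the arithmetic can be reused instead of emitting a separate
// test/cmp. Flag_ndd_demotable_opr1/opr2 appear to mark APX NDD forms whose
// operand allows demotion to the legacy two-operand encoding when the
// destination ends up allocated to that source (an assumption based on the
// flag names; see the NDD handling in the backend).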
9927 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9928 %{
9929 predicate(!UseAPX);
9930 match(Set dst (AddI dst src));
9931 effect(KILL cr);
9932 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9933 format %{ "addl $dst, $src\t# int" %}
9934 ins_encode %{
9935 __ addl($dst$$Register, $src$$Register);
9936 %}
9937 ins_pipe(ialu_reg_reg);
9938 %}
9939
9940 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9941 %{
9942 predicate(UseAPX);
9943 match(Set dst (AddI src1 src2));
9944 effect(KILL cr);
9945 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9946
9947 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9948 ins_encode %{
9949 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9950 %}
9951 ins_pipe(ialu_reg_reg);
9952 %}
9953
9954 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9955 %{
9956 predicate(!UseAPX);
9957 match(Set dst (AddI dst src));
9958 effect(KILL cr);
9959 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9960
9961 format %{ "addl $dst, $src\t# int" %}
9962 ins_encode %{
9963 __ addl($dst$$Register, $src$$constant);
9964 %}
9965 ins_pipe( ialu_reg );
9966 %}
9967
9968 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9969 %{
9970 predicate(UseAPX);
9971 match(Set dst (AddI src1 src2));
9972 effect(KILL cr);
9973 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
9974
9975 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9976 ins_encode %{
9977 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9978 %}
9979 ins_pipe( ialu_reg );
9980 %}
9981
9982 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9983 %{
9984 predicate(UseAPX);
9985 match(Set dst (AddI (LoadI src1) src2));
9986 effect(KILL cr);
9987 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9988
9989 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9990 ins_encode %{
9991 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9992 %}
9993 ins_pipe( ialu_reg );
9994 %}
9995
9996 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9997 %{
9998 predicate(!UseAPX);
9999 match(Set dst (AddI dst (LoadI src)));
10000 effect(KILL cr);
10001 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10002
10003 ins_cost(150); // XXX
10004 format %{ "addl $dst, $src\t# int" %}
10005 ins_encode %{
10006 __ addl($dst$$Register, $src$$Address);
10007 %}
10008 ins_pipe(ialu_reg_mem);
10009 %}
10010
10011 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10012 %{
10013 predicate(UseAPX);
10014 match(Set dst (AddI src1 (LoadI src2)));
10015 effect(KILL cr);
10016 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10017
10018 ins_cost(150);
10019 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10020 ins_encode %{
10021 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10022 %}
10023 ins_pipe(ialu_reg_mem);
10024 %}
10025
10026 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10027 %{
10028 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10029 effect(KILL cr);
10030 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10031
10032 ins_cost(150); // XXX
10033 format %{ "addl $dst, $src\t# int" %}
10034 ins_encode %{
10035 __ addl($dst$$Address, $src$$Register);
10036 %}
10037 ins_pipe(ialu_mem_reg);
10038 %}
10039
10040 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10041 %{
10042 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10043 effect(KILL cr);
10044 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10045
10046
10047 ins_cost(125); // XXX
10048 format %{ "addl $dst, $src\t# int" %}
10049 ins_encode %{
10050 __ addl($dst$$Address, $src$$constant);
10051 %}
10052 ins_pipe(ialu_mem_imm);
10053 %}
10054
10055 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10056 %{
10057 predicate(!UseAPX && UseIncDec);
10058 match(Set dst (AddI dst src));
10059 effect(KILL cr);
10060
10061 format %{ "incl $dst\t# int" %}
10062 ins_encode %{
10063 __ incrementl($dst$$Register);
10064 %}
10065 ins_pipe(ialu_reg);
10066 %}
10067
10068 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10069 %{
10070 predicate(UseAPX && UseIncDec);
10071 match(Set dst (AddI src val));
10072 effect(KILL cr);
10073 flag(PD::Flag_ndd_demotable_opr1);
10074
10075 format %{ "eincl $dst, $src\t# int ndd" %}
10076 ins_encode %{
10077 __ eincl($dst$$Register, $src$$Register, false);
10078 %}
10079 ins_pipe(ialu_reg);
10080 %}
10081
10082 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10083 %{
10084 predicate(UseAPX && UseIncDec);
10085 match(Set dst (AddI (LoadI src) val));
10086 effect(KILL cr);
10087
10088 format %{ "eincl $dst, $src\t# int ndd" %}
10089 ins_encode %{
10090 __ eincl($dst$$Register, $src$$Address, false);
10091 %}
10092 ins_pipe(ialu_reg);
10093 %}
10094
10095 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10096 %{
10097 predicate(UseIncDec);
10098 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10099 effect(KILL cr);
10100
10101 ins_cost(125); // XXX
10102 format %{ "incl $dst\t# int" %}
10103 ins_encode %{
10104 __ incrementl($dst$$Address);
10105 %}
10106 ins_pipe(ialu_mem_imm);
10107 %}
10108
10109 // XXX why does that use AddI
10110 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10111 %{
10112 predicate(!UseAPX && UseIncDec);
10113 match(Set dst (AddI dst src));
10114 effect(KILL cr);
10115
10116 format %{ "decl $dst\t# int" %}
10117 ins_encode %{
10118 __ decrementl($dst$$Register);
10119 %}
10120 ins_pipe(ialu_reg);
10121 %}
10122
10123 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10124 %{
10125 predicate(UseAPX && UseIncDec);
10126 match(Set dst (AddI src val));
10127 effect(KILL cr);
10128 flag(PD::Flag_ndd_demotable_opr1);
10129
10130 format %{ "edecl $dst, $src\t# int ndd" %}
10131 ins_encode %{
10132 __ edecl($dst$$Register, $src$$Register, false);
10133 %}
10134 ins_pipe(ialu_reg);
10135 %}
10136
10137 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10138 %{
10139 predicate(UseAPX && UseIncDec);
10140 match(Set dst (AddI (LoadI src) val));
10141 effect(KILL cr);
10142
10143 format %{ "edecl $dst, $src\t# int ndd" %}
10144 ins_encode %{
10145 __ edecl($dst$$Register, $src$$Address, false);
10146 %}
10147 ins_pipe(ialu_reg);
10148 %}
10149
10150 // XXX why does that use AddI
10151 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10152 %{
10153 predicate(UseIncDec);
10154 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10155 effect(KILL cr);
10156
10157 ins_cost(125); // XXX
10158 format %{ "decl $dst\t# int" %}
10159 ins_encode %{
10160 __ decrementl($dst$$Address);
10161 %}
10162 ins_pipe(ialu_mem_imm);
10163 %}
10164
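// The leal forms below fold shift-and-add address arithmetic into a single
// lea; they are guarded by supports_fast_2op_lea()/supports_fast_3op_lea()
// because a scaled or three-component lea is only a win on CPUs that execute
// it at full speed.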
10165 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10166 %{
10167 predicate(VM_Version::supports_fast_2op_lea());
10168 match(Set dst (AddI (LShiftI index scale) disp));
10169
10170 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10171 ins_encode %{
10172 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10173 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10174 %}
10175 ins_pipe(ialu_reg_reg);
10176 %}
10177
10178 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10179 %{
10180 predicate(VM_Version::supports_fast_3op_lea());
10181 match(Set dst (AddI (AddI base index) disp));
10182
10183 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10184 ins_encode %{
10185 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10186 %}
10187 ins_pipe(ialu_reg_reg);
10188 %}
10189
10190 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10191 %{
10192 predicate(VM_Version::supports_fast_2op_lea());
10193 match(Set dst (AddI base (LShiftI index scale)));
10194
10195 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10196 ins_encode %{
10197 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10198 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10199 %}
10200 ins_pipe(ialu_reg_reg);
10201 %}
10202
10203 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10204 %{
10205 predicate(VM_Version::supports_fast_3op_lea());
10206 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10207
10208 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10209 ins_encode %{
10210 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10211 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10212 %}
10213 ins_pipe(ialu_reg_reg);
10214 %}
10215
10216 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10217 %{
10218 predicate(!UseAPX);
10219 match(Set dst (AddL dst src));
10220 effect(KILL cr);
10221 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10222
10223 format %{ "addq $dst, $src\t# long" %}
10224 ins_encode %{
10225 __ addq($dst$$Register, $src$$Register);
10226 %}
10227 ins_pipe(ialu_reg_reg);
10228 %}
10229
10230 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10231 %{
10232 predicate(UseAPX);
10233 match(Set dst (AddL src1 src2));
10234 effect(KILL cr);
10235 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10236
10237 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10238 ins_encode %{
10239 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10240 %}
10241 ins_pipe(ialu_reg_reg);
10242 %}
10243
10244 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10245 %{
10246 predicate(!UseAPX);
10247 match(Set dst (AddL dst src));
10248 effect(KILL cr);
10249 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10250
10251 format %{ "addq $dst, $src\t# long" %}
10252 ins_encode %{
10253 __ addq($dst$$Register, $src$$constant);
10254 %}
10255 ins_pipe( ialu_reg );
10256 %}
10257
10258 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10259 %{
10260 predicate(UseAPX);
10261 match(Set dst (AddL src1 src2));
10262 effect(KILL cr);
10263 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10264
10265 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10266 ins_encode %{
10267 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10268 %}
10269 ins_pipe( ialu_reg );
10270 %}
10271
10272 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10273 %{
10274 predicate(UseAPX);
10275 match(Set dst (AddL (LoadL src1) src2));
10276 effect(KILL cr);
10277 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10278
10279 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10280 ins_encode %{
10281 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10282 %}
10283 ins_pipe( ialu_reg );
10284 %}
10285
10286 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10287 %{
10288 predicate(!UseAPX);
10289 match(Set dst (AddL dst (LoadL src)));
10290 effect(KILL cr);
10291 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10292
10293 ins_cost(150); // XXX
10294 format %{ "addq $dst, $src\t# long" %}
10295 ins_encode %{
10296 __ addq($dst$$Register, $src$$Address);
10297 %}
10298 ins_pipe(ialu_reg_mem);
10299 %}
10300
10301 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10302 %{
10303 predicate(UseAPX);
10304 match(Set dst (AddL src1 (LoadL src2)));
10305 effect(KILL cr);
10306 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10307
10308 ins_cost(150);
10309 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10310 ins_encode %{
10311 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10312 %}
10313 ins_pipe(ialu_reg_mem);
10314 %}
10315
10316 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10317 %{
10318 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10319 effect(KILL cr);
10320 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10321
10322 ins_cost(150); // XXX
10323 format %{ "addq $dst, $src\t# long" %}
10324 ins_encode %{
10325 __ addq($dst$$Address, $src$$Register);
10326 %}
10327 ins_pipe(ialu_mem_reg);
10328 %}
10329
10330 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10331 %{
10332 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10333 effect(KILL cr);
10334 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10335
10336 ins_cost(125); // XXX
10337 format %{ "addq $dst, $src\t# long" %}
10338 ins_encode %{
10339 __ addq($dst$$Address, $src$$constant);
10340 %}
10341 ins_pipe(ialu_mem_imm);
10342 %}
10343
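// Increment/decrement forms are guarded by UseIncDec: inc/dec leave CF untouched, which
// some microarchitectures penalize with a partial-flags merge, so add/sub of $1 is used
// instead when the flag is off.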
10344 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10345 %{
10346 predicate(!UseAPX && UseIncDec);
10347 match(Set dst (AddL dst src));
10348 effect(KILL cr);
10349
10350 format %{ "incq $dst\t# long" %}
10351 ins_encode %{
10352 __ incrementq($dst$$Register);
10353 %}
10354 ins_pipe(ialu_reg);
10355 %}
10356
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10358 %{
10359 predicate(UseAPX && UseIncDec);
10360 match(Set dst (AddL src val));
10361 effect(KILL cr);
10362 flag(PD::Flag_ndd_demotable_opr1);
10363
10364 format %{ "eincq $dst, $src\t# long ndd" %}
10365 ins_encode %{
10366 __ eincq($dst$$Register, $src$$Register, false);
10367 %}
10368 ins_pipe(ialu_reg);
10369 %}
10370
10371 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10372 %{
10373 predicate(UseAPX && UseIncDec);
10374 match(Set dst (AddL (LoadL src) val));
10375 effect(KILL cr);
10376
10377 format %{ "eincq $dst, $src\t# long ndd" %}
10378 ins_encode %{
10379 __ eincq($dst$$Register, $src$$Address, false);
10380 %}
10381 ins_pipe(ialu_reg);
10382 %}
10383
10384 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10385 %{
10386 predicate(UseIncDec);
10387 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10388 effect(KILL cr);
10389
10390 ins_cost(125); // XXX
10391 format %{ "incq $dst\t# long" %}
10392 ins_encode %{
10393 __ incrementq($dst$$Address);
10394 %}
10395 ins_pipe(ialu_mem_imm);
10396 %}
10397
// Matches AddL because C2 canonicalizes "x - 1" into "x + (-1)" before instruction selection.
10399 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10400 %{
10401 predicate(!UseAPX && UseIncDec);
10402 match(Set dst (AddL dst src));
10403 effect(KILL cr);
10404
10405 format %{ "decq $dst\t# long" %}
10406 ins_encode %{
10407 __ decrementq($dst$$Register);
10408 %}
10409 ins_pipe(ialu_reg);
10410 %}
10411
10412 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10413 %{
10414 predicate(UseAPX && UseIncDec);
10415 match(Set dst (AddL src val));
10416 effect(KILL cr);
10417 flag(PD::Flag_ndd_demotable_opr1);
10418
10419 format %{ "edecq $dst, $src\t# long ndd" %}
10420 ins_encode %{
10421 __ edecq($dst$$Register, $src$$Register, false);
10422 %}
10423 ins_pipe(ialu_reg);
10424 %}
10425
10426 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10427 %{
10428 predicate(UseAPX && UseIncDec);
10429 match(Set dst (AddL (LoadL src) val));
10430 effect(KILL cr);
10431
10432 format %{ "edecq $dst, $src\t# long ndd" %}
10433 ins_encode %{
10434 __ edecq($dst$$Register, $src$$Address, false);
10435 %}
10436 ins_pipe(ialu_reg);
10437 %}
10438
// Matches AddL for the same reason as decL_rReg above: "x - 1" arrives as AddL with immL_M1.
10440 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10441 %{
10442 predicate(UseIncDec);
10443 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10444 effect(KILL cr);
10445
10446 ins_cost(125); // XXX
10447 format %{ "decq $dst\t# long" %}
10448 ins_encode %{
10449 __ decrementq($dst$$Address);
10450 %}
10451 ins_pipe(ialu_mem_imm);
10452 %}
10453
10454 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10455 %{
10456 predicate(VM_Version::supports_fast_2op_lea());
10457 match(Set dst (AddL (LShiftL index scale) disp));
10458
10459 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10460 ins_encode %{
10461 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10462 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10463 %}
10464 ins_pipe(ialu_reg_reg);
10465 %}
10466
10467 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10468 %{
10469 predicate(VM_Version::supports_fast_3op_lea());
10470 match(Set dst (AddL (AddL base index) disp));
10471
10472 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10473 ins_encode %{
10474 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10475 %}
10476 ins_pipe(ialu_reg_reg);
10477 %}
10478
10479 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10480 %{
10481 predicate(VM_Version::supports_fast_2op_lea());
10482 match(Set dst (AddL base (LShiftL index scale)));
10483
10484 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10485 ins_encode %{
10486 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10487 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10488 %}
10489 ins_pipe(ialu_reg_reg);
10490 %}
10491
10492 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10493 %{
10494 predicate(VM_Version::supports_fast_3op_lea());
10495 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10496
10497 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10498 ins_encode %{
10499 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10500 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10501 %}
10502 ins_pipe(ialu_reg_reg);
10503 %}
10504
10505 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10506 %{
10507 match(Set dst (AddP dst src));
10508 effect(KILL cr);
10509 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10510
10511 format %{ "addq $dst, $src\t# ptr" %}
10512 ins_encode %{
10513 __ addq($dst$$Register, $src$$Register);
10514 %}
10515 ins_pipe(ialu_reg_reg);
10516 %}
10517
10518 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10519 %{
10520 match(Set dst (AddP dst src));
10521 effect(KILL cr);
10522 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10523
10524 format %{ "addq $dst, $src\t# ptr" %}
10525 ins_encode %{
10526 __ addq($dst$$Register, $src$$constant);
10527 %}
10528 ins_pipe( ialu_reg );
10529 %}
10530
10531 // XXX addP mem ops ????
10532
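// The cast instructions below are compile-time type assertions: they emit no code
// (size(0), empty encoding) unless VerifyConstraintCasts requests a runtime range check
// for castII/castLL.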
10533 instruct checkCastPP(rRegP dst)
10534 %{
10535 match(Set dst (CheckCastPP dst));
10536
10537 size(0);
10538 format %{ "# checkcastPP of $dst" %}
10539 ins_encode(/* empty encoding */);
10540 ins_pipe(empty);
10541 %}
10542
10543 instruct castPP(rRegP dst)
10544 %{
10545 match(Set dst (CastPP dst));
10546
10547 size(0);
10548 format %{ "# castPP of $dst" %}
10549 ins_encode(/* empty encoding */);
10550 ins_pipe(empty);
10551 %}
10552
10553 instruct castII(rRegI dst)
10554 %{
10555 predicate(VerifyConstraintCasts == 0);
10556 match(Set dst (CastII dst));
10557
10558 size(0);
10559 format %{ "# castII of $dst" %}
10560 ins_encode(/* empty encoding */);
10561 ins_cost(0);
10562 ins_pipe(empty);
10563 %}
10564
10565 instruct castII_checked(rRegI dst, rFlagsReg cr)
10566 %{
10567 predicate(VerifyConstraintCasts > 0);
10568 match(Set dst (CastII dst));
10569
10570 effect(KILL cr);
10571 format %{ "# cast_checked_II $dst" %}
10572 ins_encode %{
10573 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10574 %}
10575 ins_pipe(pipe_slow);
10576 %}
10577
10578 instruct castLL(rRegL dst)
10579 %{
10580 predicate(VerifyConstraintCasts == 0);
10581 match(Set dst (CastLL dst));
10582
10583 size(0);
10584 format %{ "# castLL of $dst" %}
10585 ins_encode(/* empty encoding */);
10586 ins_cost(0);
10587 ins_pipe(empty);
10588 %}
10589
10590 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10591 %{
10592 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10593 match(Set dst (CastLL dst));
10594
10595 effect(KILL cr);
10596 format %{ "# cast_checked_LL $dst" %}
10597 ins_encode %{
10598 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10599 %}
10600 ins_pipe(pipe_slow);
10601 %}
10602
10603 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10604 %{
10605 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10606 match(Set dst (CastLL dst));
10607
10608 effect(KILL cr, TEMP tmp);
10609 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10610 ins_encode %{
10611 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10612 %}
10613 ins_pipe(pipe_slow);
10614 %}
10615
10616 instruct castFF(regF dst)
10617 %{
10618 match(Set dst (CastFF dst));
10619
10620 size(0);
10621 format %{ "# castFF of $dst" %}
10622 ins_encode(/* empty encoding */);
10623 ins_cost(0);
10624 ins_pipe(empty);
10625 %}
10626
10627 instruct castHH(regF dst)
10628 %{
10629 match(Set dst (CastHH dst));
10630
10631 size(0);
10632 format %{ "# castHH of $dst" %}
10633 ins_encode(/* empty encoding */);
10634 ins_cost(0);
10635 ins_pipe(empty);
10636 %}
10637
10638 instruct castDD(regD dst)
10639 %{
10640 match(Set dst (CastDD dst));
10641
10642 size(0);
10643 format %{ "# castDD of $dst" %}
10644 ins_encode(/* empty encoding */);
10645 ins_cost(0);
10646 ins_pipe(empty);
10647 %}
10648
10649 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
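// lock cmpxchg compares rax (oldval) against the memory operand; on a match the new value
// is stored and ZF is set, otherwise the current memory value is loaded back into rax.
// CompareAndSwap materializes the boolean result from ZF with setcc, while
// CompareAndExchange below simply returns whatever ends up in rax.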
10650 instruct compareAndSwapP(rRegI res,
10651 memory mem_ptr,
10652 rax_RegP oldval, rRegP newval,
10653 rFlagsReg cr)
10654 %{
10655 predicate(n->as_LoadStore()->barrier_data() == 0);
10656 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10657 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10658 effect(KILL cr, KILL oldval);
10659
10660 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10661 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10662 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10663 ins_encode %{
10664 __ lock();
10665 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10666 __ setcc(Assembler::equal, $res$$Register);
10667 %}
10668 ins_pipe( pipe_cmpxchg );
10669 %}
10670
10671 instruct compareAndSwapL(rRegI res,
10672 memory mem_ptr,
10673 rax_RegL oldval, rRegL newval,
10674 rFlagsReg cr)
10675 %{
10676 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10677 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10678 effect(KILL cr, KILL oldval);
10679
10680 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10681 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10682 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10683 ins_encode %{
10684 __ lock();
10685 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10686 __ setcc(Assembler::equal, $res$$Register);
10687 %}
10688 ins_pipe( pipe_cmpxchg );
10689 %}
10690
10691 instruct compareAndSwapI(rRegI res,
10692 memory mem_ptr,
10693 rax_RegI oldval, rRegI newval,
10694 rFlagsReg cr)
10695 %{
10696 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10697 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10698 effect(KILL cr, KILL oldval);
10699
10700 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10701 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10702 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10703 ins_encode %{
10704 __ lock();
10705 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10706 __ setcc(Assembler::equal, $res$$Register);
10707 %}
10708 ins_pipe( pipe_cmpxchg );
10709 %}
10710
10711 instruct compareAndSwapB(rRegI res,
10712 memory mem_ptr,
10713 rax_RegI oldval, rRegI newval,
10714 rFlagsReg cr)
10715 %{
10716 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10717 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10718 effect(KILL cr, KILL oldval);
10719
10720 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10721 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10722 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10723 ins_encode %{
10724 __ lock();
10725 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10726 __ setcc(Assembler::equal, $res$$Register);
10727 %}
10728 ins_pipe( pipe_cmpxchg );
10729 %}
10730
10731 instruct compareAndSwapS(rRegI res,
10732 memory mem_ptr,
10733 rax_RegI oldval, rRegI newval,
10734 rFlagsReg cr)
10735 %{
10736 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10737 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10738 effect(KILL cr, KILL oldval);
10739
10740 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10741 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10742 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10743 ins_encode %{
10744 __ lock();
10745 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10746 __ setcc(Assembler::equal, $res$$Register);
10747 %}
10748 ins_pipe( pipe_cmpxchg );
10749 %}
10750
10751 instruct compareAndSwapN(rRegI res,
10752 memory mem_ptr,
10753 rax_RegN oldval, rRegN newval,
10754 rFlagsReg cr) %{
10755 predicate(n->as_LoadStore()->barrier_data() == 0);
10756 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10757 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10758 effect(KILL cr, KILL oldval);
10759
10760 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10761 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10762 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10763 ins_encode %{
10764 __ lock();
10765 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10766 __ setcc(Assembler::equal, $res$$Register);
10767 %}
10768 ins_pipe( pipe_cmpxchg );
10769 %}
10770
10771 instruct compareAndExchangeB(
10772 memory mem_ptr,
10773 rax_RegI oldval, rRegI newval,
10774 rFlagsReg cr)
10775 %{
10776 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10777 effect(KILL cr);
10778
10779 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10780 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10781 ins_encode %{
10782 __ lock();
10783 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10784 %}
10785 ins_pipe( pipe_cmpxchg );
10786 %}
10787
10788 instruct compareAndExchangeS(
10789 memory mem_ptr,
10790 rax_RegI oldval, rRegI newval,
10791 rFlagsReg cr)
10792 %{
10793 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10794 effect(KILL cr);
10795
10796 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10797 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10798 ins_encode %{
10799 __ lock();
10800 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10801 %}
10802 ins_pipe( pipe_cmpxchg );
10803 %}
10804
10805 instruct compareAndExchangeI(
10806 memory mem_ptr,
10807 rax_RegI oldval, rRegI newval,
10808 rFlagsReg cr)
10809 %{
10810 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10811 effect(KILL cr);
10812
10813 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10814 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10815 ins_encode %{
10816 __ lock();
10817 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10818 %}
10819 ins_pipe( pipe_cmpxchg );
10820 %}
10821
10822 instruct compareAndExchangeL(
10823 memory mem_ptr,
10824 rax_RegL oldval, rRegL newval,
10825 rFlagsReg cr)
10826 %{
10827 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10828 effect(KILL cr);
10829
10830 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10831 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10832 ins_encode %{
10833 __ lock();
10834 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10835 %}
10836 ins_pipe( pipe_cmpxchg );
10837 %}
10838
10839 instruct compareAndExchangeN(
10840 memory mem_ptr,
10841 rax_RegN oldval, rRegN newval,
10842 rFlagsReg cr) %{
10843 predicate(n->as_LoadStore()->barrier_data() == 0);
10844 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10845 effect(KILL cr);
10846
10847 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10848 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10849 ins_encode %{
10850 __ lock();
10851 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10852 %}
10853 ins_pipe( pipe_cmpxchg );
10854 %}
10855
10856 instruct compareAndExchangeP(
10857 memory mem_ptr,
10858 rax_RegP oldval, rRegP newval,
10859 rFlagsReg cr)
10860 %{
10861 predicate(n->as_LoadStore()->barrier_data() == 0);
10862 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10863 effect(KILL cr);
10864
10865 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10866 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10867 ins_encode %{
10868 __ lock();
10869 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10870 %}
10871 ins_pipe( pipe_cmpxchg );
10872 %}
10873
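// GetAndAdd: when the fetched value is unused, a plain locked add is enough; otherwise
// lock xadd adds the register to memory and returns the old memory value in the register.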
10874 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10875 predicate(n->as_LoadStore()->result_not_used());
10876 match(Set dummy (GetAndAddB mem add));
10877 effect(KILL cr);
10878 format %{ "addb_lock $mem, $add" %}
10879 ins_encode %{
10880 __ lock();
10881 __ addb($mem$$Address, $add$$Register);
10882 %}
10883 ins_pipe(pipe_cmpxchg);
10884 %}
10885
10886 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10887 predicate(n->as_LoadStore()->result_not_used());
10888 match(Set dummy (GetAndAddB mem add));
10889 effect(KILL cr);
10890 format %{ "addb_lock $mem, $add" %}
10891 ins_encode %{
10892 __ lock();
10893 __ addb($mem$$Address, $add$$constant);
10894 %}
10895 ins_pipe(pipe_cmpxchg);
10896 %}
10897
10898 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10899 predicate(!n->as_LoadStore()->result_not_used());
10900 match(Set newval (GetAndAddB mem newval));
10901 effect(KILL cr);
10902 format %{ "xaddb_lock $mem, $newval" %}
10903 ins_encode %{
10904 __ lock();
10905 __ xaddb($mem$$Address, $newval$$Register);
10906 %}
10907 ins_pipe(pipe_cmpxchg);
10908 %}
10909
10910 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10911 predicate(n->as_LoadStore()->result_not_used());
10912 match(Set dummy (GetAndAddS mem add));
10913 effect(KILL cr);
10914 format %{ "addw_lock $mem, $add" %}
10915 ins_encode %{
10916 __ lock();
10917 __ addw($mem$$Address, $add$$Register);
10918 %}
10919 ins_pipe(pipe_cmpxchg);
10920 %}
10921
10922 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10923 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10924 match(Set dummy (GetAndAddS mem add));
10925 effect(KILL cr);
10926 format %{ "addw_lock $mem, $add" %}
10927 ins_encode %{
10928 __ lock();
10929 __ addw($mem$$Address, $add$$constant);
10930 %}
10931 ins_pipe(pipe_cmpxchg);
10932 %}
10933
10934 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10935 predicate(!n->as_LoadStore()->result_not_used());
10936 match(Set newval (GetAndAddS mem newval));
10937 effect(KILL cr);
10938 format %{ "xaddw_lock $mem, $newval" %}
10939 ins_encode %{
10940 __ lock();
10941 __ xaddw($mem$$Address, $newval$$Register);
10942 %}
10943 ins_pipe(pipe_cmpxchg);
10944 %}
10945
10946 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10947 predicate(n->as_LoadStore()->result_not_used());
10948 match(Set dummy (GetAndAddI mem add));
10949 effect(KILL cr);
10950 format %{ "addl_lock $mem, $add" %}
10951 ins_encode %{
10952 __ lock();
10953 __ addl($mem$$Address, $add$$Register);
10954 %}
10955 ins_pipe(pipe_cmpxchg);
10956 %}
10957
10958 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10959 predicate(n->as_LoadStore()->result_not_used());
10960 match(Set dummy (GetAndAddI mem add));
10961 effect(KILL cr);
10962 format %{ "addl_lock $mem, $add" %}
10963 ins_encode %{
10964 __ lock();
10965 __ addl($mem$$Address, $add$$constant);
10966 %}
10967 ins_pipe(pipe_cmpxchg);
10968 %}
10969
10970 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10971 predicate(!n->as_LoadStore()->result_not_used());
10972 match(Set newval (GetAndAddI mem newval));
10973 effect(KILL cr);
10974 format %{ "xaddl_lock $mem, $newval" %}
10975 ins_encode %{
10976 __ lock();
10977 __ xaddl($mem$$Address, $newval$$Register);
10978 %}
10979 ins_pipe(pipe_cmpxchg);
10980 %}
10981
10982 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10983 predicate(n->as_LoadStore()->result_not_used());
10984 match(Set dummy (GetAndAddL mem add));
10985 effect(KILL cr);
10986 format %{ "addq_lock $mem, $add" %}
10987 ins_encode %{
10988 __ lock();
10989 __ addq($mem$$Address, $add$$Register);
10990 %}
10991 ins_pipe(pipe_cmpxchg);
10992 %}
10993
10994 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10995 predicate(n->as_LoadStore()->result_not_used());
10996 match(Set dummy (GetAndAddL mem add));
10997 effect(KILL cr);
10998 format %{ "addq_lock $mem, $add" %}
10999 ins_encode %{
11000 __ lock();
11001 __ addq($mem$$Address, $add$$constant);
11002 %}
11003 ins_pipe(pipe_cmpxchg);
11004 %}
11005
11006 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11007 predicate(!n->as_LoadStore()->result_not_used());
11008 match(Set newval (GetAndAddL mem newval));
11009 effect(KILL cr);
11010 format %{ "xaddq_lock $mem, $newval" %}
11011 ins_encode %{
11012 __ lock();
11013 __ xaddq($mem$$Address, $newval$$Register);
11014 %}
11015 ins_pipe(pipe_cmpxchg);
11016 %}
11017
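// GetAndSet: xchg with a memory operand is implicitly locked, so no lock prefix is needed.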
11018 instruct xchgB( memory mem, rRegI newval) %{
11019 match(Set newval (GetAndSetB mem newval));
11020 format %{ "XCHGB $newval,[$mem]" %}
11021 ins_encode %{
11022 __ xchgb($newval$$Register, $mem$$Address);
11023 %}
11024 ins_pipe( pipe_cmpxchg );
11025 %}
11026
11027 instruct xchgS( memory mem, rRegI newval) %{
11028 match(Set newval (GetAndSetS mem newval));
11029 format %{ "XCHGW $newval,[$mem]" %}
11030 ins_encode %{
11031 __ xchgw($newval$$Register, $mem$$Address);
11032 %}
11033 ins_pipe( pipe_cmpxchg );
11034 %}
11035
11036 instruct xchgI( memory mem, rRegI newval) %{
11037 match(Set newval (GetAndSetI mem newval));
11038 format %{ "XCHGL $newval,[$mem]" %}
11039 ins_encode %{
11040 __ xchgl($newval$$Register, $mem$$Address);
11041 %}
11042 ins_pipe( pipe_cmpxchg );
11043 %}
11044
11045 instruct xchgL( memory mem, rRegL newval) %{
11046 match(Set newval (GetAndSetL mem newval));
format %{ "XCHGQ $newval,[$mem]" %}
11048 ins_encode %{
11049 __ xchgq($newval$$Register, $mem$$Address);
11050 %}
11051 ins_pipe( pipe_cmpxchg );
11052 %}
11053
11054 instruct xchgP( memory mem, rRegP newval) %{
11055 match(Set newval (GetAndSetP mem newval));
11056 predicate(n->as_LoadStore()->barrier_data() == 0);
11057 format %{ "XCHGQ $newval,[$mem]" %}
11058 ins_encode %{
11059 __ xchgq($newval$$Register, $mem$$Address);
11060 %}
11061 ins_pipe( pipe_cmpxchg );
11062 %}
11063
11064 instruct xchgN( memory mem, rRegN newval) %{
11065 predicate(n->as_LoadStore()->barrier_data() == 0);
11066 match(Set newval (GetAndSetN mem newval));
format %{ "XCHGL $newval,[$mem]" %}
11068 ins_encode %{
11069 __ xchgl($newval$$Register, $mem$$Address);
11070 %}
11071 ins_pipe( pipe_cmpxchg );
11072 %}
11073
11074 //----------Abs Instructions-------------------------------------------
11075
11076 // Integer Absolute Instructions
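// Branchless abs: xor zeroes dst, sub leaves -src in dst and sets the sign flag, and the
// conditional move restores the original src when -src is negative (i.e. src was positive).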
11077 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11078 %{
11079 match(Set dst (AbsI src));
11080 effect(TEMP dst, KILL cr);
11081 format %{ "xorl $dst, $dst\t# abs int\n\t"
11082 "subl $dst, $src\n\t"
11083 "cmovll $dst, $src" %}
11084 ins_encode %{
11085 __ xorl($dst$$Register, $dst$$Register);
11086 __ subl($dst$$Register, $src$$Register);
11087 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11088 %}
11089
11090 ins_pipe(ialu_reg_reg);
11091 %}
11092
11093 // Long Absolute Instructions
11094 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11095 %{
11096 match(Set dst (AbsL src));
11097 effect(TEMP dst, KILL cr);
11098 format %{ "xorl $dst, $dst\t# abs long\n\t"
11099 "subq $dst, $src\n\t"
11100 "cmovlq $dst, $src" %}
11101 ins_encode %{
11102 __ xorl($dst$$Register, $dst$$Register);
11103 __ subq($dst$$Register, $src$$Register);
11104 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11105 %}
11106
11107 ins_pipe(ialu_reg_reg);
11108 %}
11109
11110 //----------Subtraction Instructions-------------------------------------------
11111
11112 // Integer Subtraction Instructions
11113 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11114 %{
11115 predicate(!UseAPX);
11116 match(Set dst (SubI dst src));
11117 effect(KILL cr);
11118 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11119
11120 format %{ "subl $dst, $src\t# int" %}
11121 ins_encode %{
11122 __ subl($dst$$Register, $src$$Register);
11123 %}
11124 ins_pipe(ialu_reg_reg);
11125 %}
11126
11127 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11128 %{
11129 predicate(UseAPX);
11130 match(Set dst (SubI src1 src2));
11131 effect(KILL cr);
11132 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11133
11134 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11135 ins_encode %{
11136 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11137 %}
11138 ins_pipe(ialu_reg_reg);
11139 %}
11140
11141 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11142 %{
11143 predicate(UseAPX);
11144 match(Set dst (SubI src1 src2));
11145 effect(KILL cr);
11146 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11147
11148 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11149 ins_encode %{
11150 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11151 %}
11152 ins_pipe(ialu_reg_reg);
11153 %}
11154
11155 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11156 %{
11157 predicate(UseAPX);
11158 match(Set dst (SubI (LoadI src1) src2));
11159 effect(KILL cr);
11160 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11161
11162 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11163 ins_encode %{
11164 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11165 %}
11166 ins_pipe(ialu_reg_reg);
11167 %}
11168
11169 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11170 %{
11171 predicate(!UseAPX);
11172 match(Set dst (SubI dst (LoadI src)));
11173 effect(KILL cr);
11174 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11175
11176 ins_cost(150);
11177 format %{ "subl $dst, $src\t# int" %}
11178 ins_encode %{
11179 __ subl($dst$$Register, $src$$Address);
11180 %}
11181 ins_pipe(ialu_reg_mem);
11182 %}
11183
11184 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11185 %{
11186 predicate(UseAPX);
11187 match(Set dst (SubI src1 (LoadI src2)));
11188 effect(KILL cr);
11189 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11190
11191 ins_cost(150);
11192 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11193 ins_encode %{
11194 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11195 %}
11196 ins_pipe(ialu_reg_mem);
11197 %}
11198
11199 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11200 %{
11201 predicate(UseAPX);
11202 match(Set dst (SubI (LoadI src1) src2));
11203 effect(KILL cr);
11204 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11205
11206 ins_cost(150);
11207 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11208 ins_encode %{
11209 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11210 %}
11211 ins_pipe(ialu_reg_mem);
11212 %}
11213
11214 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11215 %{
11216 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11217 effect(KILL cr);
11218 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11219
11220 ins_cost(150);
11221 format %{ "subl $dst, $src\t# int" %}
11222 ins_encode %{
11223 __ subl($dst$$Address, $src$$Register);
11224 %}
11225 ins_pipe(ialu_mem_reg);
11226 %}
11227
11228 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11229 %{
11230 predicate(!UseAPX);
11231 match(Set dst (SubL dst src));
11232 effect(KILL cr);
11233 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11234
11235 format %{ "subq $dst, $src\t# long" %}
11236 ins_encode %{
11237 __ subq($dst$$Register, $src$$Register);
11238 %}
11239 ins_pipe(ialu_reg_reg);
11240 %}
11241
11242 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11243 %{
11244 predicate(UseAPX);
11245 match(Set dst (SubL src1 src2));
11246 effect(KILL cr);
11247 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11248
11249 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11250 ins_encode %{
11251 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11252 %}
11253 ins_pipe(ialu_reg_reg);
11254 %}
11255
11256 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11257 %{
11258 predicate(UseAPX);
11259 match(Set dst (SubL src1 src2));
11260 effect(KILL cr);
11261 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11262
11263 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11264 ins_encode %{
11265 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11266 %}
11267 ins_pipe(ialu_reg_reg);
11268 %}
11269
11270 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11271 %{
11272 predicate(UseAPX);
11273 match(Set dst (SubL (LoadL src1) src2));
11274 effect(KILL cr);
11275 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11276
11277 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11278 ins_encode %{
11279 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11280 %}
11281 ins_pipe(ialu_reg_reg);
11282 %}
11283
11284 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11285 %{
11286 predicate(!UseAPX);
11287 match(Set dst (SubL dst (LoadL src)));
11288 effect(KILL cr);
11289 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11290
11291 ins_cost(150);
11292 format %{ "subq $dst, $src\t# long" %}
11293 ins_encode %{
11294 __ subq($dst$$Register, $src$$Address);
11295 %}
11296 ins_pipe(ialu_reg_mem);
11297 %}
11298
11299 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11300 %{
11301 predicate(UseAPX);
11302 match(Set dst (SubL src1 (LoadL src2)));
11303 effect(KILL cr);
11304 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11305
11306 ins_cost(150);
11307 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11308 ins_encode %{
11309 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11310 %}
11311 ins_pipe(ialu_reg_mem);
11312 %}
11313
11314 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11315 %{
11316 predicate(UseAPX);
11317 match(Set dst (SubL (LoadL src1) src2));
11318 effect(KILL cr);
11319 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11320
11321 ins_cost(150);
11322 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11323 ins_encode %{
11324 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11325 %}
11326 ins_pipe(ialu_reg_mem);
11327 %}
11328
11329 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11330 %{
11331 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11332 effect(KILL cr);
11333 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11334
11335 ins_cost(150);
11336 format %{ "subq $dst, $src\t# long" %}
11337 ins_encode %{
11338 __ subq($dst$$Address, $src$$Register);
11339 %}
11340 ins_pipe(ialu_mem_reg);
11341 %}
11342
11343 // Subtract from a pointer
// "p - x" arrives at the matcher as AddP(p, SubI(0, x)); the subq below computes p - x directly.
11345 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11346 %{
11347 match(Set dst (AddP dst (SubI zero src)));
11348 effect(KILL cr);
11349
11350 format %{ "subq $dst, $src\t# ptr - int" %}
11351 ins_encode %{
11352 __ subq($dst$$Register, $src$$Register);
11353 %}
11354 ins_pipe(ialu_reg_reg);
11355 %}
11356
11357 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11358 %{
11359 predicate(!UseAPX);
11360 match(Set dst (SubI zero dst));
11361 effect(KILL cr);
11362 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11363
11364 format %{ "negl $dst\t# int" %}
11365 ins_encode %{
11366 __ negl($dst$$Register);
11367 %}
11368 ins_pipe(ialu_reg);
11369 %}
11370
11371 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11372 %{
11373 predicate(UseAPX);
11374 match(Set dst (SubI zero src));
11375 effect(KILL cr);
11376 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11377
11378 format %{ "enegl $dst, $src\t# int ndd" %}
11379 ins_encode %{
11380 __ enegl($dst$$Register, $src$$Register, false);
11381 %}
11382 ins_pipe(ialu_reg);
11383 %}
11384
11385 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11386 %{
11387 predicate(!UseAPX);
11388 match(Set dst (NegI dst));
11389 effect(KILL cr);
11390 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11391
11392 format %{ "negl $dst\t# int" %}
11393 ins_encode %{
11394 __ negl($dst$$Register);
11395 %}
11396 ins_pipe(ialu_reg);
11397 %}
11398
11399 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11400 %{
11401 predicate(UseAPX);
11402 match(Set dst (NegI src));
11403 effect(KILL cr);
11404 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11405
11406 format %{ "enegl $dst, $src\t# int ndd" %}
11407 ins_encode %{
11408 __ enegl($dst$$Register, $src$$Register, false);
11409 %}
11410 ins_pipe(ialu_reg);
11411 %}
11412
11413 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11414 %{
11415 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11416 effect(KILL cr);
11417 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11418
11419 format %{ "negl $dst\t# int" %}
11420 ins_encode %{
11421 __ negl($dst$$Address);
11422 %}
11423 ins_pipe(ialu_reg);
11424 %}
11425
11426 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11427 %{
11428 predicate(!UseAPX);
11429 match(Set dst (SubL zero dst));
11430 effect(KILL cr);
11431 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11432
11433 format %{ "negq $dst\t# long" %}
11434 ins_encode %{
11435 __ negq($dst$$Register);
11436 %}
11437 ins_pipe(ialu_reg);
11438 %}
11439
11440 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11441 %{
11442 predicate(UseAPX);
11443 match(Set dst (SubL zero src));
11444 effect(KILL cr);
11445 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11446
11447 format %{ "enegq $dst, $src\t# long ndd" %}
11448 ins_encode %{
11449 __ enegq($dst$$Register, $src$$Register, false);
11450 %}
11451 ins_pipe(ialu_reg);
11452 %}
11453
11454 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11455 %{
11456 predicate(!UseAPX);
11457 match(Set dst (NegL dst));
11458 effect(KILL cr);
11459 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11460
format %{ "negq $dst\t# long" %}
11462 ins_encode %{
11463 __ negq($dst$$Register);
11464 %}
11465 ins_pipe(ialu_reg);
11466 %}
11467
11468 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11469 %{
11470 predicate(UseAPX);
11471 match(Set dst (NegL src));
11472 effect(KILL cr);
11473 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11474
11475 format %{ "enegq $dst, $src\t# long ndd" %}
11476 ins_encode %{
11477 __ enegq($dst$$Register, $src$$Register, false);
11478 %}
11479 ins_pipe(ialu_reg);
11480 %}
11481
11482 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11483 %{
11484 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11485 effect(KILL cr);
11486 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11487
11488 format %{ "negq $dst\t# long" %}
11489 ins_encode %{
11490 __ negq($dst$$Address);
11491 %}
11492 ins_pipe(ialu_reg);
11493 %}
11494
11495 //----------Multiplication/Division Instructions-------------------------------
11496 // Integer Multiplication Instructions
11497 // Multiply Register
11498
11499 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11500 %{
11501 predicate(!UseAPX);
11502 match(Set dst (MulI dst src));
11503 effect(KILL cr);
11504
11505 ins_cost(300);
11506 format %{ "imull $dst, $src\t# int" %}
11507 ins_encode %{
11508 __ imull($dst$$Register, $src$$Register);
11509 %}
11510 ins_pipe(ialu_reg_reg_alu0);
11511 %}
11512
11513 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11514 %{
11515 predicate(UseAPX);
11516 match(Set dst (MulI src1 src2));
11517 effect(KILL cr);
11518 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11519
11520 ins_cost(300);
11521 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11522 ins_encode %{
11523 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11524 %}
11525 ins_pipe(ialu_reg_reg_alu0);
11526 %}
11527
11528 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11529 %{
11530 match(Set dst (MulI src imm));
11531 effect(KILL cr);
11532
11533 ins_cost(300);
11534 format %{ "imull $dst, $src, $imm\t# int" %}
11535 ins_encode %{
11536 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11537 %}
11538 ins_pipe(ialu_reg_reg_alu0);
11539 %}
11540
11541 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11542 %{
11543 predicate(!UseAPX);
11544 match(Set dst (MulI dst (LoadI src)));
11545 effect(KILL cr);
11546
11547 ins_cost(350);
11548 format %{ "imull $dst, $src\t# int" %}
11549 ins_encode %{
11550 __ imull($dst$$Register, $src$$Address);
11551 %}
11552 ins_pipe(ialu_reg_mem_alu0);
11553 %}
11554
11555 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11556 %{
11557 predicate(UseAPX);
11558 match(Set dst (MulI src1 (LoadI src2)));
11559 effect(KILL cr);
11560 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11561
11562 ins_cost(350);
11563 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11564 ins_encode %{
11565 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11566 %}
11567 ins_pipe(ialu_reg_mem_alu0);
11568 %}
11569
11570 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11571 %{
11572 match(Set dst (MulI (LoadI src) imm));
11573 effect(KILL cr);
11574
11575 ins_cost(300);
11576 format %{ "imull $dst, $src, $imm\t# int" %}
11577 ins_encode %{
11578 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11579 %}
11580 ins_pipe(ialu_reg_mem_alu0);
11581 %}
11582
11583 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11584 %{
11585 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11586 effect(KILL cr, KILL src2);
11587
11588 expand %{ mulI_rReg(dst, src1, cr);
11589 mulI_rReg(src2, src3, cr);
11590 addI_rReg(dst, src2, cr); %}
11591 %}
11592
11593 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11594 %{
11595 predicate(!UseAPX);
11596 match(Set dst (MulL dst src));
11597 effect(KILL cr);
11598
11599 ins_cost(300);
11600 format %{ "imulq $dst, $src\t# long" %}
11601 ins_encode %{
11602 __ imulq($dst$$Register, $src$$Register);
11603 %}
11604 ins_pipe(ialu_reg_reg_alu0);
11605 %}
11606
11607 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11608 %{
11609 predicate(UseAPX);
11610 match(Set dst (MulL src1 src2));
11611 effect(KILL cr);
11612 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11613
11614 ins_cost(300);
11615 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11616 ins_encode %{
11617 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11618 %}
11619 ins_pipe(ialu_reg_reg_alu0);
11620 %}
11621
11622 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11623 %{
11624 match(Set dst (MulL src imm));
11625 effect(KILL cr);
11626
11627 ins_cost(300);
11628 format %{ "imulq $dst, $src, $imm\t# long" %}
11629 ins_encode %{
11630 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11631 %}
11632 ins_pipe(ialu_reg_reg_alu0);
11633 %}
11634
11635 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11636 %{
11637 predicate(!UseAPX);
11638 match(Set dst (MulL dst (LoadL src)));
11639 effect(KILL cr);
11640
11641 ins_cost(350);
11642 format %{ "imulq $dst, $src\t# long" %}
11643 ins_encode %{
11644 __ imulq($dst$$Register, $src$$Address);
11645 %}
11646 ins_pipe(ialu_reg_mem_alu0);
11647 %}
11648
11649 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11650 %{
11651 predicate(UseAPX);
11652 match(Set dst (MulL src1 (LoadL src2)));
11653 effect(KILL cr);
11654 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11655
11656 ins_cost(350);
format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11658 ins_encode %{
11659 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11660 %}
11661 ins_pipe(ialu_reg_mem_alu0);
11662 %}
11663
11664 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11665 %{
11666 match(Set dst (MulL (LoadL src) imm));
11667 effect(KILL cr);
11668
11669 ins_cost(300);
11670 format %{ "imulq $dst, $src, $imm\t# long" %}
11671 ins_encode %{
11672 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11673 %}
11674 ins_pipe(ialu_reg_mem_alu0);
11675 %}
11676
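// High-half multiply: the one-operand imulq/mulq forms produce the full 128-bit product in
// RDX:RAX, so the upper 64 bits land directly in the fixed rdx result register.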
11677 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11678 %{
11679 match(Set dst (MulHiL src rax));
11680 effect(USE_KILL rax, KILL cr);
11681
11682 ins_cost(300);
11683 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11684 ins_encode %{
11685 __ imulq($src$$Register);
11686 %}
11687 ins_pipe(ialu_reg_reg_alu0);
11688 %}
11689
11690 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11691 %{
11692 match(Set dst (UMulHiL src rax));
11693 effect(USE_KILL rax, KILL cr);
11694
11695 ins_cost(300);
11696 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11697 ins_encode %{
11698 __ mulq($src$$Register);
11699 %}
11700 ins_pipe(ialu_reg_reg_alu0);
11701 %}
11702
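// Signed division: idiv raises #DE when the quotient overflows (min_jint/-1, min_jlong/-1),
// so the cdql_enc/cdqq_enc encodings test for that dividend/divisor pair first and produce
// the dividend with a zero remainder instead of executing the divide.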
11703 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11704 rFlagsReg cr)
11705 %{
11706 match(Set rax (DivI rax div));
11707 effect(KILL rdx, KILL cr);
11708
11709 ins_cost(30*100+10*100); // XXX
11710 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11711 "jne,s normal\n\t"
11712 "xorl rdx, rdx\n\t"
11713 "cmpl $div, -1\n\t"
11714 "je,s done\n"
11715 "normal: cdql\n\t"
11716 "idivl $div\n"
11717 "done:" %}
11718 ins_encode(cdql_enc(div));
11719 ins_pipe(ialu_reg_reg_alu0);
11720 %}
11721
11722 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11723 rFlagsReg cr)
11724 %{
11725 match(Set rax (DivL rax div));
11726 effect(KILL rdx, KILL cr);
11727
11728 ins_cost(30*100+10*100); // XXX
11729 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11730 "cmpq rax, rdx\n\t"
11731 "jne,s normal\n\t"
11732 "xorl rdx, rdx\n\t"
11733 "cmpq $div, -1\n\t"
11734 "je,s done\n"
11735 "normal: cdqq\n\t"
11736 "idivq $div\n"
11737 "done:" %}
11738 ins_encode(cdqq_enc(div));
11739 ins_pipe(ialu_reg_reg_alu0);
11740 %}
11741
11742 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11743 %{
11744 match(Set rax (UDivI rax div));
11745 effect(KILL rdx, KILL cr);
11746
11747 ins_cost(300);
11748 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11749 ins_encode %{
11750 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11751 %}
11752 ins_pipe(ialu_reg_reg_alu0);
11753 %}
11754
11755 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11756 %{
11757 match(Set rax (UDivL rax div));
11758 effect(KILL rdx, KILL cr);
11759
11760 ins_cost(300);
11761 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11762 ins_encode %{
11763 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11764 %}
11765 ins_pipe(ialu_reg_reg_alu0);
11766 %}
11767
11768 // Integer DIVMOD with Register, both quotient and mod results
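// A single idiv already leaves the quotient in rax and the remainder in rdx, so both
// results come out of one divide.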
11769 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11770 rFlagsReg cr)
11771 %{
11772 match(DivModI rax div);
11773 effect(KILL cr);
11774
11775 ins_cost(30*100+10*100); // XXX
11776 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11777 "jne,s normal\n\t"
11778 "xorl rdx, rdx\n\t"
11779 "cmpl $div, -1\n\t"
11780 "je,s done\n"
11781 "normal: cdql\n\t"
11782 "idivl $div\n"
11783 "done:" %}
11784 ins_encode(cdql_enc(div));
11785 ins_pipe(pipe_slow);
11786 %}
11787
11788 // Long DIVMOD with Register, both quotient and mod results
11789 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11790 rFlagsReg cr)
11791 %{
11792 match(DivModL rax div);
11793 effect(KILL cr);
11794
11795 ins_cost(30*100+10*100); // XXX
11796 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11797 "cmpq rax, rdx\n\t"
11798 "jne,s normal\n\t"
11799 "xorl rdx, rdx\n\t"
11800 "cmpq $div, -1\n\t"
11801 "je,s done\n"
11802 "normal: cdqq\n\t"
11803 "idivq $div\n"
11804 "done:" %}
11805 ins_encode(cdqq_enc(div));
11806 ins_pipe(pipe_slow);
11807 %}
11808
11809 // Unsigned integer DIVMOD with Register, both quotient and mod results
11810 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11811 no_rax_rdx_RegI div, rFlagsReg cr)
11812 %{
11813 match(UDivModI rax div);
11814 effect(TEMP tmp, KILL cr);
11815
11816 ins_cost(300);
11817 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11818 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11819 %}
11820 ins_encode %{
11821 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11822 %}
11823 ins_pipe(pipe_slow);
11824 %}
11825
11826 // Unsigned long DIVMOD with Register, both quotient and mod results
11827 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11828 no_rax_rdx_RegL div, rFlagsReg cr)
11829 %{
11830 match(UDivModL rax div);
11831 effect(TEMP tmp, KILL cr);
11832
11833 ins_cost(300);
11834 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11835 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11836 %}
11837 ins_encode %{
11838 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11839 %}
11840 ins_pipe(pipe_slow);
11841 %}
11842
11843 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11844 rFlagsReg cr)
11845 %{
11846 match(Set rdx (ModI rax div));
11847 effect(KILL rax, KILL cr);
11848
11849 ins_cost(300); // XXX
11850 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11851 "jne,s normal\n\t"
11852 "xorl rdx, rdx\n\t"
11853 "cmpl $div, -1\n\t"
11854 "je,s done\n"
11855 "normal: cdql\n\t"
11856 "idivl $div\n"
11857 "done:" %}
11858 ins_encode(cdql_enc(div));
11859 ins_pipe(ialu_reg_reg_alu0);
11860 %}
11861
11862 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11863 rFlagsReg cr)
11864 %{
11865 match(Set rdx (ModL rax div));
11866 effect(KILL rax, KILL cr);
11867
11868 ins_cost(300); // XXX
11869 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11870 "cmpq rax, rdx\n\t"
11871 "jne,s normal\n\t"
11872 "xorl rdx, rdx\n\t"
11873 "cmpq $div, -1\n\t"
11874 "je,s done\n"
11875 "normal: cdqq\n\t"
11876 "idivq $div\n"
11877 "done:" %}
11878 ins_encode(cdqq_enc(div));
11879 ins_pipe(ialu_reg_reg_alu0);
11880 %}
11881
11882 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11883 %{
11884 match(Set rdx (UModI rax div));
11885 effect(KILL rax, KILL cr);
11886
11887 ins_cost(300);
11888 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11889 ins_encode %{
11890 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11891 %}
11892 ins_pipe(ialu_reg_reg_alu0);
11893 %}
11894
11895 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11896 %{
11897 match(Set rdx (UModL rax div));
11898 effect(KILL rax, KILL cr);
11899
11900 ins_cost(300);
11901 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11902 ins_encode %{
11903 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11904 %}
11905 ins_pipe(ialu_reg_reg_alu0);
11906 %}
11907
11908 // Integer Shift Instructions
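// The legacy variable-count shifts require the count in CL and update the flags; on BMI2
// hardware the flag-neutral shlxl/sarxl/shrxl forms below take the count in any register.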
11909 // Shift Left by one, two, three
11910 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11911 %{
11912 predicate(!UseAPX);
11913 match(Set dst (LShiftI dst shift));
11914 effect(KILL cr);
11915
11916 format %{ "sall $dst, $shift" %}
11917 ins_encode %{
11918 __ sall($dst$$Register, $shift$$constant);
11919 %}
11920 ins_pipe(ialu_reg);
11921 %}
11922
11923 // Shift Left by one, two, three
11924 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11925 %{
11926 predicate(UseAPX);
11927 match(Set dst (LShiftI src shift));
11928 effect(KILL cr);
11929 flag(PD::Flag_ndd_demotable_opr1);
11930
format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11932 ins_encode %{
11933 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11934 %}
11935 ins_pipe(ialu_reg);
11936 %}
11937
11938 // Shift Left by 8-bit immediate
11939 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11940 %{
11941 predicate(!UseAPX);
11942 match(Set dst (LShiftI dst shift));
11943 effect(KILL cr);
11944
11945 format %{ "sall $dst, $shift" %}
11946 ins_encode %{
11947 __ sall($dst$$Register, $shift$$constant);
11948 %}
11949 ins_pipe(ialu_reg);
11950 %}
11951
11952 // Shift Left by 8-bit immediate
11953 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11954 %{
11955 predicate(UseAPX);
11956 match(Set dst (LShiftI src shift));
11957 effect(KILL cr);
11958 flag(PD::Flag_ndd_demotable_opr1);
11959
11960 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11961 ins_encode %{
11962 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11963 %}
11964 ins_pipe(ialu_reg);
11965 %}
11966
11967 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11968 %{
11969 predicate(UseAPX);
11970 match(Set dst (LShiftI (LoadI src) shift));
11971 effect(KILL cr);
11972
11973 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11974 ins_encode %{
11975 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11976 %}
11977 ins_pipe(ialu_reg);
11978 %}
11979
11980 // Shift Left by 8-bit immediate
11981 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11982 %{
11983 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11984 effect(KILL cr);
11985
11986 format %{ "sall $dst, $shift" %}
11987 ins_encode %{
11988 __ sall($dst$$Address, $shift$$constant);
11989 %}
11990 ins_pipe(ialu_mem_imm);
11991 %}
11992
11993 // Shift Left by variable
11994 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11995 %{
11996 predicate(!VM_Version::supports_bmi2());
11997 match(Set dst (LShiftI dst shift));
11998 effect(KILL cr);
11999
12000 format %{ "sall $dst, $shift" %}
12001 ins_encode %{
12002 __ sall($dst$$Register);
12003 %}
12004 ins_pipe(ialu_reg_reg);
12005 %}
12006
12007 // Shift Left by variable
12008 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12009 %{
12010 predicate(!VM_Version::supports_bmi2());
12011 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12012 effect(KILL cr);
12013
12014 format %{ "sall $dst, $shift" %}
12015 ins_encode %{
12016 __ sall($dst$$Address);
12017 %}
12018 ins_pipe(ialu_mem_reg);
12019 %}
12020
12021 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12022 %{
12023 predicate(VM_Version::supports_bmi2());
12024 match(Set dst (LShiftI src shift));
12025
12026 format %{ "shlxl $dst, $src, $shift" %}
12027 ins_encode %{
12028 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12029 %}
12030 ins_pipe(ialu_reg_reg);
12031 %}
12032
12033 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12034 %{
12035 predicate(VM_Version::supports_bmi2());
12036 match(Set dst (LShiftI (LoadI src) shift));
12037 ins_cost(175);
12038 format %{ "shlxl $dst, $src, $shift" %}
12039 ins_encode %{
12040 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12041 %}
12042 ins_pipe(ialu_reg_mem);
12043 %}
12044
12045 // Arithmetic Shift Right by 8-bit immediate
12046 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12047 %{
12048 predicate(!UseAPX);
12049 match(Set dst (RShiftI dst shift));
12050 effect(KILL cr);
12051
12052 format %{ "sarl $dst, $shift" %}
12053 ins_encode %{
12054 __ sarl($dst$$Register, $shift$$constant);
12055 %}
12056 ins_pipe(ialu_mem_imm);
12057 %}
12058
12059 // Arithmetic Shift Right by 8-bit immediate
12060 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12061 %{
12062 predicate(UseAPX);
12063 match(Set dst (RShiftI src shift));
12064 effect(KILL cr);
12065 flag(PD::Flag_ndd_demotable_opr1);
12066
12067 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12068 ins_encode %{
12069 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12070 %}
12071 ins_pipe(ialu_mem_imm);
12072 %}
12073
12074 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12075 %{
12076 predicate(UseAPX);
12077 match(Set dst (RShiftI (LoadI src) shift));
12078 effect(KILL cr);
12079
12080 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12081 ins_encode %{
12082 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12083 %}
12084 ins_pipe(ialu_mem_imm);
12085 %}
12086
12087 // Arithmetic Shift Right by 8-bit immediate
12088 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12089 %{
12090 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12091 effect(KILL cr);
12092
12093 format %{ "sarl $dst, $shift" %}
12094 ins_encode %{
12095 __ sarl($dst$$Address, $shift$$constant);
12096 %}
12097 ins_pipe(ialu_mem_imm);
12098 %}
12099
12100 // Arithmetic Shift Right by variable
12101 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12102 %{
12103 predicate(!VM_Version::supports_bmi2());
12104 match(Set dst (RShiftI dst shift));
12105 effect(KILL cr);
12106
12107 format %{ "sarl $dst, $shift" %}
12108 ins_encode %{
12109 __ sarl($dst$$Register);
12110 %}
12111 ins_pipe(ialu_reg_reg);
12112 %}
12113
12114 // Arithmetic Shift Right by variable
12115 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12116 %{
12117 predicate(!VM_Version::supports_bmi2());
12118 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12119 effect(KILL cr);
12120
12121 format %{ "sarl $dst, $shift" %}
12122 ins_encode %{
12123 __ sarl($dst$$Address);
12124 %}
12125 ins_pipe(ialu_mem_reg);
12126 %}
12127
12128 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12129 %{
12130 predicate(VM_Version::supports_bmi2());
12131 match(Set dst (RShiftI src shift));
12132
12133 format %{ "sarxl $dst, $src, $shift" %}
12134 ins_encode %{
12135 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12136 %}
12137 ins_pipe(ialu_reg_reg);
12138 %}
12139
12140 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12141 %{
12142 predicate(VM_Version::supports_bmi2());
12143 match(Set dst (RShiftI (LoadI src) shift));
12144 ins_cost(175);
12145 format %{ "sarxl $dst, $src, $shift" %}
12146 ins_encode %{
12147 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12148 %}
12149 ins_pipe(ialu_reg_mem);
12150 %}
12151
12152 // Logical Shift Right by 8-bit immediate
12153 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12154 %{
12155 predicate(!UseAPX);
12156 match(Set dst (URShiftI dst shift));
12157 effect(KILL cr);
12158
12159 format %{ "shrl $dst, $shift" %}
12160 ins_encode %{
12161 __ shrl($dst$$Register, $shift$$constant);
12162 %}
12163 ins_pipe(ialu_reg);
12164 %}
12165
12166 // Logical Shift Right by 8-bit immediate
12167 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12168 %{
12169 predicate(UseAPX);
12170 match(Set dst (URShiftI src shift));
12171 effect(KILL cr);
12172 flag(PD::Flag_ndd_demotable_opr1);
12173
12174 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12175 ins_encode %{
12176 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12177 %}
12178 ins_pipe(ialu_reg);
12179 %}
12180
12181 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12182 %{
12183 predicate(UseAPX);
12184 match(Set dst (URShiftI (LoadI src) shift));
12185 effect(KILL cr);
12186
12187 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12188 ins_encode %{
12189 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12190 %}
12191 ins_pipe(ialu_reg);
12192 %}
12193
12194 // Logical Shift Right by 8-bit immediate
12195 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12196 %{
12197 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12198 effect(KILL cr);
12199
12200 format %{ "shrl $dst, $shift" %}
12201 ins_encode %{
12202 __ shrl($dst$$Address, $shift$$constant);
12203 %}
12204 ins_pipe(ialu_mem_imm);
12205 %}
12206
12207 // Logical Shift Right by variable
12208 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12209 %{
12210 predicate(!VM_Version::supports_bmi2());
12211 match(Set dst (URShiftI dst shift));
12212 effect(KILL cr);
12213
12214 format %{ "shrl $dst, $shift" %}
12215 ins_encode %{
12216 __ shrl($dst$$Register);
12217 %}
12218 ins_pipe(ialu_reg_reg);
12219 %}
12220
12221 // Logical Shift Right by variable
12222 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12223 %{
12224 predicate(!VM_Version::supports_bmi2());
12225 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12226 effect(KILL cr);
12227
12228 format %{ "shrl $dst, $shift" %}
12229 ins_encode %{
12230 __ shrl($dst$$Address);
12231 %}
12232 ins_pipe(ialu_mem_reg);
12233 %}
12234
12235 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12236 %{
12237 predicate(VM_Version::supports_bmi2());
12238 match(Set dst (URShiftI src shift));
12239
12240 format %{ "shrxl $dst, $src, $shift" %}
12241 ins_encode %{
12242 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12243 %}
12244 ins_pipe(ialu_reg_reg);
12245 %}
12246
12247 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12248 %{
12249 predicate(VM_Version::supports_bmi2());
12250 match(Set dst (URShiftI (LoadI src) shift));
12251 ins_cost(175);
12252 format %{ "shrxl $dst, $src, $shift" %}
12253 ins_encode %{
12254 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12255 %}
12256 ins_pipe(ialu_reg_mem);
12257 %}
12258
12259 // Long Shift Instructions
12260 // Shift Left by one, two, three
12261 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12262 %{
12263 predicate(!UseAPX);
12264 match(Set dst (LShiftL dst shift));
12265 effect(KILL cr);
12266
12267 format %{ "salq $dst, $shift" %}
12268 ins_encode %{
12269 __ salq($dst$$Register, $shift$$constant);
12270 %}
12271 ins_pipe(ialu_reg);
12272 %}
12273
12274 // Shift Left by one, two, three
12275 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12276 %{
12277 predicate(UseAPX);
12278 match(Set dst (LShiftL src shift));
12279 effect(KILL cr);
12280 flag(PD::Flag_ndd_demotable_opr1);
12281
12282 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12283 ins_encode %{
12284 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12285 %}
12286 ins_pipe(ialu_reg);
12287 %}
12288
12289 // Shift Left by 8-bit immediate
12290 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12291 %{
12292 predicate(!UseAPX);
12293 match(Set dst (LShiftL dst shift));
12294 effect(KILL cr);
12295
12296 format %{ "salq $dst, $shift" %}
12297 ins_encode %{
12298 __ salq($dst$$Register, $shift$$constant);
12299 %}
12300 ins_pipe(ialu_reg);
12301 %}
12302
12303 // Shift Left by 8-bit immediate
12304 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12305 %{
12306 predicate(UseAPX);
12307 match(Set dst (LShiftL src shift));
12308 effect(KILL cr);
12309 flag(PD::Flag_ndd_demotable_opr1);
12310
12311 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12312 ins_encode %{
12313 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12314 %}
12315 ins_pipe(ialu_reg);
12316 %}
12317
12318 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12319 %{
12320 predicate(UseAPX);
12321 match(Set dst (LShiftL (LoadL src) shift));
12322 effect(KILL cr);
12323
12324 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12325 ins_encode %{
12326 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12327 %}
12328 ins_pipe(ialu_reg);
12329 %}
12330
12331 // Shift Left by 8-bit immediate
12332 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12333 %{
12334 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12335 effect(KILL cr);
12336
12337 format %{ "salq $dst, $shift" %}
12338 ins_encode %{
12339 __ salq($dst$$Address, $shift$$constant);
12340 %}
12341 ins_pipe(ialu_mem_imm);
12342 %}
12343
12344 // Shift Left by variable
12345 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12346 %{
12347 predicate(!VM_Version::supports_bmi2());
12348 match(Set dst (LShiftL dst shift));
12349 effect(KILL cr);
12350
12351 format %{ "salq $dst, $shift" %}
12352 ins_encode %{
12353 __ salq($dst$$Register);
12354 %}
12355 ins_pipe(ialu_reg_reg);
12356 %}
12357
12358 // Shift Left by variable
12359 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12360 %{
12361 predicate(!VM_Version::supports_bmi2());
12362 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12363 effect(KILL cr);
12364
12365 format %{ "salq $dst, $shift" %}
12366 ins_encode %{
12367 __ salq($dst$$Address);
12368 %}
12369 ins_pipe(ialu_mem_reg);
12370 %}
12371
12372 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12373 %{
12374 predicate(VM_Version::supports_bmi2());
12375 match(Set dst (LShiftL src shift));
12376
12377 format %{ "shlxq $dst, $src, $shift" %}
12378 ins_encode %{
12379 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12380 %}
12381 ins_pipe(ialu_reg_reg);
12382 %}
12383
12384 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12385 %{
12386 predicate(VM_Version::supports_bmi2());
12387 match(Set dst (LShiftL (LoadL src) shift));
12388 ins_cost(175);
12389 format %{ "shlxq $dst, $src, $shift" %}
12390 ins_encode %{
12391 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12392 %}
12393 ins_pipe(ialu_reg_mem);
12394 %}
12395
12396 // Arithmetic Shift Right by 8-bit immediate
12397 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12398 %{
12399 predicate(!UseAPX);
12400 match(Set dst (RShiftL dst shift));
12401 effect(KILL cr);
12402
12403 format %{ "sarq $dst, $shift" %}
12404 ins_encode %{
12405 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12406 %}
12407 ins_pipe(ialu_mem_imm);
12408 %}
12409
12410 // Arithmetic Shift Right by 8-bit immediate
12411 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12412 %{
12413 predicate(UseAPX);
12414 match(Set dst (RShiftL src shift));
12415 effect(KILL cr);
12416 flag(PD::Flag_ndd_demotable_opr1);
12417
12418 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12419 ins_encode %{
12420 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12421 %}
12422 ins_pipe(ialu_mem_imm);
12423 %}
12424
12425 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12426 %{
12427 predicate(UseAPX);
12428 match(Set dst (RShiftL (LoadL src) shift));
12429 effect(KILL cr);
12430
12431 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12432 ins_encode %{
12433 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12434 %}
12435 ins_pipe(ialu_mem_imm);
12436 %}
12437
12438 // Arithmetic Shift Right by 8-bit immediate
12439 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12440 %{
12441 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12442 effect(KILL cr);
12443
12444 format %{ "sarq $dst, $shift" %}
12445 ins_encode %{
12446 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12447 %}
12448 ins_pipe(ialu_mem_imm);
12449 %}
12450
12451 // Arithmetic Shift Right by variable
12452 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12453 %{
12454 predicate(!VM_Version::supports_bmi2());
12455 match(Set dst (RShiftL dst shift));
12456 effect(KILL cr);
12457
12458 format %{ "sarq $dst, $shift" %}
12459 ins_encode %{
12460 __ sarq($dst$$Register);
12461 %}
12462 ins_pipe(ialu_reg_reg);
12463 %}
12464
12465 // Arithmetic Shift Right by variable
12466 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12467 %{
12468 predicate(!VM_Version::supports_bmi2());
12469 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12470 effect(KILL cr);
12471
12472 format %{ "sarq $dst, $shift" %}
12473 ins_encode %{
12474 __ sarq($dst$$Address);
12475 %}
12476 ins_pipe(ialu_mem_reg);
12477 %}
12478
12479 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12480 %{
12481 predicate(VM_Version::supports_bmi2());
12482 match(Set dst (RShiftL src shift));
12483
12484 format %{ "sarxq $dst, $src, $shift" %}
12485 ins_encode %{
12486 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12487 %}
12488 ins_pipe(ialu_reg_reg);
12489 %}
12490
12491 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12492 %{
12493 predicate(VM_Version::supports_bmi2());
12494 match(Set dst (RShiftL (LoadL src) shift));
12495 ins_cost(175);
12496 format %{ "sarxq $dst, $src, $shift" %}
12497 ins_encode %{
12498 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12499 %}
12500 ins_pipe(ialu_reg_mem);
12501 %}
12502
12503 // Logical Shift Right by 8-bit immediate
12504 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12505 %{
12506 predicate(!UseAPX);
12507 match(Set dst (URShiftL dst shift));
12508 effect(KILL cr);
12509
12510 format %{ "shrq $dst, $shift" %}
12511 ins_encode %{
12512 __ shrq($dst$$Register, $shift$$constant);
12513 %}
12514 ins_pipe(ialu_reg);
12515 %}
12516
12517 // Logical Shift Right by 8-bit immediate
12518 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12519 %{
12520 predicate(UseAPX);
12521 match(Set dst (URShiftL src shift));
12522 effect(KILL cr);
12523 flag(PD::Flag_ndd_demotable_opr1);
12524
12525 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12526 ins_encode %{
12527 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12528 %}
12529 ins_pipe(ialu_reg);
12530 %}
12531
12532 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12533 %{
12534 predicate(UseAPX);
12535 match(Set dst (URShiftL (LoadL src) shift));
12536 effect(KILL cr);
12537
12538 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12539 ins_encode %{
12540 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12541 %}
12542 ins_pipe(ialu_reg);
12543 %}
12544
12545 // Logical Shift Right by 8-bit immediate
12546 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12547 %{
12548 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12549 effect(KILL cr);
12550
12551 format %{ "shrq $dst, $shift" %}
12552 ins_encode %{
12553 __ shrq($dst$$Address, $shift$$constant);
12554 %}
12555 ins_pipe(ialu_mem_imm);
12556 %}
12557
12558 // Logical Shift Right by variable
12559 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12560 %{
12561 predicate(!VM_Version::supports_bmi2());
12562 match(Set dst (URShiftL dst shift));
12563 effect(KILL cr);
12564
12565 format %{ "shrq $dst, $shift" %}
12566 ins_encode %{
12567 __ shrq($dst$$Register);
12568 %}
12569 ins_pipe(ialu_reg_reg);
12570 %}
12571
12572 // Logical Shift Right by variable
12573 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12574 %{
12575 predicate(!VM_Version::supports_bmi2());
12576 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12577 effect(KILL cr);
12578
12579 format %{ "shrq $dst, $shift" %}
12580 ins_encode %{
12581 __ shrq($dst$$Address);
12582 %}
12583 ins_pipe(ialu_mem_reg);
12584 %}
12585
12586 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12587 %{
12588 predicate(VM_Version::supports_bmi2());
12589 match(Set dst (URShiftL src shift));
12590
12591 format %{ "shrxq $dst, $src, $shift" %}
12592 ins_encode %{
12593 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12594 %}
12595 ins_pipe(ialu_reg_reg);
12596 %}
12597
12598 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12599 %{
12600 predicate(VM_Version::supports_bmi2());
12601 match(Set dst (URShiftL (LoadL src) shift));
12602 ins_cost(175);
12603 format %{ "shrxq $dst, $src, $shift" %}
12604 ins_encode %{
12605 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12606 %}
12607 ins_pipe(ialu_reg_mem);
12608 %}
12609
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
12611 // This idiom is used by the compiler for the i2b bytecode.
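// For example, the Java cast (byte) x reaches the matcher as (x << 24) >> 24,
// which this rule collapses into a single sign-extending movsbl.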
12612 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12613 %{
12614 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12615
12616 format %{ "movsbl $dst, $src\t# i2b" %}
12617 ins_encode %{
12618 __ movsbl($dst$$Register, $src$$Register);
12619 %}
12620 ins_pipe(ialu_reg_reg);
12621 %}
12622
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
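// For example, the Java cast (short) x arrives as (x << 16) >> 16 and is matched
// here as a single movswl.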
12625 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12626 %{
12627 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12628
12629 format %{ "movswl $dst, $src\t# i2s" %}
12630 ins_encode %{
12631 __ movswl($dst$$Register, $src$$Register);
12632 %}
12633 ins_pipe(ialu_reg_reg);
12634 %}
12635
12636 // ROL/ROR instructions
12637
12638 // Rotate left by constant.
12639 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12640 %{
12641 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12642 match(Set dst (RotateLeft dst shift));
12643 effect(KILL cr);
12644 format %{ "roll $dst, $shift" %}
12645 ins_encode %{
12646 __ roll($dst$$Register, $shift$$constant);
12647 %}
12648 ins_pipe(ialu_reg);
12649 %}
12650
12651 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12652 %{
12653 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12654 match(Set dst (RotateLeft src shift));
12655 format %{ "rolxl $dst, $src, $shift" %}
12656 ins_encode %{
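    // BMI2 provides rorx but no rolx; rotating left by k is the same as rotating
    // right by (32 - k), and rorxl does not touch the flags, so no KILL cr is needed.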
12657 int shift = 32 - ($shift$$constant & 31);
12658 __ rorxl($dst$$Register, $src$$Register, shift);
12659 %}
12660 ins_pipe(ialu_reg_reg);
12661 %}
12662
12663 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12664 %{
12665 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12666 match(Set dst (RotateLeft (LoadI src) shift));
12667 ins_cost(175);
12668 format %{ "rolxl $dst, $src, $shift" %}
12669 ins_encode %{
12670 int shift = 32 - ($shift$$constant & 31);
12671 __ rorxl($dst$$Register, $src$$Address, shift);
12672 %}
12673 ins_pipe(ialu_reg_mem);
12674 %}
12675
12676 // Rotate Left by variable
12677 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12678 %{
12679 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12680 match(Set dst (RotateLeft dst shift));
12681 effect(KILL cr);
12682 format %{ "roll $dst, $shift" %}
12683 ins_encode %{
12684 __ roll($dst$$Register);
12685 %}
12686 ins_pipe(ialu_reg_reg);
12687 %}
12688
12689 // Rotate Left by variable
12690 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12691 %{
12692 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12693 match(Set dst (RotateLeft src shift));
12694 effect(KILL cr);
12695 flag(PD::Flag_ndd_demotable_opr1);
12696
12697 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12698 ins_encode %{
12699 __ eroll($dst$$Register, $src$$Register, false);
12700 %}
12701 ins_pipe(ialu_reg_reg);
12702 %}
12703
12704 // Rotate Right by constant.
12705 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12706 %{
12707 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12708 match(Set dst (RotateRight dst shift));
12709 effect(KILL cr);
12710 format %{ "rorl $dst, $shift" %}
12711 ins_encode %{
12712 __ rorl($dst$$Register, $shift$$constant);
12713 %}
12714 ins_pipe(ialu_reg);
12715 %}
12716
12717 // Rotate Right by constant.
12718 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12719 %{
12720 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12721 match(Set dst (RotateRight src shift));
12722 format %{ "rorxl $dst, $src, $shift" %}
12723 ins_encode %{
12724 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12725 %}
12726 ins_pipe(ialu_reg_reg);
12727 %}
12728
12729 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12730 %{
12731 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12732 match(Set dst (RotateRight (LoadI src) shift));
12733 ins_cost(175);
12734 format %{ "rorxl $dst, $src, $shift" %}
12735 ins_encode %{
12736 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12737 %}
12738 ins_pipe(ialu_reg_mem);
12739 %}
12740
12741 // Rotate Right by variable
12742 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12743 %{
12744 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12745 match(Set dst (RotateRight dst shift));
12746 effect(KILL cr);
12747 format %{ "rorl $dst, $shift" %}
12748 ins_encode %{
12749 __ rorl($dst$$Register);
12750 %}
12751 ins_pipe(ialu_reg_reg);
12752 %}
12753
12754 // Rotate Right by variable
12755 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12756 %{
12757 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12758 match(Set dst (RotateRight src shift));
12759 effect(KILL cr);
12760 flag(PD::Flag_ndd_demotable_opr1);
12761
12762 format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
12763 ins_encode %{
12764 __ erorl($dst$$Register, $src$$Register, false);
12765 %}
12766 ins_pipe(ialu_reg_reg);
12767 %}
12768
12769 // Rotate Left by constant.
12770 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12771 %{
12772 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12773 match(Set dst (RotateLeft dst shift));
12774 effect(KILL cr);
12775 format %{ "rolq $dst, $shift" %}
12776 ins_encode %{
12777 __ rolq($dst$$Register, $shift$$constant);
12778 %}
12779 ins_pipe(ialu_reg);
12780 %}
12781
12782 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12783 %{
12784 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12785 match(Set dst (RotateLeft src shift));
12786 format %{ "rolxq $dst, $src, $shift" %}
12787 ins_encode %{
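    // Same trick as the 32-bit case: rotate left by k equals rotate right by (64 - k).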
12788 int shift = 64 - ($shift$$constant & 63);
12789 __ rorxq($dst$$Register, $src$$Register, shift);
12790 %}
12791 ins_pipe(ialu_reg_reg);
12792 %}
12793
12794 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12795 %{
12796 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12797 match(Set dst (RotateLeft (LoadL src) shift));
12798 ins_cost(175);
12799 format %{ "rolxq $dst, $src, $shift" %}
12800 ins_encode %{
12801 int shift = 64 - ($shift$$constant & 63);
12802 __ rorxq($dst$$Register, $src$$Address, shift);
12803 %}
12804 ins_pipe(ialu_reg_mem);
12805 %}
12806
12807 // Rotate Left by variable
12808 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12809 %{
12810 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12811 match(Set dst (RotateLeft dst shift));
12812 effect(KILL cr);
12813
12814 format %{ "rolq $dst, $shift" %}
12815 ins_encode %{
12816 __ rolq($dst$$Register);
12817 %}
12818 ins_pipe(ialu_reg_reg);
12819 %}
12820
12821 // Rotate Left by variable
12822 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12823 %{
12824 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12825 match(Set dst (RotateLeft src shift));
12826 effect(KILL cr);
12827 flag(PD::Flag_ndd_demotable_opr1);
12828
12829 format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
12830 ins_encode %{
12831 __ erolq($dst$$Register, $src$$Register, false);
12832 %}
12833 ins_pipe(ialu_reg_reg);
12834 %}
12835
12836 // Rotate Right by constant.
12837 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12838 %{
12839 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12840 match(Set dst (RotateRight dst shift));
12841 effect(KILL cr);
12842 format %{ "rorq $dst, $shift" %}
12843 ins_encode %{
12844 __ rorq($dst$$Register, $shift$$constant);
12845 %}
12846 ins_pipe(ialu_reg);
12847 %}
12848
12849 // Rotate Right by constant
12850 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12851 %{
12852 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12853 match(Set dst (RotateRight src shift));
12854 format %{ "rorxq $dst, $src, $shift" %}
12855 ins_encode %{
12856 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12857 %}
12858 ins_pipe(ialu_reg_reg);
12859 %}
12860
12861 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12862 %{
12863 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12864 match(Set dst (RotateRight (LoadL src) shift));
12865 ins_cost(175);
12866 format %{ "rorxq $dst, $src, $shift" %}
12867 ins_encode %{
12868 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12869 %}
12870 ins_pipe(ialu_reg_mem);
12871 %}
12872
12873 // Rotate Right by variable
12874 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12875 %{
12876 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12877 match(Set dst (RotateRight dst shift));
12878 effect(KILL cr);
12879 format %{ "rorq $dst, $shift" %}
12880 ins_encode %{
12881 __ rorq($dst$$Register);
12882 %}
12883 ins_pipe(ialu_reg_reg);
12884 %}
12885
12886 // Rotate Right by variable
12887 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12888 %{
12889 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12890 match(Set dst (RotateRight src shift));
12891 effect(KILL cr);
12892 flag(PD::Flag_ndd_demotable_opr1);
12893
12894 format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
12895 ins_encode %{
12896 __ erorq($dst$$Register, $src$$Register, false);
12897 %}
12898 ins_pipe(ialu_reg_reg);
12899 %}
12900
12901 //----------------------------- CompressBits/ExpandBits ------------------------
12902
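// pext gathers the src bits selected by mask into the low-order bits of the result;
// pdep scatters the low-order src bits out to the positions selected by mask.
// For example, pext(0b11010110, 0b11110000) = 0b1101 and
// pdep(0b1101, 0b11110000) = 0b11010000.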
12903 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12904 predicate(n->bottom_type()->isa_long());
12905 match(Set dst (CompressBits src mask));
12906 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12907 ins_encode %{
12908 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12909 %}
12910 ins_pipe( pipe_slow );
12911 %}
12912
12913 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12914 predicate(n->bottom_type()->isa_long());
12915 match(Set dst (ExpandBits src mask));
12916 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12917 ins_encode %{
12918 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12919 %}
12920 ins_pipe( pipe_slow );
12921 %}
12922
12923 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12924 predicate(n->bottom_type()->isa_long());
12925 match(Set dst (CompressBits src (LoadL mask)));
12926 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12927 ins_encode %{
12928 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12929 %}
12930 ins_pipe( pipe_slow );
12931 %}
12932
12933 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12934 predicate(n->bottom_type()->isa_long());
12935 match(Set dst (ExpandBits src (LoadL mask)));
12936 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12937 ins_encode %{
12938 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12939 %}
12940 ins_pipe( pipe_slow );
12941 %}
12942
12943
12944 // Logical Instructions
12945
12946 // Integer Logical Instructions
12947
12948 // And Instructions
12949 // And Register with Register
12950 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12951 %{
12952 predicate(!UseAPX);
12953 match(Set dst (AndI dst src));
12954 effect(KILL cr);
12955 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12956
12957 format %{ "andl $dst, $src\t# int" %}
12958 ins_encode %{
12959 __ andl($dst$$Register, $src$$Register);
12960 %}
12961 ins_pipe(ialu_reg_reg);
12962 %}
12963
12964 // And Register with Register using New Data Destination (NDD)
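// APX NDD (new data destination) forms are EVEX-encoded and write a destination that
// may differ from both sources. The Flag_ndd_demotable_opr* hints presumably allow the
// back end to demote to the shorter legacy two-operand encoding when dst ends up equal
// to that operand.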
12965 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12966 %{
12967 predicate(UseAPX);
12968 match(Set dst (AndI src1 src2));
12969 effect(KILL cr);
12970 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12971
12972 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12973 ins_encode %{
12974 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12976 %}
12977 ins_pipe(ialu_reg_reg);
12978 %}
12979
12980 // And Register with Immediate 255
12981 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12982 %{
12983 match(Set dst (AndI src mask));
12984
12985 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12986 ins_encode %{
12987 __ movzbl($dst$$Register, $src$$Register);
12988 %}
12989 ins_pipe(ialu_reg);
12990 %}
12991
12992 // And Register with Immediate 255 and promote to long
12993 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12994 %{
12995 match(Set dst (ConvI2L (AndI src mask)));
12996
12997 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12998 ins_encode %{
12999 __ movzbl($dst$$Register, $src$$Register);
13000 %}
13001 ins_pipe(ialu_reg);
13002 %}
13003
13004 // And Register with Immediate 65535
13005 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13006 %{
13007 match(Set dst (AndI src mask));
13008
13009 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
13010 ins_encode %{
13011 __ movzwl($dst$$Register, $src$$Register);
13012 %}
13013 ins_pipe(ialu_reg);
13014 %}
13015
13016 // And Register with Immediate 65535 and promote to long
13017 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13018 %{
13019 match(Set dst (ConvI2L (AndI src mask)));
13020
13021 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
13022 ins_encode %{
13023 __ movzwl($dst$$Register, $src$$Register);
13024 %}
13025 ins_pipe(ialu_reg);
13026 %}
13027
13028 // Can skip int2long conversions after AND with small bitmask
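// bzhiq copies src and clears every bit at or above the index held in tmp, so for
// mask = 2^k - 1 (e.g. 0x3FF with k = 10) the result is src & mask, already
// zero-extended to 64 bits; the separate ConvI2L becomes unnecessary.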
13029 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13030 %{
13031 predicate(VM_Version::supports_bmi2());
13032 ins_cost(125);
13033 effect(TEMP tmp, KILL cr);
13034 match(Set dst (ConvI2L (AndI src mask)));
13035 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13036 ins_encode %{
13037 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13038 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13039 %}
13040 ins_pipe(ialu_reg_reg);
13041 %}
13042
13043 // And Register with Immediate
13044 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13045 %{
13046 predicate(!UseAPX);
13047 match(Set dst (AndI dst src));
13048 effect(KILL cr);
13049 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13050
13051 format %{ "andl $dst, $src\t# int" %}
13052 ins_encode %{
13053 __ andl($dst$$Register, $src$$constant);
13054 %}
13055 ins_pipe(ialu_reg);
13056 %}
13057
13058 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13059 %{
13060 predicate(UseAPX);
13061 match(Set dst (AndI src1 src2));
13062 effect(KILL cr);
13063 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13064
13065 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13066 ins_encode %{
13067 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13068 %}
13069 ins_pipe(ialu_reg);
13070 %}
13071
13072 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13073 %{
13074 predicate(UseAPX);
13075 match(Set dst (AndI (LoadI src1) src2));
13076 effect(KILL cr);
13077 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13078
13079 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13080 ins_encode %{
13081 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13082 %}
13083 ins_pipe(ialu_reg);
13084 %}
13085
13086 // And Register with Memory
13087 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13088 %{
13089 predicate(!UseAPX);
13090 match(Set dst (AndI dst (LoadI src)));
13091 effect(KILL cr);
13092 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13093
13094 ins_cost(150);
13095 format %{ "andl $dst, $src\t# int" %}
13096 ins_encode %{
13097 __ andl($dst$$Register, $src$$Address);
13098 %}
13099 ins_pipe(ialu_reg_mem);
13100 %}
13101
13102 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13103 %{
13104 predicate(UseAPX);
13105 match(Set dst (AndI src1 (LoadI src2)));
13106 effect(KILL cr);
13107 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13108
13109 ins_cost(150);
13110 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13111 ins_encode %{
13112 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13113 %}
13114 ins_pipe(ialu_reg_mem);
13115 %}
13116
13117 // And Memory with Register
13118 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13119 %{
13120 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13121 effect(KILL cr);
13122 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13123
13124 ins_cost(150);
13125 format %{ "andb $dst, $src\t# byte" %}
13126 ins_encode %{
13127 __ andb($dst$$Address, $src$$Register);
13128 %}
13129 ins_pipe(ialu_mem_reg);
13130 %}
13131
13132 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13133 %{
13134 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13135 effect(KILL cr);
13136 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13137
13138 ins_cost(150);
13139 format %{ "andl $dst, $src\t# int" %}
13140 ins_encode %{
13141 __ andl($dst$$Address, $src$$Register);
13142 %}
13143 ins_pipe(ialu_mem_reg);
13144 %}
13145
13146 // And Memory with Immediate
13147 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13148 %{
13149 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13150 effect(KILL cr);
13151 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13152
13153 ins_cost(125);
13154 format %{ "andl $dst, $src\t# int" %}
13155 ins_encode %{
13156 __ andl($dst$$Address, $src$$constant);
13157 %}
13158 ins_pipe(ialu_mem_imm);
13159 %}
13160
13161 // BMI1 instructions
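// These match the standard BMI1 identities:
//   andn(x, y)  = ~x & y
//   blsi(x)     = -x & x         (isolate lowest set bit)
//   blsmsk(x)   = (x - 1) ^ x    (mask up to and including lowest set bit)
//   blsr(x)     = (x - 1) & x    (clear lowest set bit)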
13162 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13163 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13164 predicate(UseBMI1Instructions);
13165 effect(KILL cr);
13166 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13167
13168 ins_cost(125);
13169 format %{ "andnl $dst, $src1, $src2" %}
13170
13171 ins_encode %{
13172 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13173 %}
13174 ins_pipe(ialu_reg_mem);
13175 %}
13176
13177 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13178 match(Set dst (AndI (XorI src1 minus_1) src2));
13179 predicate(UseBMI1Instructions);
13180 effect(KILL cr);
13181 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13182
13183 format %{ "andnl $dst, $src1, $src2" %}
13184
13185 ins_encode %{
13186 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13187 %}
13188 ins_pipe(ialu_reg);
13189 %}
13190
13191 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13192 match(Set dst (AndI (SubI imm_zero src) src));
13193 predicate(UseBMI1Instructions);
13194 effect(KILL cr);
13195 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13196
13197 format %{ "blsil $dst, $src" %}
13198
13199 ins_encode %{
13200 __ blsil($dst$$Register, $src$$Register);
13201 %}
13202 ins_pipe(ialu_reg);
13203 %}
13204
13205 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13206 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13207 predicate(UseBMI1Instructions);
13208 effect(KILL cr);
13209 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13210
13211 ins_cost(125);
13212 format %{ "blsil $dst, $src" %}
13213
13214 ins_encode %{
13215 __ blsil($dst$$Register, $src$$Address);
13216 %}
13217 ins_pipe(ialu_reg_mem);
13218 %}
13219
13220 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13221 %{
13222 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13223 predicate(UseBMI1Instructions);
13224 effect(KILL cr);
13225 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13226
13227 ins_cost(125);
13228 format %{ "blsmskl $dst, $src" %}
13229
13230 ins_encode %{
13231 __ blsmskl($dst$$Register, $src$$Address);
13232 %}
13233 ins_pipe(ialu_reg_mem);
13234 %}
13235
13236 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13237 %{
13238 match(Set dst (XorI (AddI src minus_1) src));
13239 predicate(UseBMI1Instructions);
13240 effect(KILL cr);
13241 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13242
13243 format %{ "blsmskl $dst, $src" %}
13244
13245 ins_encode %{
13246 __ blsmskl($dst$$Register, $src$$Register);
13247 %}
13248
13249 ins_pipe(ialu_reg);
13250 %}
13251
13252 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13253 %{
13254 match(Set dst (AndI (AddI src minus_1) src) );
13255 predicate(UseBMI1Instructions);
13256 effect(KILL cr);
13257 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13258
13259 format %{ "blsrl $dst, $src" %}
13260
13261 ins_encode %{
13262 __ blsrl($dst$$Register, $src$$Register);
13263 %}
13264
13265 ins_pipe(ialu_reg_mem);
13266 %}
13267
13268 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13269 %{
13270 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13271 predicate(UseBMI1Instructions);
13272 effect(KILL cr);
13273 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13274
13275 ins_cost(125);
13276 format %{ "blsrl $dst, $src" %}
13277
13278 ins_encode %{
13279 __ blsrl($dst$$Register, $src$$Address);
13280 %}
13281
13282 ins_pipe(ialu_reg);
13283 %}
13284
13285 // Or Instructions
13286 // Or Register with Register
13287 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13288 %{
13289 predicate(!UseAPX);
13290 match(Set dst (OrI dst src));
13291 effect(KILL cr);
13292 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13293
13294 format %{ "orl $dst, $src\t# int" %}
13295 ins_encode %{
13296 __ orl($dst$$Register, $src$$Register);
13297 %}
13298 ins_pipe(ialu_reg_reg);
13299 %}
13300
13301 // Or Register with Register using New Data Destination (NDD)
13302 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13303 %{
13304 predicate(UseAPX);
13305 match(Set dst (OrI src1 src2));
13306 effect(KILL cr);
13307 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13308
13309 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13310 ins_encode %{
13311 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13312 %}
13313 ins_pipe(ialu_reg_reg);
13314 %}
13315
13316 // Or Register with Immediate
13317 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13318 %{
13319 predicate(!UseAPX);
13320 match(Set dst (OrI dst src));
13321 effect(KILL cr);
13322 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13323
13324 format %{ "orl $dst, $src\t# int" %}
13325 ins_encode %{
13326 __ orl($dst$$Register, $src$$constant);
13327 %}
13328 ins_pipe(ialu_reg);
13329 %}
13330
13331 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13332 %{
13333 predicate(UseAPX);
13334 match(Set dst (OrI src1 src2));
13335 effect(KILL cr);
13336 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13337
13338 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13339 ins_encode %{
13340 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13341 %}
13342 ins_pipe(ialu_reg);
13343 %}
13344
13345 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13346 %{
13347 predicate(UseAPX);
13348 match(Set dst (OrI src1 src2));
13349 effect(KILL cr);
13350 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13351
13352 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13353 ins_encode %{
13354 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13355 %}
13356 ins_pipe(ialu_reg);
13357 %}
13358
13359 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13360 %{
13361 predicate(UseAPX);
13362 match(Set dst (OrI (LoadI src1) src2));
13363 effect(KILL cr);
13364 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13365
13366 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13367 ins_encode %{
13368 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13369 %}
13370 ins_pipe(ialu_reg);
13371 %}
13372
13373 // Or Register with Memory
13374 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13375 %{
13376 predicate(!UseAPX);
13377 match(Set dst (OrI dst (LoadI src)));
13378 effect(KILL cr);
13379 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13380
13381 ins_cost(150);
13382 format %{ "orl $dst, $src\t# int" %}
13383 ins_encode %{
13384 __ orl($dst$$Register, $src$$Address);
13385 %}
13386 ins_pipe(ialu_reg_mem);
13387 %}
13388
13389 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13390 %{
13391 predicate(UseAPX);
13392 match(Set dst (OrI src1 (LoadI src2)));
13393 effect(KILL cr);
13394 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13395
13396 ins_cost(150);
13397 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13398 ins_encode %{
13399 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13400 %}
13401 ins_pipe(ialu_reg_mem);
13402 %}
13403
13404 // Or Memory with Register
13405 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13406 %{
13407 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13408 effect(KILL cr);
13409 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13410
13411 ins_cost(150);
13412 format %{ "orb $dst, $src\t# byte" %}
13413 ins_encode %{
13414 __ orb($dst$$Address, $src$$Register);
13415 %}
13416 ins_pipe(ialu_mem_reg);
13417 %}
13418
13419 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13420 %{
13421 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13422 effect(KILL cr);
13423 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13424
13425 ins_cost(150);
13426 format %{ "orl $dst, $src\t# int" %}
13427 ins_encode %{
13428 __ orl($dst$$Address, $src$$Register);
13429 %}
13430 ins_pipe(ialu_mem_reg);
13431 %}
13432
13433 // Or Memory with Immediate
13434 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13435 %{
13436 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13437 effect(KILL cr);
13438 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13439
13440 ins_cost(125);
13441 format %{ "orl $dst, $src\t# int" %}
13442 ins_encode %{
13443 __ orl($dst$$Address, $src$$constant);
13444 %}
13445 ins_pipe(ialu_mem_imm);
13446 %}
13447
13448 // Xor Instructions
13449 // Xor Register with Register
13450 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13451 %{
13452 predicate(!UseAPX);
13453 match(Set dst (XorI dst src));
13454 effect(KILL cr);
13455 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13456
13457 format %{ "xorl $dst, $src\t# int" %}
13458 ins_encode %{
13459 __ xorl($dst$$Register, $src$$Register);
13460 %}
13461 ins_pipe(ialu_reg_reg);
13462 %}
13463
13464 // Xor Register with Register using New Data Destination (NDD)
13465 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13466 %{
13467 predicate(UseAPX);
13468 match(Set dst (XorI src1 src2));
13469 effect(KILL cr);
13470 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13471
13472 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13473 ins_encode %{
13474 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13475 %}
13476 ins_pipe(ialu_reg_reg);
13477 %}
13478
13479 // Xor Register with Immediate -1
13480 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13481 %{
13482 predicate(!UseAPX);
13483 match(Set dst (XorI dst imm));
13484
13485 format %{ "notl $dst" %}
13486 ins_encode %{
13487 __ notl($dst$$Register);
13488 %}
13489 ins_pipe(ialu_reg);
13490 %}
13491
13492 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13493 %{
13494 match(Set dst (XorI src imm));
13495 predicate(UseAPX);
13496 flag(PD::Flag_ndd_demotable_opr1);
13497
13498 format %{ "enotl $dst, $src" %}
13499 ins_encode %{
13500 __ enotl($dst$$Register, $src$$Register);
13501 %}
13502 ins_pipe(ialu_reg);
13503 %}
13504
13505 // Xor Register with Immediate
13506 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13507 %{
// Exclude -1 from this rule so that xorI_rReg_im1 is always selected for it, regardless of cost.
13509 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13510 match(Set dst (XorI dst src));
13511 effect(KILL cr);
13512 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13513
13514 format %{ "xorl $dst, $src\t# int" %}
13515 ins_encode %{
13516 __ xorl($dst$$Register, $src$$constant);
13517 %}
13518 ins_pipe(ialu_reg);
13519 %}
13520
13521 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13522 %{
// Exclude -1 from this rule so that xorI_rReg_im1_ndd is always selected for it, regardless of cost.
13524 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13525 match(Set dst (XorI src1 src2));
13526 effect(KILL cr);
13527 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13528
13529 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13530 ins_encode %{
13531 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13532 %}
13533 ins_pipe(ialu_reg);
13534 %}
13535
// Xor Memory operand with Immediate into Register
13537 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13538 %{
13539 predicate(UseAPX);
13540 match(Set dst (XorI (LoadI src1) src2));
13541 effect(KILL cr);
13542 ins_cost(150);
13543 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13544
13545 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13546 ins_encode %{
13547 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13548 %}
13549 ins_pipe(ialu_reg);
13550 %}
13551
13552 // Xor Register with Memory
13553 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13554 %{
13555 predicate(!UseAPX);
13556 match(Set dst (XorI dst (LoadI src)));
13557 effect(KILL cr);
13558 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13559
13560 ins_cost(150);
13561 format %{ "xorl $dst, $src\t# int" %}
13562 ins_encode %{
13563 __ xorl($dst$$Register, $src$$Address);
13564 %}
13565 ins_pipe(ialu_reg_mem);
13566 %}
13567
13568 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13569 %{
13570 predicate(UseAPX);
13571 match(Set dst (XorI src1 (LoadI src2)));
13572 effect(KILL cr);
13573 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13574
13575 ins_cost(150);
13576 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13577 ins_encode %{
13578 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13579 %}
13580 ins_pipe(ialu_reg_mem);
13581 %}
13582
13583 // Xor Memory with Register
13584 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13585 %{
13586 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13587 effect(KILL cr);
13588 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13589
13590 ins_cost(150);
13591 format %{ "xorb $dst, $src\t# byte" %}
13592 ins_encode %{
13593 __ xorb($dst$$Address, $src$$Register);
13594 %}
13595 ins_pipe(ialu_mem_reg);
13596 %}
13597
13598 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13599 %{
13600 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13601 effect(KILL cr);
13602 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13603
13604 ins_cost(150);
13605 format %{ "xorl $dst, $src\t# int" %}
13606 ins_encode %{
13607 __ xorl($dst$$Address, $src$$Register);
13608 %}
13609 ins_pipe(ialu_mem_reg);
13610 %}
13611
13612 // Xor Memory with Immediate
13613 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13614 %{
13615 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13616 effect(KILL cr);
13617 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13618
13619 ins_cost(125);
13620 format %{ "xorl $dst, $src\t# int" %}
13621 ins_encode %{
13622 __ xorl($dst$$Address, $src$$constant);
13623 %}
13624 ins_pipe(ialu_mem_imm);
13625 %}
13626
13627
13628 // Long Logical Instructions
13629
13630 // And Instructions
13631 // And Register with Register
13632 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13633 %{
13634 predicate(!UseAPX);
13635 match(Set dst (AndL dst src));
13636 effect(KILL cr);
13637 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13638
13639 format %{ "andq $dst, $src\t# long" %}
13640 ins_encode %{
13641 __ andq($dst$$Register, $src$$Register);
13642 %}
13643 ins_pipe(ialu_reg_reg);
13644 %}
13645
13646 // And Register with Register using New Data Destination (NDD)
13647 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13648 %{
13649 predicate(UseAPX);
13650 match(Set dst (AndL src1 src2));
13651 effect(KILL cr);
13652 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13653
13654 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13655 ins_encode %{
13656 __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13658 %}
13659 ins_pipe(ialu_reg_reg);
13660 %}
13661
13662 // And Register with Immediate 255
13663 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13664 %{
13665 match(Set dst (AndL src mask));
13666
13667 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13668 ins_encode %{
// movzbl zeroes out the upper 32 bits and does not need REX.W
13670 __ movzbl($dst$$Register, $src$$Register);
13671 %}
13672 ins_pipe(ialu_reg);
13673 %}
13674
13675 // And Register with Immediate 65535
13676 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13677 %{
13678 match(Set dst (AndL src mask));
13679
13680 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13681 ins_encode %{
// movzwl zeroes out the upper 32 bits and does not need REX.W
13683 __ movzwl($dst$$Register, $src$$Register);
13684 %}
13685 ins_pipe(ialu_reg);
13686 %}
13687
13688 // And Register with Immediate
13689 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13690 %{
13691 predicate(!UseAPX);
13692 match(Set dst (AndL dst src));
13693 effect(KILL cr);
13694 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13695
13696 format %{ "andq $dst, $src\t# long" %}
13697 ins_encode %{
13698 __ andq($dst$$Register, $src$$constant);
13699 %}
13700 ins_pipe(ialu_reg);
13701 %}
13702
13703 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13704 %{
13705 predicate(UseAPX);
13706 match(Set dst (AndL src1 src2));
13707 effect(KILL cr);
13708 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13709
13710 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13711 ins_encode %{
13712 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13713 %}
13714 ins_pipe(ialu_reg);
13715 %}
13716
13717 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13718 %{
13719 predicate(UseAPX);
13720 match(Set dst (AndL (LoadL src1) src2));
13721 effect(KILL cr);
13722 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13723
13724 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13725 ins_encode %{
13726 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13727 %}
13728 ins_pipe(ialu_reg);
13729 %}
13730
13731 // And Register with Memory
13732 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13733 %{
13734 predicate(!UseAPX);
13735 match(Set dst (AndL dst (LoadL src)));
13736 effect(KILL cr);
13737 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13738
13739 ins_cost(150);
13740 format %{ "andq $dst, $src\t# long" %}
13741 ins_encode %{
13742 __ andq($dst$$Register, $src$$Address);
13743 %}
13744 ins_pipe(ialu_reg_mem);
13745 %}
13746
13747 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13748 %{
13749 predicate(UseAPX);
13750 match(Set dst (AndL src1 (LoadL src2)));
13751 effect(KILL cr);
13752 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13753
13754 ins_cost(150);
13755 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13756 ins_encode %{
13757 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13758 %}
13759 ins_pipe(ialu_reg_mem);
13760 %}
13761
13762 // And Memory with Register
13763 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13764 %{
13765 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13766 effect(KILL cr);
13767 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13768
13769 ins_cost(150);
13770 format %{ "andq $dst, $src\t# long" %}
13771 ins_encode %{
13772 __ andq($dst$$Address, $src$$Register);
13773 %}
13774 ins_pipe(ialu_mem_reg);
13775 %}
13776
13777 // And Memory with Immediate
13778 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13779 %{
13780 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13781 effect(KILL cr);
13782 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13783
13784 ins_cost(125);
13785 format %{ "andq $dst, $src\t# long" %}
13786 ins_encode %{
13787 __ andq($dst$$Address, $src$$constant);
13788 %}
13789 ins_pipe(ialu_mem_imm);
13790 %}
13791
13792 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13793 %{
// con must be a genuinely 64-bit immediate whose complement is a power of 2;
// for masks that fit in 8 or 32 bits a plain AND immediate is already good enough.
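// For example, con = ~(1L << 40) = 0xFFFFFEFFFFFFFFFF clears bit 40; btrq encodes
// that as a one-byte bit index instead of a 64-bit AND mask.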
13796 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13797
13798 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13799 effect(KILL cr);
13800
13801 ins_cost(125);
13802 format %{ "btrq $dst, log2(not($con))\t# long" %}
13803 ins_encode %{
13804 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13805 %}
13806 ins_pipe(ialu_mem_imm);
13807 %}
13808
13809 // BMI1 instructions
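// Same BMI1 identities as the 32-bit forms above, applied to 64-bit operands.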
13810 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13811 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13812 predicate(UseBMI1Instructions);
13813 effect(KILL cr);
13814 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13815
13816 ins_cost(125);
13817 format %{ "andnq $dst, $src1, $src2" %}
13818
13819 ins_encode %{
13820 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13821 %}
13822 ins_pipe(ialu_reg_mem);
13823 %}
13824
13825 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13826 match(Set dst (AndL (XorL src1 minus_1) src2));
13827 predicate(UseBMI1Instructions);
13828 effect(KILL cr);
13829 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13830
13831 format %{ "andnq $dst, $src1, $src2" %}
13832
13833 ins_encode %{
13834 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13835 %}
13836 ins_pipe(ialu_reg_mem);
13837 %}
13838
13839 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13840 match(Set dst (AndL (SubL imm_zero src) src));
13841 predicate(UseBMI1Instructions);
13842 effect(KILL cr);
13843 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13844
13845 format %{ "blsiq $dst, $src" %}
13846
13847 ins_encode %{
13848 __ blsiq($dst$$Register, $src$$Register);
13849 %}
13850 ins_pipe(ialu_reg);
13851 %}
13852
13853 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13854 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13855 predicate(UseBMI1Instructions);
13856 effect(KILL cr);
13857 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13858
13859 ins_cost(125);
13860 format %{ "blsiq $dst, $src" %}
13861
13862 ins_encode %{
13863 __ blsiq($dst$$Register, $src$$Address);
13864 %}
13865 ins_pipe(ialu_reg_mem);
13866 %}
13867
13868 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13869 %{
13870 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13871 predicate(UseBMI1Instructions);
13872 effect(KILL cr);
13873 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13874
13875 ins_cost(125);
13876 format %{ "blsmskq $dst, $src" %}
13877
13878 ins_encode %{
13879 __ blsmskq($dst$$Register, $src$$Address);
13880 %}
13881 ins_pipe(ialu_reg_mem);
13882 %}
13883
13884 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13885 %{
13886 match(Set dst (XorL (AddL src minus_1) src));
13887 predicate(UseBMI1Instructions);
13888 effect(KILL cr);
13889 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13890
13891 format %{ "blsmskq $dst, $src" %}
13892
13893 ins_encode %{
13894 __ blsmskq($dst$$Register, $src$$Register);
13895 %}
13896
13897 ins_pipe(ialu_reg);
13898 %}
13899
13900 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13901 %{
13902 match(Set dst (AndL (AddL src minus_1) src) );
13903 predicate(UseBMI1Instructions);
13904 effect(KILL cr);
13905 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13906
13907 format %{ "blsrq $dst, $src" %}
13908
13909 ins_encode %{
13910 __ blsrq($dst$$Register, $src$$Register);
13911 %}
13912
13913 ins_pipe(ialu_reg);
13914 %}
13915
13916 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13917 %{
13918 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13919 predicate(UseBMI1Instructions);
13920 effect(KILL cr);
13921 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13922
13923 ins_cost(125);
13924 format %{ "blsrq $dst, $src" %}
13925
13926 ins_encode %{
13927 __ blsrq($dst$$Register, $src$$Address);
13928 %}
13929
13930 ins_pipe(ialu_reg);
13931 %}
13932
13933 // Or Instructions
13934 // Or Register with Register
13935 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13936 %{
13937 predicate(!UseAPX);
13938 match(Set dst (OrL dst src));
13939 effect(KILL cr);
13940 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13941
13942 format %{ "orq $dst, $src\t# long" %}
13943 ins_encode %{
13944 __ orq($dst$$Register, $src$$Register);
13945 %}
13946 ins_pipe(ialu_reg_reg);
13947 %}
13948
13949 // Or Register with Register using New Data Destination (NDD)
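// NDD (APX) forms take a destination distinct from both sources, so the
// matcher is not forced to tie dst and src1 to the same register. The
// Flag_ndd_demotable_opr* hints appear to mark operands for which the shorter
// two-operand legacy encoding may be used again once the allocator assigns
// dst to that operand.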
13950 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13951 %{
13952 predicate(UseAPX);
13953 match(Set dst (OrL src1 src2));
13954 effect(KILL cr);
13955 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13956
13957 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13958 ins_encode %{
13959 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13960
13961 %}
13962 ins_pipe(ialu_reg_reg);
13963 %}
13964
13965 // Use any_RegP to match R15 (TLS register) without spilling.
13966 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13967 match(Set dst (OrL dst (CastP2X src)));
13968 effect(KILL cr);
13969 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13970
13971 format %{ "orq $dst, $src\t# long" %}
13972 ins_encode %{
13973 __ orq($dst$$Register, $src$$Register);
13974 %}
13975 ins_pipe(ialu_reg_reg);
13976 %}
13977
13978 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13979 match(Set dst (OrL src1 (CastP2X src2)));
13980 effect(KILL cr);
13981 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13982
13983 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13984 ins_encode %{
13985 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13986 %}
13987 ins_pipe(ialu_reg_reg);
13988 %}
13989
13990 // Or Register with Immediate
13991 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13992 %{
13993 predicate(!UseAPX);
13994 match(Set dst (OrL dst src));
13995 effect(KILL cr);
13996 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13997
13998 format %{ "orq $dst, $src\t# long" %}
13999 ins_encode %{
14000 __ orq($dst$$Register, $src$$constant);
14001 %}
14002 ins_pipe(ialu_reg);
14003 %}
14004
14005 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14006 %{
14007 predicate(UseAPX);
14008 match(Set dst (OrL src1 src2));
14009 effect(KILL cr);
14010 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14011
14012 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14013 ins_encode %{
14014 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14015 %}
14016 ins_pipe(ialu_reg);
14017 %}
14018
14019 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14020 %{
14021 predicate(UseAPX);
14022 match(Set dst (OrL src1 src2));
14023 effect(KILL cr);
14024 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14025
14026 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14027 ins_encode %{
14028 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14029 %}
14030 ins_pipe(ialu_reg);
14031 %}
14032
14033 // Or Register with Memory and Immediate (NDD)
14034 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14035 %{
14036 predicate(UseAPX);
14037 match(Set dst (OrL (LoadL src1) src2));
14038 effect(KILL cr);
14039 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14040
14041 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14042 ins_encode %{
14043 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14044 %}
14045 ins_pipe(ialu_reg);
14046 %}
14047
14048 // Or Register with Memory
14049 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14050 %{
14051 predicate(!UseAPX);
14052 match(Set dst (OrL dst (LoadL src)));
14053 effect(KILL cr);
14054 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14055
14056 ins_cost(150);
14057 format %{ "orq $dst, $src\t# long" %}
14058 ins_encode %{
14059 __ orq($dst$$Register, $src$$Address);
14060 %}
14061 ins_pipe(ialu_reg_mem);
14062 %}
14063
14064 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14065 %{
14066 predicate(UseAPX);
14067 match(Set dst (OrL src1 (LoadL src2)));
14068 effect(KILL cr);
14069 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14070
14071 ins_cost(150);
14072 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14073 ins_encode %{
14074 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14075 %}
14076 ins_pipe(ialu_reg_mem);
14077 %}
14078
14079 // Or Memory with Register
14080 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14081 %{
14082 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14083 effect(KILL cr);
14084 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14085
14086 ins_cost(150);
14087 format %{ "orq $dst, $src\t# long" %}
14088 ins_encode %{
14089 __ orq($dst$$Address, $src$$Register);
14090 %}
14091 ins_pipe(ialu_mem_reg);
14092 %}
14093
14094 // Or Memory with Immediate
14095 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14096 %{
14097 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14098 effect(KILL cr);
14099 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14100
14101 ins_cost(125);
14102 format %{ "orq $dst, $src\t# long" %}
14103 ins_encode %{
14104 __ orq($dst$$Address, $src$$constant);
14105 %}
14106 ins_pipe(ialu_mem_imm);
14107 %}
14108
14109 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14110 %{
14111 // con should be a pure 64-bit power of 2 immediate
14112 // because AND/OR works well enough for 8/32-bit values.
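  // Example: mem |= (1L << 40) becomes btsq [mem], 40 instead of
  // materializing a 64-bit immediate.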
14113 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14114
14115 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14116 effect(KILL cr);
14117
14118 ins_cost(125);
14119 format %{ "btsq $dst, log2($con)\t# long" %}
14120 ins_encode %{
14121 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14122 %}
14123 ins_pipe(ialu_mem_imm);
14124 %}
14125
14126 // Xor Instructions
14127 // Xor Register with Register
14128 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14129 %{
14130 predicate(!UseAPX);
14131 match(Set dst (XorL dst src));
14132 effect(KILL cr);
14133 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14134
14135 format %{ "xorq $dst, $src\t# long" %}
14136 ins_encode %{
14137 __ xorq($dst$$Register, $src$$Register);
14138 %}
14139 ins_pipe(ialu_reg_reg);
14140 %}
14141
14142 // Xor Register with Register using New Data Destination (NDD)
14143 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14144 %{
14145 predicate(UseAPX);
14146 match(Set dst (XorL src1 src2));
14147 effect(KILL cr);
14148 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14149
14150 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14151 ins_encode %{
14152 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14153 %}
14154 ins_pipe(ialu_reg_reg);
14155 %}
14156
14157 // Xor Register with Immediate -1
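// XOR with all ones is a bitwise complement, so notq (or enotq under APX) is
// emitted; neither form modifies the flags, which is why no flags register is
// killed here.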
14158 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14159 %{
14160 predicate(!UseAPX);
14161 match(Set dst (XorL dst imm));
14162
14163 format %{ "notq $dst" %}
14164 ins_encode %{
14165 __ notq($dst$$Register);
14166 %}
14167 ins_pipe(ialu_reg);
14168 %}
14169
14170 instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14171 %{
14172 predicate(UseAPX);
14173 match(Set dst (XorL src imm));
14174 flag(PD::Flag_ndd_demotable_opr1);
14175
14176 format %{ "enotq $dst, $src" %}
14177 ins_encode %{
14178 __ enotq($dst$$Register, $src$$Register);
14179 %}
14180 ins_pipe(ialu_reg);
14181 %}
14182
14183 // Xor Register with Immediate
14184 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14185 %{
14186 // Strict predicate check so that, when immL32 src is -1, xorL_rReg_im1 is selected regardless of cost.
14187 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14188 match(Set dst (XorL dst src));
14189 effect(KILL cr);
14190 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14191
14192 format %{ "xorq $dst, $src\t# long" %}
14193 ins_encode %{
14194 __ xorq($dst$$Register, $src$$constant);
14195 %}
14196 ins_pipe(ialu_reg);
14197 %}
14198
14199 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14200 %{
14201 // Strict predicate check so that, when immL32 src2 is -1, xorL_rReg_im1_ndd is selected regardless of cost.
14202 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14203 match(Set dst (XorL src1 src2));
14204 effect(KILL cr);
14205 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14206
14207 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14208 ins_encode %{
14209 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14210 %}
14211 ins_pipe(ialu_reg);
14212 %}
14213
14214 // Xor Register with Memory and Immediate (NDD)
14215 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14216 %{
14217 predicate(UseAPX);
14218 match(Set dst (XorL (LoadL src1) src2));
14219 effect(KILL cr);
14220 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14221 ins_cost(150);
14222
14223 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14224 ins_encode %{
14225 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14226 %}
14227 ins_pipe(ialu_reg);
14228 %}
14229
14230 // Xor Register with Memory
14231 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14232 %{
14233 predicate(!UseAPX);
14234 match(Set dst (XorL dst (LoadL src)));
14235 effect(KILL cr);
14236 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14237
14238 ins_cost(150);
14239 format %{ "xorq $dst, $src\t# long" %}
14240 ins_encode %{
14241 __ xorq($dst$$Register, $src$$Address);
14242 %}
14243 ins_pipe(ialu_reg_mem);
14244 %}
14245
14246 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14247 %{
14248 predicate(UseAPX);
14249 match(Set dst (XorL src1 (LoadL src2)));
14250 effect(KILL cr);
14251 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14252
14253 ins_cost(150);
14254 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14255 ins_encode %{
14256 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14257 %}
14258 ins_pipe(ialu_reg_mem);
14259 %}
14260
14261 // Xor Memory with Register
14262 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14263 %{
14264 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14265 effect(KILL cr);
14266 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14267
14268 ins_cost(150);
14269 format %{ "xorq $dst, $src\t# long" %}
14270 ins_encode %{
14271 __ xorq($dst$$Address, $src$$Register);
14272 %}
14273 ins_pipe(ialu_mem_reg);
14274 %}
14275
14276 // Xor Memory with Immediate
14277 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14278 %{
14279 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14280 effect(KILL cr);
14281 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14282
14283 ins_cost(125);
14284 format %{ "xorq $dst, $src\t# long" %}
14285 ins_encode %{
14286 __ xorq($dst$$Address, $src$$constant);
14287 %}
14288 ins_pipe(ialu_mem_imm);
14289 %}
14290
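// CmpLTMask p q yields -1 (all ones) when p < q and 0 otherwise: setcc
// materializes a 0/1 byte from the signed-less condition and negl turns the 1
// into -1.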
14291 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14292 %{
14293 match(Set dst (CmpLTMask p q));
14294 effect(KILL cr);
14295
14296 ins_cost(400);
14297 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14298 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14299 "negl $dst" %}
14300 ins_encode %{
14301 __ cmpl($p$$Register, $q$$Register);
14302 __ setcc(Assembler::less, $dst$$Register);
14303 __ negl($dst$$Register);
14304 %}
14305 ins_pipe(pipe_slow);
14306 %}
14307
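// With q == 0 the mask is simply the sign of dst, so an arithmetic right
// shift by 31 replicates the sign bit across the register.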
14308 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14309 %{
14310 match(Set dst (CmpLTMask dst zero));
14311 effect(KILL cr);
14312
14313 ins_cost(100);
14314 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14315 ins_encode %{
14316 __ sarl($dst$$Register, 31);
14317 %}
14318 ins_pipe(ialu_reg);
14319 %}
14320
14321 /* Better to save a register than avoid a branch */
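// Matches p = ((p < q) ? y : 0) + (p - q) and emits it as a subtract followed
// by a short forward branch that adds y back when the result went negative.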
14322 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14323 %{
14324 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14325 effect(KILL cr);
14326 ins_cost(300);
14327 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14328 "jge done\n\t"
14329 "addl $p,$y\n"
14330 "done: " %}
14331 ins_encode %{
14332 Register Rp = $p$$Register;
14333 Register Rq = $q$$Register;
14334 Register Ry = $y$$Register;
14335 Label done;
14336 __ subl(Rp, Rq);
14337 __ jccb(Assembler::greaterEqual, done);
14338 __ addl(Rp, Ry);
14339 __ bind(done);
14340 %}
14341 ins_pipe(pipe_cmplt);
14342 %}
14343
14344 /* Better to save a register than avoid a branch */
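// Matches y = (p < q) ? y : 0: rather than and-ing y with the -1/0 mask,
// compare and conditionally zero y across a short branch.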
14345 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14346 %{
14347 match(Set y (AndI (CmpLTMask p q) y));
14348 effect(KILL cr);
14349
14350 ins_cost(300);
14351
14352 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14353 "jlt done\n\t"
14354 "xorl $y, $y\n"
14355 "done: " %}
14356 ins_encode %{
14357 Register Rp = $p$$Register;
14358 Register Rq = $q$$Register;
14359 Register Ry = $y$$Register;
14360 Label done;
14361 __ cmpl(Rp, Rq);
14362 __ jccb(Assembler::less, done);
14363 __ xorl(Ry, Ry);
14364 __ bind(done);
14365 %}
14366 ins_pipe(pipe_cmplt);
14367 %}
14368
14369
14370 //---------- FP Instructions------------------------------------------------
14371
14372 // Really expensive, avoid
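// ucomiss sets ZF, PF and CF together for an unordered (NaN) operand; the
// pushfq/andq/popfq fixup clears ZF and PF while keeping CF, so a NaN
// comparison reads back as an ordinary "below" result.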
14373 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14374 %{
14375 match(Set cr (CmpF src1 src2));
14376
14377 ins_cost(500);
14378 format %{ "ucomiss $src1, $src2\n\t"
14379 "jnp,s exit\n\t"
14380 "pushfq\t# saw NaN, set CF\n\t"
14381 "andq [rsp], #0xffffff2b\n\t"
14382 "popfq\n"
14383 "exit:" %}
14384 ins_encode %{
14385 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14386 emit_cmpfp_fixup(masm);
14387 %}
14388 ins_pipe(pipe_slow);
14389 %}
14390
14391 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14392 match(Set cr (CmpF src1 src2));
14393
14394 ins_cost(100);
14395 format %{ "ucomiss $src1, $src2" %}
14396 ins_encode %{
14397 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14398 %}
14399 ins_pipe(pipe_slow);
14400 %}
14401
14402 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14403 match(Set cr (CmpF src1 src2));
14404
14405 ins_cost(100);
14406 format %{ "evucomxss $src1, $src2" %}
14407 ins_encode %{
14408 __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14409 %}
14410 ins_pipe(pipe_slow);
14411 %}
14412
14413 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14414 match(Set cr (CmpF src1 (LoadF src2)));
14415
14416 ins_cost(100);
14417 format %{ "ucomiss $src1, $src2" %}
14418 ins_encode %{
14419 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14420 %}
14421 ins_pipe(pipe_slow);
14422 %}
14423
14424 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14425 match(Set cr (CmpF src1 (LoadF src2)));
14426
14427 ins_cost(100);
14428 format %{ "evucomxss $src1, $src2" %}
14429 ins_encode %{
14430 __ evucomxss($src1$$XMMRegister, $src2$$Address);
14431 %}
14432 ins_pipe(pipe_slow);
14433 %}
14434
14435 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14436 match(Set cr (CmpF src con));
14437
14438 ins_cost(100);
14439 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14440 ins_encode %{
14441 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14442 %}
14443 ins_pipe(pipe_slow);
14444 %}
14445
14446 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14447 match(Set cr (CmpF src con));
14448
14449 ins_cost(100);
14450 format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14451 ins_encode %{
14452 __ evucomxss($src$$XMMRegister, $constantaddress($con));
14453 %}
14454 ins_pipe(pipe_slow);
14455 %}
14456
14457 // Really expensive, avoid
14458 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14459 %{
14460 match(Set cr (CmpD src1 src2));
14461
14462 ins_cost(500);
14463 format %{ "ucomisd $src1, $src2\n\t"
14464 "jnp,s exit\n\t"
14465 "pushfq\t# saw NaN, set CF\n\t"
14466 "andq [rsp], #0xffffff2b\n\t"
14467 "popfq\n"
14468 "exit:" %}
14469 ins_encode %{
14470 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14471 emit_cmpfp_fixup(masm);
14472 %}
14473 ins_pipe(pipe_slow);
14474 %}
14475
14476 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14477 match(Set cr (CmpD src1 src2));
14478
14479 ins_cost(100);
14480 format %{ "ucomisd $src1, $src2" %}
14481 ins_encode %{
14482 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14483 %}
14484 ins_pipe(pipe_slow);
14485 %}
14486
14487 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14488 match(Set cr (CmpD src1 src2));
14489
14490 ins_cost(100);
14491 format %{ "evucomxsd $src1, $src2" %}
14492 ins_encode %{
14493 __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14494 %}
14495 ins_pipe(pipe_slow);
14496 %}
14497
14498 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14499 match(Set cr (CmpD src1 (LoadD src2)));
14500
14501 ins_cost(100);
14502 format %{ "ucomisd $src1, $src2" %}
14503 ins_encode %{
14504 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14505 %}
14506 ins_pipe(pipe_slow);
14507 %}
14508
14509 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14510 match(Set cr (CmpD src1 (LoadD src2)));
14511
14512 ins_cost(100);
14513 format %{ "evucomxsd $src1, $src2" %}
14514 ins_encode %{
14515 __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14516 %}
14517 ins_pipe(pipe_slow);
14518 %}
14519
14520 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14521 match(Set cr (CmpD src con));
14522 ins_cost(100);
14523 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14524 ins_encode %{
14525 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14526 %}
14527 ins_pipe(pipe_slow);
14528 %}
14529
14530 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14531 match(Set cr (CmpD src con));
14532
14533 ins_cost(100);
14534 format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14535 ins_encode %{
14536 __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14537 %}
14538 ins_pipe(pipe_slow);
14539 %}
14540
14541 // Compare into -1,0,1
14542 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14543 %{
14544 match(Set dst (CmpF3 src1 src2));
14545 effect(KILL cr);
14546
14547 ins_cost(275);
14548 format %{ "ucomiss $src1, $src2\n\t"
14549 "movl $dst, #-1\n\t"
14550 "jp,s done\n\t"
14551 "jb,s done\n\t"
14552 "setne $dst\n\t"
14553 "movzbl $dst, $dst\n"
14554 "done:" %}
14555 ins_encode %{
14556 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14557 emit_cmpfp3(masm, $dst$$Register);
14558 %}
14559 ins_pipe(pipe_slow);
14560 %}
14561
14562 // Compare into -1,0,1
14563 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14564 %{
14565 match(Set dst (CmpF3 src1 (LoadF src2)));
14566 effect(KILL cr);
14567
14568 ins_cost(275);
14569 format %{ "ucomiss $src1, $src2\n\t"
14570 "movl $dst, #-1\n\t"
14571 "jp,s done\n\t"
14572 "jb,s done\n\t"
14573 "setne $dst\n\t"
14574 "movzbl $dst, $dst\n"
14575 "done:" %}
14576 ins_encode %{
14577 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14578 emit_cmpfp3(masm, $dst$$Register);
14579 %}
14580 ins_pipe(pipe_slow);
14581 %}
14582
14583 // Compare into -1,0,1
14584 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14585 match(Set dst (CmpF3 src con));
14586 effect(KILL cr);
14587
14588 ins_cost(275);
14589 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14590 "movl $dst, #-1\n\t"
14591 "jp,s done\n\t"
14592 "jb,s done\n\t"
14593 "setne $dst\n\t"
14594 "movzbl $dst, $dst\n"
14595 "done:" %}
14596 ins_encode %{
14597 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14598 emit_cmpfp3(masm, $dst$$Register);
14599 %}
14600 ins_pipe(pipe_slow);
14601 %}
14602
14603 // Compare into -1,0,1
14604 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14605 %{
14606 match(Set dst (CmpD3 src1 src2));
14607 effect(KILL cr);
14608
14609 ins_cost(275);
14610 format %{ "ucomisd $src1, $src2\n\t"
14611 "movl $dst, #-1\n\t"
14612 "jp,s done\n\t"
14613 "jb,s done\n\t"
14614 "setne $dst\n\t"
14615 "movzbl $dst, $dst\n"
14616 "done:" %}
14617 ins_encode %{
14618 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14619 emit_cmpfp3(masm, $dst$$Register);
14620 %}
14621 ins_pipe(pipe_slow);
14622 %}
14623
14624 // Compare into -1,0,1
14625 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14626 %{
14627 match(Set dst (CmpD3 src1 (LoadD src2)));
14628 effect(KILL cr);
14629
14630 ins_cost(275);
14631 format %{ "ucomisd $src1, $src2\n\t"
14632 "movl $dst, #-1\n\t"
14633 "jp,s done\n\t"
14634 "jb,s done\n\t"
14635 "setne $dst\n\t"
14636 "movzbl $dst, $dst\n"
14637 "done:" %}
14638 ins_encode %{
14639 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14640 emit_cmpfp3(masm, $dst$$Register);
14641 %}
14642 ins_pipe(pipe_slow);
14643 %}
14644
14645 // Compare into -1,0,1
14646 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14647 match(Set dst (CmpD3 src con));
14648 effect(KILL cr);
14649
14650 ins_cost(275);
14651 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14652 "movl $dst, #-1\n\t"
14653 "jp,s done\n\t"
14654 "jb,s done\n\t"
14655 "setne $dst\n\t"
14656 "movzbl $dst, $dst\n"
14657 "done:" %}
14658 ins_encode %{
14659 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14660 emit_cmpfp3(masm, $dst$$Register);
14661 %}
14662 ins_pipe(pipe_slow);
14663 %}
14664
14665 //----------Arithmetic Conversion Instructions---------------------------------
14666
14667 instruct convF2D_reg_reg(regD dst, regF src)
14668 %{
14669 match(Set dst (ConvF2D src));
14670
14671 format %{ "cvtss2sd $dst, $src" %}
14672 ins_encode %{
14673 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14674 %}
14675 ins_pipe(pipe_slow); // XXX
14676 %}
14677
14678 instruct convF2D_reg_mem(regD dst, memory src)
14679 %{
14680 predicate(UseAVX == 0);
14681 match(Set dst (ConvF2D (LoadF src)));
14682
14683 format %{ "cvtss2sd $dst, $src" %}
14684 ins_encode %{
14685 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14686 %}
14687 ins_pipe(pipe_slow); // XXX
14688 %}
14689
14690 instruct convD2F_reg_reg(regF dst, regD src)
14691 %{
14692 match(Set dst (ConvD2F src));
14693
14694 format %{ "cvtsd2ss $dst, $src" %}
14695 ins_encode %{
14696 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14697 %}
14698 ins_pipe(pipe_slow); // XXX
14699 %}
14700
14701 instruct convD2F_reg_mem(regF dst, memory src)
14702 %{
14703 predicate(UseAVX == 0);
14704 match(Set dst (ConvD2F (LoadD src)));
14705
14706 format %{ "cvtsd2ss $dst, $src" %}
14707 ins_encode %{
14708 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14709 %}
14710 ins_pipe(pipe_slow); // XXX
14711 %}
14712
14713 // XXX do mem variants
14714 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14715 %{
14716 predicate(!VM_Version::supports_avx10_2());
14717 match(Set dst (ConvF2I src));
14718 effect(KILL cr);
14719 format %{ "convert_f2i $dst, $src" %}
14720 ins_encode %{
14721 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14722 %}
14723 ins_pipe(pipe_slow);
14724 %}
14725
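// AVX10.2 adds truncating scalar converts with saturation (evcvtt*2sis*);
// these are assumed to produce the Java-specified result for NaN and
// out-of-range inputs directly, which is why the rules below skip the
// convertF2I fixup path and do not kill the flags.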
14726 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14727 %{
14728 predicate(VM_Version::supports_avx10_2());
14729 match(Set dst (ConvF2I src));
14730 format %{ "evcvttss2sisl $dst, $src" %}
14731 ins_encode %{
14732 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14733 %}
14734 ins_pipe(pipe_slow);
14735 %}
14736
14737 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14738 %{
14739 predicate(VM_Version::supports_avx10_2());
14740 match(Set dst (ConvF2I (LoadF src)));
14741 format %{ "evcvttss2sisl $dst, $src" %}
14742 ins_encode %{
14743 __ evcvttss2sisl($dst$$Register, $src$$Address);
14744 %}
14745 ins_pipe(pipe_slow);
14746 %}
14747
14748 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14749 %{
14750 predicate(!VM_Version::supports_avx10_2());
14751 match(Set dst (ConvF2L src));
14752 effect(KILL cr);
14753 format %{ "convert_f2l $dst, $src"%}
14754 ins_encode %{
14755 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14756 %}
14757 ins_pipe(pipe_slow);
14758 %}
14759
14760 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14761 %{
14762 predicate(VM_Version::supports_avx10_2());
14763 match(Set dst (ConvF2L src));
14764 format %{ "evcvttss2sisq $dst, $src" %}
14765 ins_encode %{
14766 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14767 %}
14768 ins_pipe(pipe_slow);
14769 %}
14770
14771 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14772 %{
14773 predicate(VM_Version::supports_avx10_2());
14774 match(Set dst (ConvF2L (LoadF src)));
14775 format %{ "evcvttss2sisq $dst, $src" %}
14776 ins_encode %{
14777 __ evcvttss2sisq($dst$$Register, $src$$Address);
14778 %}
14779 ins_pipe(pipe_slow);
14780 %}
14781
14782 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14783 %{
14784 predicate(!VM_Version::supports_avx10_2());
14785 match(Set dst (ConvD2I src));
14786 effect(KILL cr);
14787 format %{ "convert_d2i $dst, $src"%}
14788 ins_encode %{
14789 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14790 %}
14791 ins_pipe(pipe_slow);
14792 %}
14793
14794 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14795 %{
14796 predicate(VM_Version::supports_avx10_2());
14797 match(Set dst (ConvD2I src));
14798 format %{ "evcvttsd2sisl $dst, $src" %}
14799 ins_encode %{
14800 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14801 %}
14802 ins_pipe(pipe_slow);
14803 %}
14804
14805 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14806 %{
14807 predicate(VM_Version::supports_avx10_2());
14808 match(Set dst (ConvD2I (LoadD src)));
14809 format %{ "evcvttsd2sisl $dst, $src" %}
14810 ins_encode %{
14811 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14812 %}
14813 ins_pipe(pipe_slow);
14814 %}
14815
14816 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14817 %{
14818 predicate(!VM_Version::supports_avx10_2());
14819 match(Set dst (ConvD2L src));
14820 effect(KILL cr);
14821 format %{ "convert_d2l $dst, $src"%}
14822 ins_encode %{
14823 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14824 %}
14825 ins_pipe(pipe_slow);
14826 %}
14827
14828 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14829 %{
14830 predicate(VM_Version::supports_avx10_2());
14831 match(Set dst (ConvD2L src));
14832 format %{ "evcvttsd2sisq $dst, $src" %}
14833 ins_encode %{
14834 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14835 %}
14836 ins_pipe(pipe_slow);
14837 %}
14838
14839 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14840 %{
14841 predicate(VM_Version::supports_avx10_2());
14842 match(Set dst (ConvD2L (LoadD src)));
14843 format %{ "evcvttsd2sisq $dst, $src" %}
14844 ins_encode %{
14845 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14846 %}
14847 ins_pipe(pipe_slow);
14848 %}
14849
14850 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14851 %{
14852 match(Set dst (RoundD src));
14853 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14854 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14855 ins_encode %{
14856 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14857 %}
14858 ins_pipe(pipe_slow);
14859 %}
14860
14861 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14862 %{
14863 match(Set dst (RoundF src));
14864 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14865 format %{ "round_float $dst,$src" %}
14866 ins_encode %{
14867 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14868 %}
14869 ins_pipe(pipe_slow);
14870 %}
14871
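// cvtsi2ss/cvtsi2sd merge into the low lanes of the destination register, so
// under AVX the destination is cleared first (pxor) to break the false
// dependency on its previous contents.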
14872 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14873 %{
14874 predicate(!UseXmmI2F);
14875 match(Set dst (ConvI2F src));
14876
14877 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14878 ins_encode %{
14879 if (UseAVX > 0) {
14880 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14881 }
14882 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14883 %}
14884 ins_pipe(pipe_slow); // XXX
14885 %}
14886
14887 instruct convI2F_reg_mem(regF dst, memory src)
14888 %{
14889 predicate(UseAVX == 0);
14890 match(Set dst (ConvI2F (LoadI src)));
14891
14892 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14893 ins_encode %{
14894 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14895 %}
14896 ins_pipe(pipe_slow); // XXX
14897 %}
14898
14899 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14900 %{
14901 predicate(!UseXmmI2D);
14902 match(Set dst (ConvI2D src));
14903
14904 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14905 ins_encode %{
14906 if (UseAVX > 0) {
14907 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14908 }
14909 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14910 %}
14911 ins_pipe(pipe_slow); // XXX
14912 %}
14913
14914 instruct convI2D_reg_mem(regD dst, memory src)
14915 %{
14916 predicate(UseAVX == 0);
14917 match(Set dst (ConvI2D (LoadI src)));
14918
14919 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14920 ins_encode %{
14921 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14922 %}
14923 ins_pipe(pipe_slow); // XXX
14924 %}
14925
14926 instruct convXI2F_reg(regF dst, rRegI src)
14927 %{
14928 predicate(UseXmmI2F);
14929 match(Set dst (ConvI2F src));
14930
14931 format %{ "movdl $dst, $src\n\t"
14932 "cvtdq2psl $dst, $dst\t# i2f" %}
14933 ins_encode %{
14934 __ movdl($dst$$XMMRegister, $src$$Register);
14935 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14936 %}
14937 ins_pipe(pipe_slow); // XXX
14938 %}
14939
14940 instruct convXI2D_reg(regD dst, rRegI src)
14941 %{
14942 predicate(UseXmmI2D);
14943 match(Set dst (ConvI2D src));
14944
14945 format %{ "movdl $dst, $src\n\t"
14946 "cvtdq2pdl $dst, $dst\t# i2d" %}
14947 ins_encode %{
14948 __ movdl($dst$$XMMRegister, $src$$Register);
14949 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14950 %}
14951 ins_pipe(pipe_slow); // XXX
14952 %}
14953
14954 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14955 %{
14956 match(Set dst (ConvL2F src));
14957
14958 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14959 ins_encode %{
14960 if (UseAVX > 0) {
14961 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14962 }
14963 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14964 %}
14965 ins_pipe(pipe_slow); // XXX
14966 %}
14967
14968 instruct convL2F_reg_mem(regF dst, memory src)
14969 %{
14970 predicate(UseAVX == 0);
14971 match(Set dst (ConvL2F (LoadL src)));
14972
14973 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14974 ins_encode %{
14975 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14976 %}
14977 ins_pipe(pipe_slow); // XXX
14978 %}
14979
14980 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14981 %{
14982 match(Set dst (ConvL2D src));
14983
14984 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14985 ins_encode %{
14986 if (UseAVX > 0) {
14987 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14988 }
14989 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14990 %}
14991 ins_pipe(pipe_slow); // XXX
14992 %}
14993
14994 instruct convL2D_reg_mem(regD dst, memory src)
14995 %{
14996 predicate(UseAVX == 0);
14997 match(Set dst (ConvL2D (LoadL src)));
14998
14999 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15000 ins_encode %{
15001 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15002 %}
15003 ins_pipe(pipe_slow); // XXX
15004 %}
15005
15006 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15007 %{
15008 match(Set dst (ConvI2L src));
15009
15010 ins_cost(125);
15011 format %{ "movslq $dst, $src\t# i2l" %}
15012 ins_encode %{
15013 __ movslq($dst$$Register, $src$$Register);
15014 %}
15015 ins_pipe(ialu_reg_reg);
15016 %}
15017
15018 // Zero-extend convert int to long
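// A 32-bit mov already clears bits 63:32 on x86-64, so a plain movl performs
// the zero extension; when dst and src land in the same register the move is
// dropped entirely.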
15019 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15020 %{
15021 match(Set dst (AndL (ConvI2L src) mask));
15022
15023 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15024 ins_encode %{
15025 if ($dst$$reg != $src$$reg) {
15026 __ movl($dst$$Register, $src$$Register);
15027 }
15028 %}
15029 ins_pipe(ialu_reg_reg);
15030 %}
15031
15032 // Zero-extend convert int to long
15033 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15034 %{
15035 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15036
15037 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15038 ins_encode %{
15039 __ movl($dst$$Register, $src$$Address);
15040 %}
15041 ins_pipe(ialu_reg_mem);
15042 %}
15043
15044 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15045 %{
15046 match(Set dst (AndL src mask));
15047
15048 format %{ "movl $dst, $src\t# zero-extend long" %}
15049 ins_encode %{
15050 __ movl($dst$$Register, $src$$Register);
15051 %}
15052 ins_pipe(ialu_reg_reg);
15053 %}
15054
15055 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15056 %{
15057 match(Set dst (ConvL2I src));
15058
15059 format %{ "movl $dst, $src\t# l2i" %}
15060 ins_encode %{
15061 __ movl($dst$$Register, $src$$Register);
15062 %}
15063 ins_pipe(ialu_reg_reg);
15064 %}
15065
15066
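// The Move*2* rules below are raw bit-pattern moves between the integer and
// XMM register files (the floatToRawIntBits / doubleToRawLongBits family);
// the stack variants handle values living in a spill slot, while the reg-reg
// variants use movd/movdq directly.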
15067 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15068 match(Set dst (MoveF2I src));
15069 effect(DEF dst, USE src);
15070
15071 ins_cost(125);
15072 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15073 ins_encode %{
15074 __ movl($dst$$Register, Address(rsp, $src$$disp));
15075 %}
15076 ins_pipe(ialu_reg_mem);
15077 %}
15078
15079 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15080 match(Set dst (MoveI2F src));
15081 effect(DEF dst, USE src);
15082
15083 ins_cost(125);
15084 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15085 ins_encode %{
15086 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15087 %}
15088 ins_pipe(pipe_slow);
15089 %}
15090
15091 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15092 match(Set dst (MoveD2L src));
15093 effect(DEF dst, USE src);
15094
15095 ins_cost(125);
15096 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15097 ins_encode %{
15098 __ movq($dst$$Register, Address(rsp, $src$$disp));
15099 %}
15100 ins_pipe(ialu_reg_mem);
15101 %}
15102
15103 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15104 predicate(!UseXmmLoadAndClearUpper);
15105 match(Set dst (MoveL2D src));
15106 effect(DEF dst, USE src);
15107
15108 ins_cost(125);
15109 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15110 ins_encode %{
15111 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15112 %}
15113 ins_pipe(pipe_slow);
15114 %}
15115
15116 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15117 predicate(UseXmmLoadAndClearUpper);
15118 match(Set dst (MoveL2D src));
15119 effect(DEF dst, USE src);
15120
15121 ins_cost(125);
15122 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15123 ins_encode %{
15124 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15125 %}
15126 ins_pipe(pipe_slow);
15127 %}
15128
15129
15130 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15131 match(Set dst (MoveF2I src));
15132 effect(DEF dst, USE src);
15133
15134 ins_cost(95); // XXX
15135 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15136 ins_encode %{
15137 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15138 %}
15139 ins_pipe(pipe_slow);
15140 %}
15141
15142 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15143 match(Set dst (MoveI2F src));
15144 effect(DEF dst, USE src);
15145
15146 ins_cost(100);
15147 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15148 ins_encode %{
15149 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15150 %}
15151 ins_pipe( ialu_mem_reg );
15152 %}
15153
15154 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15155 match(Set dst (MoveD2L src));
15156 effect(DEF dst, USE src);
15157
15158 ins_cost(95); // XXX
15159 format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
15160 ins_encode %{
15161 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15162 %}
15163 ins_pipe(pipe_slow);
15164 %}
15165
15166 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15167 match(Set dst (MoveL2D src));
15168 effect(DEF dst, USE src);
15169
15170 ins_cost(100);
15171 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15172 ins_encode %{
15173 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15174 %}
15175 ins_pipe(ialu_mem_reg);
15176 %}
15177
15178 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15179 match(Set dst (MoveF2I src));
15180 effect(DEF dst, USE src);
15181 ins_cost(85);
15182 format %{ "movd $dst,$src\t# MoveF2I" %}
15183 ins_encode %{
15184 __ movdl($dst$$Register, $src$$XMMRegister);
15185 %}
15186 ins_pipe( pipe_slow );
15187 %}
15188
15189 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15190 match(Set dst (MoveD2L src));
15191 effect(DEF dst, USE src);
15192 ins_cost(85);
15193 format %{ "movd $dst,$src\t# MoveD2L" %}
15194 ins_encode %{
15195 __ movdq($dst$$Register, $src$$XMMRegister);
15196 %}
15197 ins_pipe( pipe_slow );
15198 %}
15199
15200 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15201 match(Set dst (MoveI2F src));
15202 effect(DEF dst, USE src);
15203 ins_cost(100);
15204 format %{ "movd $dst,$src\t# MoveI2F" %}
15205 ins_encode %{
15206 __ movdl($dst$$XMMRegister, $src$$Register);
15207 %}
15208 ins_pipe( pipe_slow );
15209 %}
15210
15211 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15212 match(Set dst (MoveL2D src));
15213 effect(DEF dst, USE src);
15214 ins_cost(100);
15215 format %{ "movd $dst,$src\t# MoveL2D" %}
15216 ins_encode %{
15217 __ movdq($dst$$XMMRegister, $src$$Register);
15218 %}
15219 ins_pipe( pipe_slow );
15220 %}
15221
15222 // Fast clearing of an array
15223 // Small non-constant length ClearArray for non-AVX512 targets.
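// The ClearArray rules are split along two axes: constant vs. non-constant
// length (see rep_stos_im below for the constant case) and AVX-512 vs. older
// targets; the *_large variants are selected by ClearArrayNode::is_large()
// and go straight to the bulk-store path.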
15224 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15225 Universe dummy, rFlagsReg cr)
15226 %{
15227 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15228 match(Set dummy (ClearArray cnt base));
15229 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15230
15231 format %{ $$template
15232 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15233 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15234 $$emit$$"jg LARGE\n\t"
15235 $$emit$$"dec rcx\n\t"
15236 $$emit$$"js DONE\t# Zero length\n\t"
15237 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15238 $$emit$$"dec rcx\n\t"
15239 $$emit$$"jge LOOP\n\t"
15240 $$emit$$"jmp DONE\n\t"
15241 $$emit$$"# LARGE:\n\t"
15242 if (UseFastStosb) {
15243 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15244 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15245 } else if (UseXMMForObjInit) {
15246 $$emit$$"mov rdi,rax\n\t"
15247 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15248 $$emit$$"jmpq L_zero_64_bytes\n\t"
15249 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15250 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15251 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15252 $$emit$$"add 0x40,rax\n\t"
15253 $$emit$$"# L_zero_64_bytes:\n\t"
15254 $$emit$$"sub 0x8,rcx\n\t"
15255 $$emit$$"jge L_loop\n\t"
15256 $$emit$$"add 0x4,rcx\n\t"
15257 $$emit$$"jl L_tail\n\t"
15258 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15259 $$emit$$"add 0x20,rax\n\t"
15260 $$emit$$"sub 0x4,rcx\n\t"
15261 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15262 $$emit$$"add 0x4,rcx\n\t"
15263 $$emit$$"jle L_end\n\t"
15264 $$emit$$"dec rcx\n\t"
15265 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15266 $$emit$$"vmovq xmm0,(rax)\n\t"
15267 $$emit$$"add 0x8,rax\n\t"
15268 $$emit$$"dec rcx\n\t"
15269 $$emit$$"jge L_sloop\n\t"
15270 $$emit$$"# L_end:\n\t"
15271 } else {
15272 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15273 }
15274 $$emit$$"# DONE"
15275 %}
15276 ins_encode %{
15277 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15278 $tmp$$XMMRegister, false, knoreg);
15279 %}
15280 ins_pipe(pipe_slow);
15281 %}
15282
15283 // Small non-constant length ClearArray for AVX512 targets.
15284 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15285 Universe dummy, rFlagsReg cr)
15286 %{
15287 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15288 match(Set dummy (ClearArray cnt base));
15289 ins_cost(125);
15290 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15291
15292 format %{ $$template
15293 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15294 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15295 $$emit$$"jg LARGE\n\t"
15296 $$emit$$"dec rcx\n\t"
15297 $$emit$$"js DONE\t# Zero length\n\t"
15298 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15299 $$emit$$"dec rcx\n\t"
15300 $$emit$$"jge LOOP\n\t"
15301 $$emit$$"jmp DONE\n\t"
15302 $$emit$$"# LARGE:\n\t"
15303 if (UseFastStosb) {
15304 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15305 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15306 } else if (UseXMMForObjInit) {
15307 $$emit$$"mov rdi,rax\n\t"
15308 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15309 $$emit$$"jmpq L_zero_64_bytes\n\t"
15310 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15311 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15312 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15313 $$emit$$"add 0x40,rax\n\t"
15314 $$emit$$"# L_zero_64_bytes:\n\t"
15315 $$emit$$"sub 0x8,rcx\n\t"
15316 $$emit$$"jge L_loop\n\t"
15317 $$emit$$"add 0x4,rcx\n\t"
15318 $$emit$$"jl L_tail\n\t"
15319 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15320 $$emit$$"add 0x20,rax\n\t"
15321 $$emit$$"sub 0x4,rcx\n\t"
15322 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15323 $$emit$$"add 0x4,rcx\n\t"
15324 $$emit$$"jle L_end\n\t"
15325 $$emit$$"dec rcx\n\t"
15326 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15327 $$emit$$"vmovq xmm0,(rax)\n\t"
15328 $$emit$$"add 0x8,rax\n\t"
15329 $$emit$$"dec rcx\n\t"
15330 $$emit$$"jge L_sloop\n\t"
15331 $$emit$$"# L_end:\n\t"
15332 } else {
15333 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15334 }
15335 $$emit$$"# DONE"
15336 %}
15337 ins_encode %{
15338 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15339 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15340 %}
15341 ins_pipe(pipe_slow);
15342 %}
15343
15344 // Large non-constant length ClearArray for non-AVX512 targets.
15345 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15346 Universe dummy, rFlagsReg cr)
15347 %{
15348 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15349 match(Set dummy (ClearArray cnt base));
15350 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15351
15352 format %{ $$template
15353 if (UseFastStosb) {
15354 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15355 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15356 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15357 } else if (UseXMMForObjInit) {
15358 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15359 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15360 $$emit$$"jmpq L_zero_64_bytes\n\t"
15361 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15362 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15363 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15364 $$emit$$"add 0x40,rax\n\t"
15365 $$emit$$"# L_zero_64_bytes:\n\t"
15366 $$emit$$"sub 0x8,rcx\n\t"
15367 $$emit$$"jge L_loop\n\t"
15368 $$emit$$"add 0x4,rcx\n\t"
15369 $$emit$$"jl L_tail\n\t"
15370 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15371 $$emit$$"add 0x20,rax\n\t"
15372 $$emit$$"sub 0x4,rcx\n\t"
15373 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15374 $$emit$$"add 0x4,rcx\n\t"
15375 $$emit$$"jle L_end\n\t"
15376 $$emit$$"dec rcx\n\t"
15377 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15378 $$emit$$"vmovq xmm0,(rax)\n\t"
15379 $$emit$$"add 0x8,rax\n\t"
15380 $$emit$$"dec rcx\n\t"
15381 $$emit$$"jge L_sloop\n\t"
15382 $$emit$$"# L_end:\n\t"
15383 } else {
15384 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15385 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15386 }
15387 %}
15388 ins_encode %{
15389 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15390 $tmp$$XMMRegister, true, knoreg);
15391 %}
15392 ins_pipe(pipe_slow);
15393 %}
15394
15395 // Large non-constant length ClearArray for AVX512 targets.
15396 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15397 Universe dummy, rFlagsReg cr)
15398 %{
15399 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15400 match(Set dummy (ClearArray cnt base));
15401 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15402
15403 format %{ $$template
15404 if (UseFastStosb) {
15405 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15406 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15407 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15408 } else if (UseXMMForObjInit) {
15409 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15410 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15411 $$emit$$"jmpq L_zero_64_bytes\n\t"
15412 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15413 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15414 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15415 $$emit$$"add 0x40,rax\n\t"
15416 $$emit$$"# L_zero_64_bytes:\n\t"
15417 $$emit$$"sub 0x8,rcx\n\t"
15418 $$emit$$"jge L_loop\n\t"
15419 $$emit$$"add 0x4,rcx\n\t"
15420 $$emit$$"jl L_tail\n\t"
15421 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15422 $$emit$$"add 0x20,rax\n\t"
15423 $$emit$$"sub 0x4,rcx\n\t"
15424 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15425 $$emit$$"add 0x4,rcx\n\t"
15426 $$emit$$"jle L_end\n\t"
15427 $$emit$$"dec rcx\n\t"
15428 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15429 $$emit$$"vmovq xmm0,(rax)\n\t"
15430 $$emit$$"add 0x8,rax\n\t"
15431 $$emit$$"dec rcx\n\t"
15432 $$emit$$"jge L_sloop\n\t"
15433 $$emit$$"# L_end:\n\t"
15434 } else {
15435 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15436 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15437 }
15438 %}
15439 ins_encode %{
15440 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15441 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15442 %}
15443 ins_pipe(pipe_slow);
15444 %}
15445
15446 // Small constant length ClearArray for AVX512 targets.
15447 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15448 %{
15449 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15450 match(Set dummy (ClearArray cnt base));
15451 ins_cost(100);
15452 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15453 format %{ "clear_mem_imm $base, $cnt" %}
15454 ins_encode %{
15455 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15456 %}
15457 ins_pipe(pipe_slow);
15458 %}
15459
15460 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15461 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15462 %{
15463 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15464 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15465 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15466
15467 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15468 ins_encode %{
15469 __ string_compare($str1$$Register, $str2$$Register,
15470 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15471 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15472 %}
15473 ins_pipe( pipe_slow );
15474 %}
15475
15476 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15477 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15478 %{
15479 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15480 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15481 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15482
15483 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15484 ins_encode %{
15485 __ string_compare($str1$$Register, $str2$$Register,
15486 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15487 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15488 %}
15489 ins_pipe( pipe_slow );
15490 %}
15491
15492 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15493 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15494 %{
15495 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15496 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15497 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15498
15499 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15500 ins_encode %{
15501 __ string_compare($str1$$Register, $str2$$Register,
15502 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15503 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15504 %}
15505 ins_pipe( pipe_slow );
15506 %}
15507
15508 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15509 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15510 %{
15511 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15512 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15513 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15514
15515 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15516 ins_encode %{
15517 __ string_compare($str1$$Register, $str2$$Register,
15518 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15519 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15520 %}
15521 ins_pipe( pipe_slow );
15522 %}
15523
15524 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15525 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15526 %{
15527 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15528 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15529 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15530
15531 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15532 ins_encode %{
15533 __ string_compare($str1$$Register, $str2$$Register,
15534 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15535 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15536 %}
15537 ins_pipe( pipe_slow );
15538 %}
15539
15540 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15541 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15542 %{
15543 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15544 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15545 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15546
15547 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15548 ins_encode %{
15549 __ string_compare($str1$$Register, $str2$$Register,
15550 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15551 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15552 %}
15553 ins_pipe( pipe_slow );
15554 %}
15555
15556 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15557 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15558 %{
15559 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15560 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15561 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15562
15563 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15564 ins_encode %{
15565 __ string_compare($str2$$Register, $str1$$Register,
15566 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15567 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15568 %}
15569 ins_pipe( pipe_slow );
15570 %}
15571
15572 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15573 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15574 %{
15575 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15576 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15577 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15578
15579 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15580 ins_encode %{
15581 __ string_compare($str2$$Register, $str1$$Register,
15582 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15583 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15584 %}
15585 ins_pipe( pipe_slow );
15586 %}
15587
15588 // fast search of substring with known size.
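// Note: the >= 16 (LL) / >= 8 (UU, UL) thresholds used below correspond to one
// full 16-byte XMM register worth of constant needle data, which can then be
// loaded directly without risking a read past the end of the array. Roughly
// (an illustrative sketch, not the emitted code):
//
//   if (needle_size_in_bytes >= 16) {
//     string_indexofC8(...);   // needle loaded straight from memory
//   } else {
//     string_indexof(...);     // short needle may be staged via the stack
//   }                          // when it sits near a page boundary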
15589 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15590 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15591 %{
15592 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15593 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15594 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15595
15596 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15597 ins_encode %{
15598 int icnt2 = (int)$int_cnt2$$constant;
15599 if (icnt2 >= 16) {
15600 // IndexOf for constant substrings with size >= 16 elements
15601 // which don't need to be loaded through the stack.
15602 __ string_indexofC8($str1$$Register, $str2$$Register,
15603 $cnt1$$Register, $cnt2$$Register,
15604 icnt2, $result$$Register,
15605 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15606 } else {
15607 // Small strings are loaded through the stack if they cross a page boundary.
15608 __ string_indexof($str1$$Register, $str2$$Register,
15609 $cnt1$$Register, $cnt2$$Register,
15610 icnt2, $result$$Register,
15611 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15612 }
15613 %}
15614 ins_pipe( pipe_slow );
15615 %}
15616
15617 // fast search of substring with known size.
15618 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15619 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15620 %{
15621 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15622 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15623 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15624
15625 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15626 ins_encode %{
15627 int icnt2 = (int)$int_cnt2$$constant;
15628 if (icnt2 >= 8) {
15629 // IndexOf for constant substrings with size >= 8 elements
15630 // which don't need to be loaded through the stack.
15631 __ string_indexofC8($str1$$Register, $str2$$Register,
15632 $cnt1$$Register, $cnt2$$Register,
15633 icnt2, $result$$Register,
15634 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15635 } else {
15636 // Small strings are loaded through the stack if they cross a page boundary.
15637 __ string_indexof($str1$$Register, $str2$$Register,
15638 $cnt1$$Register, $cnt2$$Register,
15639 icnt2, $result$$Register,
15640 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15641 }
15642 %}
15643 ins_pipe( pipe_slow );
15644 %}
15645
15646 // fast search of substring with known size.
15647 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15648 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15649 %{
15650 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15651 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15652 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15653
15654 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15655 ins_encode %{
15656 int icnt2 = (int)$int_cnt2$$constant;
15657 if (icnt2 >= 8) {
15658 // IndexOf for constant substrings with size >= 8 elements
15659 // which don't need to be loaded through the stack.
15660 __ string_indexofC8($str1$$Register, $str2$$Register,
15661 $cnt1$$Register, $cnt2$$Register,
15662 icnt2, $result$$Register,
15663 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15664 } else {
15665 // Small strings are loaded through the stack if they cross a page boundary.
15666 __ string_indexof($str1$$Register, $str2$$Register,
15667 $cnt1$$Register, $cnt2$$Register,
15668 icnt2, $result$$Register,
15669 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15670 }
15671 %}
15672 ins_pipe( pipe_slow );
15673 %}
15674
15675 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15676 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15677 %{
15678 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15679 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15680 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15681
15682 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15683 ins_encode %{
15684 __ string_indexof($str1$$Register, $str2$$Register,
15685 $cnt1$$Register, $cnt2$$Register,
15686 (-1), $result$$Register,
15687 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15688 %}
15689 ins_pipe( pipe_slow );
15690 %}
15691
15692 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15693 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15694 %{
15695 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15696 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15697 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15698
15699 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15700 ins_encode %{
15701 __ string_indexof($str1$$Register, $str2$$Register,
15702 $cnt1$$Register, $cnt2$$Register,
15703 (-1), $result$$Register,
15704 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15705 %}
15706 ins_pipe( pipe_slow );
15707 %}
15708
15709 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15710 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15711 %{
15712 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15713 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15714 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15715
15716 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15717 ins_encode %{
15718 __ string_indexof($str1$$Register, $str2$$Register,
15719 $cnt1$$Register, $cnt2$$Register,
15720 (-1), $result$$Register,
15721 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15722 %}
15723 ins_pipe( pipe_slow );
15724 %}
15725
15726 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15727 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15728 %{
15729 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15730 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15731 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15732 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15733 ins_encode %{
15734 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15735 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15736 %}
15737 ins_pipe( pipe_slow );
15738 %}
15739
15740 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15741 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15742 %{
15743 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15744 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15745 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15746 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15747 ins_encode %{
15748 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15749 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15750 %}
15751 ins_pipe( pipe_slow );
15752 %}
15753
15754 // fast string equals
15755 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15756 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15757 %{
15758 predicate(!VM_Version::supports_avx512vlbw());
15759 match(Set result (StrEquals (Binary str1 str2) cnt));
15760 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15761
15762 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15763 ins_encode %{
15764 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15765 $cnt$$Register, $result$$Register, $tmp3$$Register,
15766 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15767 %}
15768 ins_pipe( pipe_slow );
15769 %}
15770
15771 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15772 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15773 %{
15774 predicate(VM_Version::supports_avx512vlbw());
15775 match(Set result (StrEquals (Binary str1 str2) cnt));
15776 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15777
15778 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15779 ins_encode %{
15780 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15781 $cnt$$Register, $result$$Register, $tmp3$$Register,
15782 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15783 %}
15784 ins_pipe( pipe_slow );
15785 %}
15786
15787 // fast array equals
15788 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15789 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15790 %{
15791 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15792 match(Set result (AryEq ary1 ary2));
15793 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15794
15795 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15796 ins_encode %{
15797 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15798 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15799 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15800 %}
15801 ins_pipe( pipe_slow );
15802 %}
15803
15804 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15805 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15806 %{
15807 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15808 match(Set result (AryEq ary1 ary2));
15809 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15810
15811 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15812 ins_encode %{
15813 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15814 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15815 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15816 %}
15817 ins_pipe( pipe_slow );
15818 %}
15819
15820 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15821 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15822 %{
15823 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15824 match(Set result (AryEq ary1 ary2));
15825 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15826
15827 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15828 ins_encode %{
15829 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15830 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15831 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15832 %}
15833 ins_pipe( pipe_slow );
15834 %}
15835
15836 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15837 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15838 %{
15839 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15840 match(Set result (AryEq ary1 ary2));
15841 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15842
15843 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15844 ins_encode %{
15845 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15846 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15847 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15848 %}
15849 ins_pipe( pipe_slow );
15850 %}
15851
15852 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15853 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15854 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15855 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15856 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15857 %{
15858 predicate(UseAVX >= 2);
15859 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15860 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15861 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15862 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15863 USE basic_type, KILL cr);
15864
15865 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15866 ins_encode %{
15867 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15868 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15869 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15870 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15871 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15872 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15873 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15874 %}
15875 ins_pipe( pipe_slow );
15876 %}
15877
15878 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15879 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15880 %{
15881 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15882 match(Set result (CountPositives ary1 len));
15883 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15884
15885 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15886 ins_encode %{
15887 __ count_positives($ary1$$Register, $len$$Register,
15888 $result$$Register, $tmp3$$Register,
15889 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15890 %}
15891 ins_pipe( pipe_slow );
15892 %}
15893
15894 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15895 legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15896 %{
15897 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15898 match(Set result (CountPositives ary1 len));
15899 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15900
15901 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15902 ins_encode %{
15903 __ count_positives($ary1$$Register, $len$$Register,
15904 $result$$Register, $tmp3$$Register,
15905 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15906 %}
15907 ins_pipe( pipe_slow );
15908 %}
15909
15910 // fast char[] to byte[] compression
15911 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15912 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15913 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15914 match(Set result (StrCompressedCopy src (Binary dst len)));
15915 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15916 USE_KILL len, KILL tmp5, KILL cr);
15917
15918 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15919 ins_encode %{
15920 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15921 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15922 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15923 knoreg, knoreg);
15924 %}
15925 ins_pipe( pipe_slow );
15926 %}
15927
15928 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15929 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15930 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15931 match(Set result (StrCompressedCopy src (Binary dst len)));
15932 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15933 USE_KILL len, KILL tmp5, KILL cr);
15934
15935 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15936 ins_encode %{
15937 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15938 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15939 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15940 $ktmp1$$KRegister, $ktmp2$$KRegister);
15941 %}
15942 ins_pipe( pipe_slow );
15943 %}
15944 // fast byte[] to char[] inflation
15945 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15946 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15947 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15948 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15949 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15950
15951 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15952 ins_encode %{
15953 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15954 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15955 %}
15956 ins_pipe( pipe_slow );
15957 %}
15958
15959 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15960 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15961 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15962 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15963 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15964
15965 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15966 ins_encode %{
15967 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15968 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15969 %}
15970 ins_pipe( pipe_slow );
15971 %}
15972
15973 // encode char[] to byte[] in ISO_8859_1
15974 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15975 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15976 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15977 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15978 match(Set result (EncodeISOArray src (Binary dst len)));
15979 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15980
15981 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15982 ins_encode %{
15983 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15984 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15985 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15986 %}
15987 ins_pipe( pipe_slow );
15988 %}
15989
15990 // encode char[] to byte[] in ASCII
15991 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15992 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15993 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15994 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15995 match(Set result (EncodeISOArray src (Binary dst len)));
15996 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15997
15998 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15999 ins_encode %{
16000 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16001 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16002 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16003 %}
16004 ins_pipe( pipe_slow );
16005 %}
16006
16007 //----------Overflow Math Instructions-----------------------------------------
16008
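// These patterns back C2's overflow-checked arithmetic nodes (OverflowAddI/L,
// OverflowSubI/L, OverflowMulI/L), e.g. as produced for the Math.*Exact
// intrinsics: the real ALU operation is emitted so the OF flag it sets can be
// consumed by a following branch to the slow/deopt path. Illustrative Java
// source that can reach these rules (assuming the intrinsic is enabled):
//
//   int sum = Math.addExact(a, b);   // OverflowAddI feeds the guarding If
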
16009 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16010 %{
16011 match(Set cr (OverflowAddI op1 op2));
16012 effect(DEF cr, USE_KILL op1, USE op2);
16013
16014 format %{ "addl $op1, $op2\t# overflow check int" %}
16015
16016 ins_encode %{
16017 __ addl($op1$$Register, $op2$$Register);
16018 %}
16019 ins_pipe(ialu_reg_reg);
16020 %}
16021
16022 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16023 %{
16024 match(Set cr (OverflowAddI op1 op2));
16025 effect(DEF cr, USE_KILL op1, USE op2);
16026
16027 format %{ "addl $op1, $op2\t# overflow check int" %}
16028
16029 ins_encode %{
16030 __ addl($op1$$Register, $op2$$constant);
16031 %}
16032 ins_pipe(ialu_reg_reg);
16033 %}
16034
16035 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16036 %{
16037 match(Set cr (OverflowAddL op1 op2));
16038 effect(DEF cr, USE_KILL op1, USE op2);
16039
16040 format %{ "addq $op1, $op2\t# overflow check long" %}
16041 ins_encode %{
16042 __ addq($op1$$Register, $op2$$Register);
16043 %}
16044 ins_pipe(ialu_reg_reg);
16045 %}
16046
16047 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16048 %{
16049 match(Set cr (OverflowAddL op1 op2));
16050 effect(DEF cr, USE_KILL op1, USE op2);
16051
16052 format %{ "addq $op1, $op2\t# overflow check long" %}
16053 ins_encode %{
16054 __ addq($op1$$Register, $op2$$constant);
16055 %}
16056 ins_pipe(ialu_reg_reg);
16057 %}
16058
16059 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16060 %{
16061 match(Set cr (OverflowSubI op1 op2));
16062
16063 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16064 ins_encode %{
16065 __ cmpl($op1$$Register, $op2$$Register);
16066 %}
16067 ins_pipe(ialu_reg_reg);
16068 %}
16069
16070 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16071 %{
16072 match(Set cr (OverflowSubI op1 op2));
16073
16074 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16075 ins_encode %{
16076 __ cmpl($op1$$Register, $op2$$constant);
16077 %}
16078 ins_pipe(ialu_reg_reg);
16079 %}
16080
16081 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16082 %{
16083 match(Set cr (OverflowSubL op1 op2));
16084
16085 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16086 ins_encode %{
16087 __ cmpq($op1$$Register, $op2$$Register);
16088 %}
16089 ins_pipe(ialu_reg_reg);
16090 %}
16091
16092 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16093 %{
16094 match(Set cr (OverflowSubL op1 op2));
16095
16096 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16097 ins_encode %{
16098 __ cmpq($op1$$Register, $op2$$constant);
16099 %}
16100 ins_pipe(ialu_reg_reg);
16101 %}
16102
16103 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16104 %{
16105 match(Set cr (OverflowSubI zero op2));
16106 effect(DEF cr, USE_KILL op2);
16107
16108 format %{ "negl $op2\t# overflow check int" %}
16109 ins_encode %{
16110 __ negl($op2$$Register);
16111 %}
16112 ins_pipe(ialu_reg_reg);
16113 %}
16114
16115 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16116 %{
16117 match(Set cr (OverflowSubL zero op2));
16118 effect(DEF cr, USE_KILL op2);
16119
16120 format %{ "negq $op2\t# overflow check long" %}
16121 ins_encode %{
16122 __ negq($op2$$Register);
16123 %}
16124 ins_pipe(ialu_reg_reg);
16125 %}
16126
16127 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16128 %{
16129 match(Set cr (OverflowMulI op1 op2));
16130 effect(DEF cr, USE_KILL op1, USE op2);
16131
16132 format %{ "imull $op1, $op2\t# overflow check int" %}
16133 ins_encode %{
16134 __ imull($op1$$Register, $op2$$Register);
16135 %}
16136 ins_pipe(ialu_reg_reg_alu0);
16137 %}
16138
16139 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16140 %{
16141 match(Set cr (OverflowMulI op1 op2));
16142 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16143
16144 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16145 ins_encode %{
16146 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16147 %}
16148 ins_pipe(ialu_reg_reg_alu0);
16149 %}
16150
16151 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16152 %{
16153 match(Set cr (OverflowMulL op1 op2));
16154 effect(DEF cr, USE_KILL op1, USE op2);
16155
16156 format %{ "imulq $op1, $op2\t# overflow check long" %}
16157 ins_encode %{
16158 __ imulq($op1$$Register, $op2$$Register);
16159 %}
16160 ins_pipe(ialu_reg_reg_alu0);
16161 %}
16162
16163 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16164 %{
16165 match(Set cr (OverflowMulL op1 op2));
16166 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16167
16168 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16169 ins_encode %{
16170 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16171 %}
16172 ins_pipe(ialu_reg_reg_alu0);
16173 %}
16174
16175
16176 //----------Control Flow Instructions------------------------------------------
16177 // Signed compare Instructions
16178
16179 // XXX more variants!!
16180 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16181 %{
16182 match(Set cr (CmpI op1 op2));
16183 effect(DEF cr, USE op1, USE op2);
16184
16185 format %{ "cmpl $op1, $op2" %}
16186 ins_encode %{
16187 __ cmpl($op1$$Register, $op2$$Register);
16188 %}
16189 ins_pipe(ialu_cr_reg_reg);
16190 %}
16191
16192 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16193 %{
16194 match(Set cr (CmpI op1 op2));
16195
16196 format %{ "cmpl $op1, $op2" %}
16197 ins_encode %{
16198 __ cmpl($op1$$Register, $op2$$constant);
16199 %}
16200 ins_pipe(ialu_cr_reg_imm);
16201 %}
16202
16203 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16204 %{
16205 match(Set cr (CmpI op1 (LoadI op2)));
16206
16207 ins_cost(500); // XXX
16208 format %{ "cmpl $op1, $op2" %}
16209 ins_encode %{
16210 __ cmpl($op1$$Register, $op2$$Address);
16211 %}
16212 ins_pipe(ialu_cr_reg_mem);
16213 %}
16214
16215 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16216 %{
16217 match(Set cr (CmpI src zero));
16218
16219 format %{ "testl $src, $src" %}
16220 ins_encode %{
16221 __ testl($src$$Register, $src$$Register);
16222 %}
16223 ins_pipe(ialu_cr_reg_imm);
16224 %}
16225
16226 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16227 %{
16228 match(Set cr (CmpI (AndI src con) zero));
16229
16230 format %{ "testl $src, $con" %}
16231 ins_encode %{
16232 __ testl($src$$Register, $con$$constant);
16233 %}
16234 ins_pipe(ialu_cr_reg_imm);
16235 %}
16236
16237 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16238 %{
16239 match(Set cr (CmpI (AndI src1 src2) zero));
16240
16241 format %{ "testl $src1, $src2" %}
16242 ins_encode %{
16243 __ testl($src1$$Register, $src2$$Register);
16244 %}
16245 ins_pipe(ialu_cr_reg_imm);
16246 %}
16247
16248 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16249 %{
16250 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16251
16252 format %{ "testl $src, $mem" %}
16253 ins_encode %{
16254 __ testl($src$$Register, $mem$$Address);
16255 %}
16256 ins_pipe(ialu_cr_reg_mem);
16257 %}
16258
16259 // Unsigned compare Instructions; really, same as signed except they
16260 // produce an rFlagsRegU instead of rFlagsReg.
16261 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16262 %{
16263 match(Set cr (CmpU op1 op2));
16264
16265 format %{ "cmpl $op1, $op2\t# unsigned" %}
16266 ins_encode %{
16267 __ cmpl($op1$$Register, $op2$$Register);
16268 %}
16269 ins_pipe(ialu_cr_reg_reg);
16270 %}
16271
16272 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16273 %{
16274 match(Set cr (CmpU op1 op2));
16275
16276 format %{ "cmpl $op1, $op2\t# unsigned" %}
16277 ins_encode %{
16278 __ cmpl($op1$$Register, $op2$$constant);
16279 %}
16280 ins_pipe(ialu_cr_reg_imm);
16281 %}
16282
16283 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16284 %{
16285 match(Set cr (CmpU op1 (LoadI op2)));
16286
16287 ins_cost(500); // XXX
16288 format %{ "cmpl $op1, $op2\t# unsigned" %}
16289 ins_encode %{
16290 __ cmpl($op1$$Register, $op2$$Address);
16291 %}
16292 ins_pipe(ialu_cr_reg_mem);
16293 %}
16294
16295 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16296 %{
16297 match(Set cr (CmpU src zero));
16298
16299 format %{ "testl $src, $src\t# unsigned" %}
16300 ins_encode %{
16301 __ testl($src$$Register, $src$$Register);
16302 %}
16303 ins_pipe(ialu_cr_reg_imm);
16304 %}
16305
16306 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16307 %{
16308 match(Set cr (CmpP op1 op2));
16309
16310 format %{ "cmpq $op1, $op2\t# ptr" %}
16311 ins_encode %{
16312 __ cmpq($op1$$Register, $op2$$Register);
16313 %}
16314 ins_pipe(ialu_cr_reg_reg);
16315 %}
16316
16317 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16318 %{
16319 match(Set cr (CmpP op1 (LoadP op2)));
16320 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16321
16322 ins_cost(500); // XXX
16323 format %{ "cmpq $op1, $op2\t# ptr" %}
16324 ins_encode %{
16325 __ cmpq($op1$$Register, $op2$$Address);
16326 %}
16327 ins_pipe(ialu_cr_reg_mem);
16328 %}
16329
16330 // XXX this is generalized by compP_rReg_mem???
16331 // Compare raw pointer (used in out-of-heap check).
16332 // Only works because non-oop pointers must be raw pointers
16333 // and raw pointers have no anti-dependencies.
16334 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16335 %{
16336 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16337 n->in(2)->as_Load()->barrier_data() == 0);
16338 match(Set cr (CmpP op1 (LoadP op2)));
16339
16340 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16341 ins_encode %{
16342 __ cmpq($op1$$Register, $op2$$Address);
16343 %}
16344 ins_pipe(ialu_cr_reg_mem);
16345 %}
16346
16347 // This will generate a signed flags result. This should be OK since
16348 // any compare to a zero should be eq/neq.
16349 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16350 %{
16351 match(Set cr (CmpP src zero));
16352
16353 format %{ "testq $src, $src\t# ptr" %}
16354 ins_encode %{
16355 __ testq($src$$Register, $src$$Register);
16356 %}
16357 ins_pipe(ialu_cr_reg_imm);
16358 %}
16359
16360 // This will generate a signed flags result. This should be OK since
16361 // any compare to a zero should be eq/neq.
16362 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16363 %{
16364 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16365 n->in(1)->as_Load()->barrier_data() == 0);
16366 match(Set cr (CmpP (LoadP op) zero));
16367
16368 ins_cost(500); // XXX
16369 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16370 ins_encode %{
16371 __ testq($op$$Address, 0xFFFFFFFF);
16372 %}
16373 ins_pipe(ialu_cr_reg_imm);
16374 %}
16375
16376 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16377 %{
16378 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16379 n->in(1)->as_Load()->barrier_data() == 0);
16380 match(Set cr (CmpP (LoadP mem) zero));
16381
16382 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16383 ins_encode %{
16384 __ cmpq(r12, $mem$$Address);
16385 %}
16386 ins_pipe(ialu_cr_reg_mem);
16387 %}
16388
16389 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16390 %{
16391 match(Set cr (CmpN op1 op2));
16392
16393 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16394 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16395 ins_pipe(ialu_cr_reg_reg);
16396 %}
16397
16398 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16399 %{
16400 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16401 match(Set cr (CmpN src (LoadN mem)));
16402
16403 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16404 ins_encode %{
16405 __ cmpl($src$$Register, $mem$$Address);
16406 %}
16407 ins_pipe(ialu_cr_reg_mem);
16408 %}
16409
16410 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16411 match(Set cr (CmpN op1 op2));
16412
16413 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16414 ins_encode %{
16415 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16416 %}
16417 ins_pipe(ialu_cr_reg_imm);
16418 %}
16419
16420 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16421 %{
16422 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16423 match(Set cr (CmpN src (LoadN mem)));
16424
16425 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16426 ins_encode %{
16427 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16428 %}
16429 ins_pipe(ialu_cr_reg_mem);
16430 %}
16431
16432 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16433 match(Set cr (CmpN op1 op2));
16434
16435 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16436 ins_encode %{
16437 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16438 %}
16439 ins_pipe(ialu_cr_reg_imm);
16440 %}
16441
16442 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16443 %{
16444 predicate(!UseCompactObjectHeaders);
16445 match(Set cr (CmpN src (LoadNKlass mem)));
16446
16447 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16448 ins_encode %{
16449 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16450 %}
16451 ins_pipe(ialu_cr_reg_mem);
16452 %}
16453
16454 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16455 match(Set cr (CmpN src zero));
16456
16457 format %{ "testl $src, $src\t# compressed ptr" %}
16458 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16459 ins_pipe(ialu_cr_reg_imm);
16460 %}
16461
16462 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16463 %{
16464 predicate(CompressedOops::base() != nullptr &&
16465 n->in(1)->as_Load()->barrier_data() == 0);
16466 match(Set cr (CmpN (LoadN mem) zero));
16467
16468 ins_cost(500); // XXX
16469 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16470 ins_encode %{
16471 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16472 %}
16473 ins_pipe(ialu_cr_reg_mem);
16474 %}
16475
16476 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16477 %{
16478 predicate(CompressedOops::base() == nullptr &&
16479 n->in(1)->as_Load()->barrier_data() == 0);
16480 match(Set cr (CmpN (LoadN mem) zero));
16481
16482 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16483 ins_encode %{
16484 __ cmpl(r12, $mem$$Address);
16485 %}
16486 ins_pipe(ialu_cr_reg_mem);
16487 %}
16488
16489 // Yanked all unsigned pointer compare operations.
16490 // Pointer compares are done with CmpP which is already unsigned.
16491
16492 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16493 %{
16494 match(Set cr (CmpL op1 op2));
16495
16496 format %{ "cmpq $op1, $op2" %}
16497 ins_encode %{
16498 __ cmpq($op1$$Register, $op2$$Register);
16499 %}
16500 ins_pipe(ialu_cr_reg_reg);
16501 %}
16502
16503 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16504 %{
16505 match(Set cr (CmpL op1 op2));
16506
16507 format %{ "cmpq $op1, $op2" %}
16508 ins_encode %{
16509 __ cmpq($op1$$Register, $op2$$constant);
16510 %}
16511 ins_pipe(ialu_cr_reg_imm);
16512 %}
16513
16514 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16515 %{
16516 match(Set cr (CmpL op1 (LoadL op2)));
16517
16518 format %{ "cmpq $op1, $op2" %}
16519 ins_encode %{
16520 __ cmpq($op1$$Register, $op2$$Address);
16521 %}
16522 ins_pipe(ialu_cr_reg_mem);
16523 %}
16524
16525 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16526 %{
16527 match(Set cr (CmpL src zero));
16528
16529 format %{ "testq $src, $src" %}
16530 ins_encode %{
16531 __ testq($src$$Register, $src$$Register);
16532 %}
16533 ins_pipe(ialu_cr_reg_imm);
16534 %}
16535
16536 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16537 %{
16538 match(Set cr (CmpL (AndL src con) zero));
16539
16540 format %{ "testq $src, $con\t# long" %}
16541 ins_encode %{
16542 __ testq($src$$Register, $con$$constant);
16543 %}
16544 ins_pipe(ialu_cr_reg_imm);
16545 %}
16546
16547 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16548 %{
16549 match(Set cr (CmpL (AndL src1 src2) zero));
16550
16551 format %{ "testq $src1, $src2\t# long" %}
16552 ins_encode %{
16553 __ testq($src1$$Register, $src2$$Register);
16554 %}
16555 ins_pipe(ialu_cr_reg_imm);
16556 %}
16557
16558 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16559 %{
16560 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16561
16562 format %{ "testq $src, $mem" %}
16563 ins_encode %{
16564 __ testq($src$$Register, $mem$$Address);
16565 %}
16566 ins_pipe(ialu_cr_reg_mem);
16567 %}
16568
16569 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16570 %{
16571 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16572
16573 format %{ "testq $src, $mem" %}
16574 ins_encode %{
16575 __ testq($src$$Register, $mem$$Address);
16576 %}
16577 ins_pipe(ialu_cr_reg_mem);
16578 %}
16579
16580 // Manifest a CmpU result in an integer register. Very painful.
16581 // This is the test to avoid.
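//
// Semantically this materializes a three-way unsigned compare; roughly
// (illustrative only):
//
//   dst = (unsigned)src1 < (unsigned)src2 ? -1 : (src1 == src2 ? 0 : 1);
//
// hence the movl $dst, -1 / jb / setcc(notZero) sequence in the encoding.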
16582 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16583 %{
16584 match(Set dst (CmpU3 src1 src2));
16585 effect(KILL flags);
16586
16587 ins_cost(275); // XXX
16588 format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
16589 "movl $dst, -1\n\t"
16590 "jb,u done\n\t"
16591 "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16592 "done:" %}
16593 ins_encode %{
16594 Label done;
16595 __ cmpl($src1$$Register, $src2$$Register);
16596 __ movl($dst$$Register, -1);
16597 __ jccb(Assembler::below, done);
16598 __ setcc(Assembler::notZero, $dst$$Register);
16599 __ bind(done);
16600 %}
16601 ins_pipe(pipe_slow);
16602 %}
16603
16604 // Manifest a CmpL result in an integer register. Very painful.
16605 // This is the test to avoid.
16606 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16607 %{
16608 match(Set dst (CmpL3 src1 src2));
16609 effect(KILL flags);
16610
16611 ins_cost(275); // XXX
16612 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16613 "movl $dst, -1\n\t"
16614 "jl,s done\n\t"
16615 "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16616 "done:" %}
16617 ins_encode %{
16618 Label done;
16619 __ cmpq($src1$$Register, $src2$$Register);
16620 __ movl($dst$$Register, -1);
16621 __ jccb(Assembler::less, done);
16622 __ setcc(Assembler::notZero, $dst$$Register);
16623 __ bind(done);
16624 %}
16625 ins_pipe(pipe_slow);
16626 %}
16627
16628 // Manifest a CmpUL result in an integer register. Very painful.
16629 // This is the test to avoid.
16630 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16631 %{
16632 match(Set dst (CmpUL3 src1 src2));
16633 effect(KILL flags);
16634
16635 ins_cost(275); // XXX
16636 format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
16637 "movl $dst, -1\n\t"
16638 "jb,u done\n\t"
16639 "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16640 "done:" %}
16641 ins_encode %{
16642 Label done;
16643 __ cmpq($src1$$Register, $src2$$Register);
16644 __ movl($dst$$Register, -1);
16645 __ jccb(Assembler::below, done);
16646 __ setcc(Assembler::notZero, $dst$$Register);
16647 __ bind(done);
16648 %}
16649 ins_pipe(pipe_slow);
16650 %}
16651
16652 // Unsigned long compare Instructions; really, same as signed long except they
16653 // produce an rFlagsRegU instead of rFlagsReg.
16654 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16655 %{
16656 match(Set cr (CmpUL op1 op2));
16657
16658 format %{ "cmpq $op1, $op2\t# unsigned" %}
16659 ins_encode %{
16660 __ cmpq($op1$$Register, $op2$$Register);
16661 %}
16662 ins_pipe(ialu_cr_reg_reg);
16663 %}
16664
16665 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16666 %{
16667 match(Set cr (CmpUL op1 op2));
16668
16669 format %{ "cmpq $op1, $op2\t# unsigned" %}
16670 ins_encode %{
16671 __ cmpq($op1$$Register, $op2$$constant);
16672 %}
16673 ins_pipe(ialu_cr_reg_imm);
16674 %}
16675
16676 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16677 %{
16678 match(Set cr (CmpUL op1 (LoadL op2)));
16679
16680 format %{ "cmpq $op1, $op2\t# unsigned" %}
16681 ins_encode %{
16682 __ cmpq($op1$$Register, $op2$$Address);
16683 %}
16684 ins_pipe(ialu_cr_reg_mem);
16685 %}
16686
16687 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16688 %{
16689 match(Set cr (CmpUL src zero));
16690
16691 format %{ "testq $src, $src\t# unsigned" %}
16692 ins_encode %{
16693 __ testq($src$$Register, $src$$Register);
16694 %}
16695 ins_pipe(ialu_cr_reg_imm);
16696 %}
16697
16698 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16699 %{
16700 match(Set cr (CmpI (LoadB mem) imm));
16701
16702 ins_cost(125);
16703 format %{ "cmpb $mem, $imm" %}
16704 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16705 ins_pipe(ialu_cr_reg_mem);
16706 %}
16707
16708 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16709 %{
16710 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16711
16712 ins_cost(125);
16713 format %{ "testb $mem, $imm\t# ubyte" %}
16714 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16715 ins_pipe(ialu_cr_reg_mem);
16716 %}
16717
16718 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16719 %{
16720 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16721
16722 ins_cost(125);
16723 format %{ "testb $mem, $imm\t# byte" %}
16724 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16725 ins_pipe(ialu_cr_reg_mem);
16726 %}
16727
16728 //----------Max and Min--------------------------------------------------------
16729 // Min Instructions
16730
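// There is no single min/max integer instruction on x86-64, so the MinI/MaxI
// rules below expand into a compare feeding a conditional move. For MinI the
// net effect is roughly (illustrative only):
//
//   cmpl    dst, src
//   cmovlgt dst, src    // dst = (dst > src) ? src : dst  == min(dst, src)
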
16731 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16732 %{
16733 predicate(!UseAPX);
16734 effect(USE_DEF dst, USE src, USE cr);
16735
16736 format %{ "cmovlgt $dst, $src\t# min" %}
16737 ins_encode %{
16738 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16739 %}
16740 ins_pipe(pipe_cmov_reg);
16741 %}
16742
16743 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16744 %{
16745 predicate(UseAPX);
16746 effect(DEF dst, USE src1, USE src2, USE cr);
16747
16748 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16749 ins_encode %{
16750 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16751 %}
16752 ins_pipe(pipe_cmov_reg);
16753 %}
16754
16755 instruct minI_rReg(rRegI dst, rRegI src)
16756 %{
16757 predicate(!UseAPX);
16758 match(Set dst (MinI dst src));
16759
16760 ins_cost(200);
16761 expand %{
16762 rFlagsReg cr;
16763 compI_rReg(cr, dst, src);
16764 cmovI_reg_g(dst, src, cr);
16765 %}
16766 %}
16767
16768 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16769 %{
16770 predicate(UseAPX);
16771 match(Set dst (MinI src1 src2));
16772 effect(DEF dst, USE src1, USE src2);
16773 flag(PD::Flag_ndd_demotable_opr1);
16774
16775 ins_cost(200);
16776 expand %{
16777 rFlagsReg cr;
16778 compI_rReg(cr, src1, src2);
16779 cmovI_reg_g_ndd(dst, src1, src2, cr);
16780 %}
16781 %}
16782
16783 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16784 %{
16785 predicate(!UseAPX);
16786 effect(USE_DEF dst, USE src, USE cr);
16787
16788 format %{ "cmovllt $dst, $src\t# max" %}
16789 ins_encode %{
16790 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16791 %}
16792 ins_pipe(pipe_cmov_reg);
16793 %}
16794
16795 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16796 %{
16797 predicate(UseAPX);
16798 effect(DEF dst, USE src1, USE src2, USE cr);
16799
16800 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16801 ins_encode %{
16802 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16803 %}
16804 ins_pipe(pipe_cmov_reg);
16805 %}
16806
16807 instruct maxI_rReg(rRegI dst, rRegI src)
16808 %{
16809 predicate(!UseAPX);
16810 match(Set dst (MaxI dst src));
16811
16812 ins_cost(200);
16813 expand %{
16814 rFlagsReg cr;
16815 compI_rReg(cr, dst, src);
16816 cmovI_reg_l(dst, src, cr);
16817 %}
16818 %}
16819
16820 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16821 %{
16822 predicate(UseAPX);
16823 match(Set dst (MaxI src1 src2));
16824 effect(DEF dst, USE src1, USE src2);
16825 flag(PD::Flag_ndd_demotable_opr1);
16826
16827 ins_cost(200);
16828 expand %{
16829 rFlagsReg cr;
16830 compI_rReg(cr, src1, src2);
16831 cmovI_reg_l_ndd(dst, src1, src2, cr);
16832 %}
16833 %}
16834
16835 // ============================================================================
16836 // Branch Instructions
16837
16838 // Jump Direct - Label defines a relative address from JMP+1
16839 instruct jmpDir(label labl)
16840 %{
16841 match(Goto);
16842 effect(USE labl);
16843
16844 ins_cost(300);
16845 format %{ "jmp $labl" %}
16846 size(5);
16847 ins_encode %{
16848 Label* L = $labl$$label;
16849 __ jmp(*L, false); // Always long jump
16850 %}
16851 ins_pipe(pipe_jmp);
16852 %}
16853
16854 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16855 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16856 %{
16857 match(If cop cr);
16858 effect(USE labl);
16859
16860 ins_cost(300);
16861 format %{ "j$cop $labl" %}
16862 size(6);
16863 ins_encode %{
16864 Label* L = $labl$$label;
16865 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16866 %}
16867 ins_pipe(pipe_jcc);
16868 %}
16869
16870 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16871 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16872 %{
16873 match(CountedLoopEnd cop cr);
16874 effect(USE labl);
16875
16876 ins_cost(300);
16877 format %{ "j$cop $labl\t# loop end" %}
16878 size(6);
16879 ins_encode %{
16880 Label* L = $labl$$label;
16881 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16882 %}
16883 ins_pipe(pipe_jcc);
16884 %}
16885
16886 // Jump Direct Conditional - using unsigned comparison
16887 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16888 match(If cop cmp);
16889 effect(USE labl);
16890
16891 ins_cost(300);
16892 format %{ "j$cop,u $labl" %}
16893 size(6);
16894 ins_encode %{
16895 Label* L = $labl$$label;
16896 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16897 %}
16898 ins_pipe(pipe_jcc);
16899 %}
16900
16901 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16902 match(If cop cmp);
16903 effect(USE labl);
16904
16905 ins_cost(200);
16906 format %{ "j$cop,u $labl" %}
16907 size(6);
16908 ins_encode %{
16909 Label* L = $labl$$label;
16910 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16911 %}
16912 ins_pipe(pipe_jcc);
16913 %}
16914
16915 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16916 match(If cop cmp);
16917 effect(USE labl);
16918
16919 ins_cost(200);
16920 format %{ $$template
16921 if ($cop$$cmpcode == Assembler::notEqual) {
16922 $$emit$$"jp,u $labl\n\t"
16923 $$emit$$"j$cop,u $labl"
16924 } else {
16925 $$emit$$"jp,u done\n\t"
16926 $$emit$$"j$cop,u $labl\n\t"
16927 $$emit$$"done:"
16928 }
16929 %}
16930 ins_encode %{
16931 Label* l = $labl$$label;
16932 if ($cop$$cmpcode == Assembler::notEqual) {
16933 __ jcc(Assembler::parity, *l, false);
16934 __ jcc(Assembler::notEqual, *l, false);
16935 } else if ($cop$$cmpcode == Assembler::equal) {
16936 Label done;
16937 __ jccb(Assembler::parity, done);
16938 __ jcc(Assembler::equal, *l, false);
16939 __ bind(done);
16940 } else {
16941 ShouldNotReachHere();
16942 }
16943 %}
16944 ins_pipe(pipe_jcc);
16945 %}
16946
16947 // Jump Direct Conditional - using signed and unsigned comparison
16948 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16949 match(If cop cmp);
16950 effect(USE labl);
16951
16952 ins_cost(200);
16953 format %{ "j$cop,su $labl" %}
16954 size(6);
16955 ins_encode %{
16956 Label* L = $labl$$label;
16957 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16958 %}
16959 ins_pipe(pipe_jcc);
16960 %}
16961
16962 // ============================================================================
16963 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16964 // superklass array for an instance of the superklass. Set a hidden
16965 // internal cache on a hit (cache is checked with exposed code in
16966 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16967 // encoding ALSO sets flags.
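//
// In C-like pseudocode the linear scan done by the encoding below is
// approximately (names abbreviated, illustrative only):
//
//   Array<Klass*>* sss = sub->secondary_supers();
//   for (int i = 0; i < sss->length(); i++) {
//     if (sss->at(i) == super) {
//       sub->set_secondary_super_cache(super);  // remember the hit
//       return 0;                               // zero == hit
//     }
//   }
//   return nonzero;                             // miss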
16968
16969 instruct partialSubtypeCheck(rdi_RegP result,
16970 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16971 rFlagsReg cr)
16972 %{
16973 match(Set result (PartialSubtypeCheck sub super));
16974 predicate(!UseSecondarySupersTable);
16975 effect(KILL rcx, KILL cr);
16976
16977 ins_cost(1100); // slightly larger than the next version
16978 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16979 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16980 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16981 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16982 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16983 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16984 "xorq $result, $result\t\t# Hit: rdi zero\n\t"
16985 "miss:\t" %}
16986
16987 ins_encode %{
16988 Label miss;
16989 // NB: Callers may assume that, when $result is a valid register,
16990 // check_klass_subtype_slow_path_linear sets it to a nonzero
16991 // value.
16992 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16993 $rcx$$Register, $result$$Register,
16994 nullptr, &miss,
16995 /*set_cond_codes:*/ true);
16996 __ xorptr($result$$Register, $result$$Register);
16997 __ bind(miss);
16998 %}
16999
17000 ins_pipe(pipe_slow);
17001 %}
17002
17003 // ============================================================================
17004 // Two versions of hashtable-based partialSubtypeCheck, both used when
17005 // we need to search for a super class in the secondary supers array.
17006 // The first is used when we don't know _a priori_ the class being
17007 // searched for. The second, far more common, is used when we do know:
17008 // this is used for instanceof, checkcast, and any case where C2 can
17009 // determine it by constant propagation.
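//
// For example, a constant-type test such as
//
//   if (x instanceof Foo) { ... }   // Foo is a compile-time constant
//
// uses the ConstSuper variant below, while a check against a non-constant
// class (e.g. someClass.isInstance(x)) falls back to the VarSuper variant.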
17010
17011 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17012 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17013 rFlagsReg cr)
17014 %{
17015 match(Set result (PartialSubtypeCheck sub super));
17016 predicate(UseSecondarySupersTable);
17017 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17018
17019 ins_cost(1000);
17020 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17021
17022 ins_encode %{
17023 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17024 $temp3$$Register, $temp4$$Register, $result$$Register);
17025 %}
17026
17027 ins_pipe(pipe_slow);
17028 %}
17029
17030 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17031 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17032 rFlagsReg cr)
17033 %{
17034 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17035 predicate(UseSecondarySupersTable);
17036 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17037
17038 ins_cost(700); // smaller than the next version
17039 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17040
17041 ins_encode %{
17042 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17043 if (InlineSecondarySupersTest) {
17044 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17045 $temp3$$Register, $temp4$$Register, $result$$Register,
17046 super_klass_slot);
17047 } else {
17048 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17049 }
17050 %}
17051
17052 ins_pipe(pipe_slow);
17053 %}
17054
17055 // ============================================================================
17056 // Branch Instructions -- short offset versions
17057 //
17058 // These instructions are used to replace jumps of a long offset (the default
17059 // match) with jumps of a shorter offset. These instructions are all tagged
17060 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17061 // match rules in general matching. Instead, the ADLC generates a conversion
17062 // method in the MachNode which can be used to do in-place replacement of the
17063 // long variant with the shorter variant. The compiler determines whether the
17064 // short variant can be used via the is_short_branch_offset() predicate in the
17065 // machine specific code section of the file.
17066
17067 // Jump Direct - Label defines a relative address from JMP+1
17068 instruct jmpDir_short(label labl) %{
17069 match(Goto);
17070 effect(USE labl);
17071
17072 ins_cost(300);
17073 format %{ "jmp,s $labl" %}
17074 size(2);
17075 ins_encode %{
17076 Label* L = $labl$$label;
17077 __ jmpb(*L);
17078 %}
17079 ins_pipe(pipe_jmp);
17080 ins_short_branch(1);
17081 %}
17082
17083 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17084 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17085 match(If cop cr);
17086 effect(USE labl);
17087
17088 ins_cost(300);
17089 format %{ "j$cop,s $labl" %}
17090 size(2);
17091 ins_encode %{
17092 Label* L = $labl$$label;
17093 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17094 %}
17095 ins_pipe(pipe_jcc);
17096 ins_short_branch(1);
17097 %}
17098
17099 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17100 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17101 match(CountedLoopEnd cop cr);
17102 effect(USE labl);
17103
17104 ins_cost(300);
17105 format %{ "j$cop,s $labl\t# loop end" %}
17106 size(2);
17107 ins_encode %{
17108 Label* L = $labl$$label;
17109 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17110 %}
17111 ins_pipe(pipe_jcc);
17112 ins_short_branch(1);
17113 %}
17114
17115 // Jump Direct Conditional - using unsigned comparison
17116 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17117 match(If cop cmp);
17118 effect(USE labl);
17119
17120 ins_cost(300);
17121 format %{ "j$cop,us $labl" %}
17122 size(2);
17123 ins_encode %{
17124 Label* L = $labl$$label;
17125 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17126 %}
17127 ins_pipe(pipe_jcc);
17128 ins_short_branch(1);
17129 %}
17130
17131 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17132 match(If cop cmp);
17133 effect(USE labl);
17134
17135 ins_cost(300);
17136 format %{ "j$cop,us $labl" %}
17137 size(2);
17138 ins_encode %{
17139 Label* L = $labl$$label;
17140 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17141 %}
17142 ins_pipe(pipe_jcc);
17143 ins_short_branch(1);
17144 %}
17145
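// The cmpOpUCF2 condition below is assumed to come from an unordered
// floating-point compare (ucomiss/ucomisd), which signals "unordered" (NaN)
// via the parity flag: for not-equal the branch must also be taken when
// unordered, so we branch on parity or not-equal; for equal an unordered
// result must not branch, so a parity branch first jumps over the je.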
17146 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17147 match(If cop cmp);
17148 effect(USE labl);
17149
17150 ins_cost(300);
17151 format %{ $$template
17152 if ($cop$$cmpcode == Assembler::notEqual) {
17153 $$emit$$"jp,u,s $labl\n\t"
17154 $$emit$$"j$cop,u,s $labl"
17155 } else {
17156 $$emit$$"jp,u,s done\n\t"
17157 $$emit$$"j$cop,u,s $labl\n\t"
17158 $$emit$$"done:"
17159 }
17160 %}
17161 size(4);
17162 ins_encode %{
17163 Label* l = $labl$$label;
17164 if ($cop$$cmpcode == Assembler::notEqual) {
17165 __ jccb(Assembler::parity, *l);
17166 __ jccb(Assembler::notEqual, *l);
17167 } else if ($cop$$cmpcode == Assembler::equal) {
17168 Label done;
17169 __ jccb(Assembler::parity, done);
17170 __ jccb(Assembler::equal, *l);
17171 __ bind(done);
17172 } else {
17173 ShouldNotReachHere();
17174 }
17175 %}
17176 ins_pipe(pipe_jcc);
17177 ins_short_branch(1);
17178 %}
17179
17180 // Jump Direct Conditional - using signed and unsigned comparison
17181 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17182 match(If cop cmp);
17183 effect(USE labl);
17184
17185 ins_cost(300);
17186 format %{ "j$cop,sus $labl" %}
17187 size(2);
17188 ins_encode %{
17189 Label* L = $labl$$label;
17190 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17191 %}
17192 ins_pipe(pipe_jcc);
17193 ins_short_branch(1);
17194 %}
17195
17196 // ============================================================================
17197 // inlined locking and unlocking
17198
17199 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17200 match(Set cr (FastLock object box));
17201 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17202 ins_cost(300);
17203 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17204 ins_encode %{
17205 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17206 %}
17207 ins_pipe(pipe_slow);
17208 %}
17209
17210 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17211 match(Set cr (FastUnlock object rax_reg));
17212 effect(TEMP tmp, USE_KILL rax_reg);
17213 ins_cost(300);
17214 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17215 ins_encode %{
17216 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17217 %}
17218 ins_pipe(pipe_slow);
17219 %}
17220
17221
17222 // ============================================================================
17223 // Safepoint Instructions
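// The poll operand is assumed to hold the thread-local polling page address;
// the read is harmless until the VM arms the page for a safepoint, at which
// point the access faults and the signal handler directs the thread into the
// safepoint handler.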
17224 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17225 %{
17226 match(SafePoint poll);
17227 effect(KILL cr, USE poll);
17228
17229 format %{ "testl rax, [$poll]\t"
17230 "# Safepoint: poll for GC" %}
17231 ins_cost(125);
17232 ins_encode %{
17233 __ relocate(relocInfo::poll_type);
17234 address pre_pc = __ pc();
17235 __ testl(rax, Address($poll$$Register, 0));
17236 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17237 %}
17238 ins_pipe(ialu_reg_mem);
17239 %}
17240
17241 instruct mask_all_evexL(kReg dst, rRegL src) %{
17242 match(Set dst (MaskAll src));
17243 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17244 ins_encode %{
17245 int mask_len = Matcher::vector_length(this);
17246 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17247 %}
17248 ins_pipe( pipe_slow );
17249 %}
17250
17251 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17252 predicate(Matcher::vector_length(n) > 32);
17253 match(Set dst (MaskAll src));
17254 effect(TEMP tmp);
17255 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17256 ins_encode %{
17257 int mask_len = Matcher::vector_length(this);
17258 __ movslq($tmp$$Register, $src$$Register);
17259 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17260 %}
17261 ins_pipe( pipe_slow );
17262 %}
17263
17264 // ============================================================================
17265 // Procedure Call/Return Instructions
17266 // Call Java Static Instruction
17267 // Note: If this code changes, the corresponding ret_addr_offset() and
17268 // compute_padding() functions will have to be adjusted.
17269 instruct CallStaticJavaDirect(method meth) %{
17270 match(CallStaticJava);
17271 effect(USE meth);
17272
17273 ins_cost(300);
17274 format %{ "call,static " %}
17275 opcode(0xE8); /* E8 cd */
17276 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17277 ins_pipe(pipe_slow);
17278 ins_alignment(4);
17279 %}
17280
17281 // Call Java Dynamic Instruction
17282 // Note: If this code changes, the corresponding ret_addr_offset() and
17283 // compute_padding() functions will have to be adjusted.
17284 instruct CallDynamicJavaDirect(method meth)
17285 %{
17286 match(CallDynamicJava);
17287 effect(USE meth);
17288
17289 ins_cost(300);
17290 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17291 "call,dynamic " %}
17292 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17293 ins_pipe(pipe_slow);
17294 ins_alignment(4);
17295 %}
17296
17297 // Call Runtime Instruction
17298 instruct CallRuntimeDirect(method meth)
17299 %{
17300 match(CallRuntime);
17301 effect(USE meth);
17302
17303 ins_cost(300);
17304 format %{ "call,runtime " %}
17305 ins_encode(clear_avx, Java_To_Runtime(meth));
17306 ins_pipe(pipe_slow);
17307 %}
17308
17309 // Call runtime without safepoint
17310 instruct CallLeafDirect(method meth)
17311 %{
17312 match(CallLeaf);
17313 effect(USE meth);
17314
17315 ins_cost(300);
17316 format %{ "call_leaf,runtime " %}
17317 ins_encode(clear_avx, Java_To_Runtime(meth));
17318 ins_pipe(pipe_slow);
17319 %}
17320
17321 // Call runtime without safepoint and with vector arguments
17322 instruct CallLeafDirectVector(method meth)
17323 %{
17324 match(CallLeafVector);
17325 effect(USE meth);
17326
17327 ins_cost(300);
17328 format %{ "call_leaf,vector " %}
17329 ins_encode(Java_To_Runtime(meth));
17330 ins_pipe(pipe_slow);
17331 %}
17332
17333 // Call runtime without safepoint
17334 instruct CallLeafNoFPDirect(method meth)
17335 %{
17336 match(CallLeafNoFP);
17337 effect(USE meth);
17338
17339 ins_cost(300);
17340 format %{ "call_leaf_nofp,runtime " %}
17341 ins_encode(clear_avx, Java_To_Runtime(meth));
17342 ins_pipe(pipe_slow);
17343 %}
17344
17345 // Return Instruction
17346 // Remove the return address & jump to it.
17347 // Notice: We always emit a nop after a ret to make sure there is room
17348 // for safepoint patching
17349 instruct Ret()
17350 %{
17351 match(Return);
17352
17353 format %{ "ret" %}
17354 ins_encode %{
17355 __ ret(0);
17356 %}
17357 ins_pipe(pipe_jmp);
17358 %}
17359
17360 // Tail Call; Jump from runtime stub to Java code.
17361 // Also known as an 'interprocedural jump'.
17362 // Target of jump will eventually return to caller.
17363 // TailJump below removes the return address.
17364 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17365 // emitted just above the TailCall which has reset rbp to the caller state.
17366 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17367 %{
17368 match(TailCall jump_target method_ptr);
17369
17370 ins_cost(300);
17371 format %{ "jmp $jump_target\t# rbx holds method" %}
17372 ins_encode %{
17373 __ jmp($jump_target$$Register);
17374 %}
17375 ins_pipe(pipe_jmp);
17376 %}
17377
17378 // Tail Jump; remove the return address; jump to target.
17379 // TailCall above leaves the return address around.
17380 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17381 %{
17382 match(TailJump jump_target ex_oop);
17383
17384 ins_cost(300);
17385 format %{ "popq rdx\t# pop return address\n\t"
17386 "jmp $jump_target" %}
17387 ins_encode %{
17388 __ popq(as_Register(RDX_enc));
17389 __ jmp($jump_target$$Register);
17390 %}
17391 ins_pipe(pipe_jmp);
17392 %}
17393
17394 // Forward exception.
17395 instruct ForwardExceptionjmp()
17396 %{
17397 match(ForwardException);
17398
17399 format %{ "jmp forward_exception_stub" %}
17400 ins_encode %{
17401 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17402 %}
17403 ins_pipe(pipe_jmp);
17404 %}
17405
17406 // Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler and is set up
// just prior to jumping to this handler. No code is emitted.
17409 instruct CreateException(rax_RegP ex_oop)
17410 %{
17411 match(Set ex_oop (CreateEx));
17412
17413 size(0);
17414 // use the following format syntax
17415 format %{ "# exception oop is in rax; no code emitted" %}
17416 ins_encode();
17417 ins_pipe(empty);
17418 %}
17419
17420 // Rethrow exception:
17421 // The exception oop will come in the first argument position.
17422 // Then JUMP (not call) to the rethrow stub code.
17423 instruct RethrowException()
17424 %{
17425 match(Rethrow);
17426
17427 // use the following format syntax
17428 format %{ "jmp rethrow_stub" %}
17429 ins_encode %{
17430 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17431 %}
17432 ins_pipe(pipe_jmp);
17433 %}
17434
17435 // ============================================================================
17436 // This name is KNOWN by the ADLC and cannot be changed.
17437 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17439 instruct tlsLoadP(r15_RegP dst) %{
17440 match(Set dst (ThreadLocal));
17441 effect(DEF dst);
17442
17443 size(0);
17444 format %{ "# TLS is in R15" %}
17445 ins_encode( /*empty encoding*/ );
17446 ins_pipe(ialu_reg_reg);
17447 %}
17448
17449 instruct addF_reg(regF dst, regF src) %{
17450 predicate(UseAVX == 0);
17451 match(Set dst (AddF dst src));
17452
17453 format %{ "addss $dst, $src" %}
17454 ins_cost(150);
17455 ins_encode %{
17456 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17457 %}
17458 ins_pipe(pipe_slow);
17459 %}
17460
17461 instruct addF_mem(regF dst, memory src) %{
17462 predicate(UseAVX == 0);
17463 match(Set dst (AddF dst (LoadF src)));
17464
17465 format %{ "addss $dst, $src" %}
17466 ins_cost(150);
17467 ins_encode %{
17468 __ addss($dst$$XMMRegister, $src$$Address);
17469 %}
17470 ins_pipe(pipe_slow);
17471 %}
17472
17473 instruct addF_imm(regF dst, immF con) %{
17474 predicate(UseAVX == 0);
17475 match(Set dst (AddF dst con));
17476 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17477 ins_cost(150);
17478 ins_encode %{
17479 __ addss($dst$$XMMRegister, $constantaddress($con));
17480 %}
17481 ins_pipe(pipe_slow);
17482 %}
17483
17484 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17485 predicate(UseAVX > 0);
17486 match(Set dst (AddF src1 src2));
17487
17488 format %{ "vaddss $dst, $src1, $src2" %}
17489 ins_cost(150);
17490 ins_encode %{
17491 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17492 %}
17493 ins_pipe(pipe_slow);
17494 %}
17495
17496 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17497 predicate(UseAVX > 0);
17498 match(Set dst (AddF src1 (LoadF src2)));
17499
17500 format %{ "vaddss $dst, $src1, $src2" %}
17501 ins_cost(150);
17502 ins_encode %{
17503 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17504 %}
17505 ins_pipe(pipe_slow);
17506 %}
17507
17508 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17509 predicate(UseAVX > 0);
17510 match(Set dst (AddF src con));
17511
17512 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17513 ins_cost(150);
17514 ins_encode %{
17515 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17516 %}
17517 ins_pipe(pipe_slow);
17518 %}
17519
17520 instruct addD_reg(regD dst, regD src) %{
17521 predicate(UseAVX == 0);
17522 match(Set dst (AddD dst src));
17523
17524 format %{ "addsd $dst, $src" %}
17525 ins_cost(150);
17526 ins_encode %{
17527 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17528 %}
17529 ins_pipe(pipe_slow);
17530 %}
17531
17532 instruct addD_mem(regD dst, memory src) %{
17533 predicate(UseAVX == 0);
17534 match(Set dst (AddD dst (LoadD src)));
17535
17536 format %{ "addsd $dst, $src" %}
17537 ins_cost(150);
17538 ins_encode %{
17539 __ addsd($dst$$XMMRegister, $src$$Address);
17540 %}
17541 ins_pipe(pipe_slow);
17542 %}
17543
17544 instruct addD_imm(regD dst, immD con) %{
17545 predicate(UseAVX == 0);
17546 match(Set dst (AddD dst con));
17547 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17548 ins_cost(150);
17549 ins_encode %{
17550 __ addsd($dst$$XMMRegister, $constantaddress($con));
17551 %}
17552 ins_pipe(pipe_slow);
17553 %}
17554
17555 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17556 predicate(UseAVX > 0);
17557 match(Set dst (AddD src1 src2));
17558
17559 format %{ "vaddsd $dst, $src1, $src2" %}
17560 ins_cost(150);
17561 ins_encode %{
17562 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17563 %}
17564 ins_pipe(pipe_slow);
17565 %}
17566
17567 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17568 predicate(UseAVX > 0);
17569 match(Set dst (AddD src1 (LoadD src2)));
17570
17571 format %{ "vaddsd $dst, $src1, $src2" %}
17572 ins_cost(150);
17573 ins_encode %{
17574 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17575 %}
17576 ins_pipe(pipe_slow);
17577 %}
17578
17579 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17580 predicate(UseAVX > 0);
17581 match(Set dst (AddD src con));
17582
17583 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17584 ins_cost(150);
17585 ins_encode %{
17586 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17587 %}
17588 ins_pipe(pipe_slow);
17589 %}
17590
17591 instruct subF_reg(regF dst, regF src) %{
17592 predicate(UseAVX == 0);
17593 match(Set dst (SubF dst src));
17594
17595 format %{ "subss $dst, $src" %}
17596 ins_cost(150);
17597 ins_encode %{
17598 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17599 %}
17600 ins_pipe(pipe_slow);
17601 %}
17602
17603 instruct subF_mem(regF dst, memory src) %{
17604 predicate(UseAVX == 0);
17605 match(Set dst (SubF dst (LoadF src)));
17606
17607 format %{ "subss $dst, $src" %}
17608 ins_cost(150);
17609 ins_encode %{
17610 __ subss($dst$$XMMRegister, $src$$Address);
17611 %}
17612 ins_pipe(pipe_slow);
17613 %}
17614
17615 instruct subF_imm(regF dst, immF con) %{
17616 predicate(UseAVX == 0);
17617 match(Set dst (SubF dst con));
17618 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17619 ins_cost(150);
17620 ins_encode %{
17621 __ subss($dst$$XMMRegister, $constantaddress($con));
17622 %}
17623 ins_pipe(pipe_slow);
17624 %}
17625
17626 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17627 predicate(UseAVX > 0);
17628 match(Set dst (SubF src1 src2));
17629
17630 format %{ "vsubss $dst, $src1, $src2" %}
17631 ins_cost(150);
17632 ins_encode %{
17633 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17634 %}
17635 ins_pipe(pipe_slow);
17636 %}
17637
17638 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17639 predicate(UseAVX > 0);
17640 match(Set dst (SubF src1 (LoadF src2)));
17641
17642 format %{ "vsubss $dst, $src1, $src2" %}
17643 ins_cost(150);
17644 ins_encode %{
17645 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17646 %}
17647 ins_pipe(pipe_slow);
17648 %}
17649
17650 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17651 predicate(UseAVX > 0);
17652 match(Set dst (SubF src con));
17653
17654 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17655 ins_cost(150);
17656 ins_encode %{
17657 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17658 %}
17659 ins_pipe(pipe_slow);
17660 %}
17661
17662 instruct subD_reg(regD dst, regD src) %{
17663 predicate(UseAVX == 0);
17664 match(Set dst (SubD dst src));
17665
17666 format %{ "subsd $dst, $src" %}
17667 ins_cost(150);
17668 ins_encode %{
17669 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17670 %}
17671 ins_pipe(pipe_slow);
17672 %}
17673
17674 instruct subD_mem(regD dst, memory src) %{
17675 predicate(UseAVX == 0);
17676 match(Set dst (SubD dst (LoadD src)));
17677
17678 format %{ "subsd $dst, $src" %}
17679 ins_cost(150);
17680 ins_encode %{
17681 __ subsd($dst$$XMMRegister, $src$$Address);
17682 %}
17683 ins_pipe(pipe_slow);
17684 %}
17685
17686 instruct subD_imm(regD dst, immD con) %{
17687 predicate(UseAVX == 0);
17688 match(Set dst (SubD dst con));
17689 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17690 ins_cost(150);
17691 ins_encode %{
17692 __ subsd($dst$$XMMRegister, $constantaddress($con));
17693 %}
17694 ins_pipe(pipe_slow);
17695 %}
17696
17697 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17698 predicate(UseAVX > 0);
17699 match(Set dst (SubD src1 src2));
17700
17701 format %{ "vsubsd $dst, $src1, $src2" %}
17702 ins_cost(150);
17703 ins_encode %{
17704 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17705 %}
17706 ins_pipe(pipe_slow);
17707 %}
17708
17709 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17710 predicate(UseAVX > 0);
17711 match(Set dst (SubD src1 (LoadD src2)));
17712
17713 format %{ "vsubsd $dst, $src1, $src2" %}
17714 ins_cost(150);
17715 ins_encode %{
17716 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17717 %}
17718 ins_pipe(pipe_slow);
17719 %}
17720
17721 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17722 predicate(UseAVX > 0);
17723 match(Set dst (SubD src con));
17724
17725 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17726 ins_cost(150);
17727 ins_encode %{
17728 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17729 %}
17730 ins_pipe(pipe_slow);
17731 %}
17732
17733 instruct mulF_reg(regF dst, regF src) %{
17734 predicate(UseAVX == 0);
17735 match(Set dst (MulF dst src));
17736
17737 format %{ "mulss $dst, $src" %}
17738 ins_cost(150);
17739 ins_encode %{
17740 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17741 %}
17742 ins_pipe(pipe_slow);
17743 %}
17744
17745 instruct mulF_mem(regF dst, memory src) %{
17746 predicate(UseAVX == 0);
17747 match(Set dst (MulF dst (LoadF src)));
17748
17749 format %{ "mulss $dst, $src" %}
17750 ins_cost(150);
17751 ins_encode %{
17752 __ mulss($dst$$XMMRegister, $src$$Address);
17753 %}
17754 ins_pipe(pipe_slow);
17755 %}
17756
17757 instruct mulF_imm(regF dst, immF con) %{
17758 predicate(UseAVX == 0);
17759 match(Set dst (MulF dst con));
17760 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17761 ins_cost(150);
17762 ins_encode %{
17763 __ mulss($dst$$XMMRegister, $constantaddress($con));
17764 %}
17765 ins_pipe(pipe_slow);
17766 %}
17767
17768 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17769 predicate(UseAVX > 0);
17770 match(Set dst (MulF src1 src2));
17771
17772 format %{ "vmulss $dst, $src1, $src2" %}
17773 ins_cost(150);
17774 ins_encode %{
17775 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17776 %}
17777 ins_pipe(pipe_slow);
17778 %}
17779
17780 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17781 predicate(UseAVX > 0);
17782 match(Set dst (MulF src1 (LoadF src2)));
17783
17784 format %{ "vmulss $dst, $src1, $src2" %}
17785 ins_cost(150);
17786 ins_encode %{
17787 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17788 %}
17789 ins_pipe(pipe_slow);
17790 %}
17791
17792 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17793 predicate(UseAVX > 0);
17794 match(Set dst (MulF src con));
17795
17796 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17797 ins_cost(150);
17798 ins_encode %{
17799 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17800 %}
17801 ins_pipe(pipe_slow);
17802 %}
17803
17804 instruct mulD_reg(regD dst, regD src) %{
17805 predicate(UseAVX == 0);
17806 match(Set dst (MulD dst src));
17807
17808 format %{ "mulsd $dst, $src" %}
17809 ins_cost(150);
17810 ins_encode %{
17811 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17812 %}
17813 ins_pipe(pipe_slow);
17814 %}
17815
17816 instruct mulD_mem(regD dst, memory src) %{
17817 predicate(UseAVX == 0);
17818 match(Set dst (MulD dst (LoadD src)));
17819
17820 format %{ "mulsd $dst, $src" %}
17821 ins_cost(150);
17822 ins_encode %{
17823 __ mulsd($dst$$XMMRegister, $src$$Address);
17824 %}
17825 ins_pipe(pipe_slow);
17826 %}
17827
17828 instruct mulD_imm(regD dst, immD con) %{
17829 predicate(UseAVX == 0);
17830 match(Set dst (MulD dst con));
17831 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17832 ins_cost(150);
17833 ins_encode %{
17834 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17835 %}
17836 ins_pipe(pipe_slow);
17837 %}
17838
17839 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17840 predicate(UseAVX > 0);
17841 match(Set dst (MulD src1 src2));
17842
17843 format %{ "vmulsd $dst, $src1, $src2" %}
17844 ins_cost(150);
17845 ins_encode %{
17846 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17847 %}
17848 ins_pipe(pipe_slow);
17849 %}
17850
17851 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17852 predicate(UseAVX > 0);
17853 match(Set dst (MulD src1 (LoadD src2)));
17854
17855 format %{ "vmulsd $dst, $src1, $src2" %}
17856 ins_cost(150);
17857 ins_encode %{
17858 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17859 %}
17860 ins_pipe(pipe_slow);
17861 %}
17862
17863 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17864 predicate(UseAVX > 0);
17865 match(Set dst (MulD src con));
17866
17867 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17868 ins_cost(150);
17869 ins_encode %{
17870 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17871 %}
17872 ins_pipe(pipe_slow);
17873 %}
17874
17875 instruct divF_reg(regF dst, regF src) %{
17876 predicate(UseAVX == 0);
17877 match(Set dst (DivF dst src));
17878
17879 format %{ "divss $dst, $src" %}
17880 ins_cost(150);
17881 ins_encode %{
17882 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17883 %}
17884 ins_pipe(pipe_slow);
17885 %}
17886
17887 instruct divF_mem(regF dst, memory src) %{
17888 predicate(UseAVX == 0);
17889 match(Set dst (DivF dst (LoadF src)));
17890
17891 format %{ "divss $dst, $src" %}
17892 ins_cost(150);
17893 ins_encode %{
17894 __ divss($dst$$XMMRegister, $src$$Address);
17895 %}
17896 ins_pipe(pipe_slow);
17897 %}
17898
17899 instruct divF_imm(regF dst, immF con) %{
17900 predicate(UseAVX == 0);
17901 match(Set dst (DivF dst con));
17902 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17903 ins_cost(150);
17904 ins_encode %{
17905 __ divss($dst$$XMMRegister, $constantaddress($con));
17906 %}
17907 ins_pipe(pipe_slow);
17908 %}
17909
17910 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17911 predicate(UseAVX > 0);
17912 match(Set dst (DivF src1 src2));
17913
17914 format %{ "vdivss $dst, $src1, $src2" %}
17915 ins_cost(150);
17916 ins_encode %{
17917 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17918 %}
17919 ins_pipe(pipe_slow);
17920 %}
17921
17922 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17923 predicate(UseAVX > 0);
17924 match(Set dst (DivF src1 (LoadF src2)));
17925
17926 format %{ "vdivss $dst, $src1, $src2" %}
17927 ins_cost(150);
17928 ins_encode %{
17929 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17930 %}
17931 ins_pipe(pipe_slow);
17932 %}
17933
17934 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17935 predicate(UseAVX > 0);
17936 match(Set dst (DivF src con));
17937
17938 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17939 ins_cost(150);
17940 ins_encode %{
17941 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17942 %}
17943 ins_pipe(pipe_slow);
17944 %}
17945
17946 instruct divD_reg(regD dst, regD src) %{
17947 predicate(UseAVX == 0);
17948 match(Set dst (DivD dst src));
17949
17950 format %{ "divsd $dst, $src" %}
17951 ins_cost(150);
17952 ins_encode %{
17953 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17954 %}
17955 ins_pipe(pipe_slow);
17956 %}
17957
17958 instruct divD_mem(regD dst, memory src) %{
17959 predicate(UseAVX == 0);
17960 match(Set dst (DivD dst (LoadD src)));
17961
17962 format %{ "divsd $dst, $src" %}
17963 ins_cost(150);
17964 ins_encode %{
17965 __ divsd($dst$$XMMRegister, $src$$Address);
17966 %}
17967 ins_pipe(pipe_slow);
17968 %}
17969
17970 instruct divD_imm(regD dst, immD con) %{
17971 predicate(UseAVX == 0);
17972 match(Set dst (DivD dst con));
17973 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17974 ins_cost(150);
17975 ins_encode %{
17976 __ divsd($dst$$XMMRegister, $constantaddress($con));
17977 %}
17978 ins_pipe(pipe_slow);
17979 %}
17980
17981 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17982 predicate(UseAVX > 0);
17983 match(Set dst (DivD src1 src2));
17984
17985 format %{ "vdivsd $dst, $src1, $src2" %}
17986 ins_cost(150);
17987 ins_encode %{
17988 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17989 %}
17990 ins_pipe(pipe_slow);
17991 %}
17992
17993 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17994 predicate(UseAVX > 0);
17995 match(Set dst (DivD src1 (LoadD src2)));
17996
17997 format %{ "vdivsd $dst, $src1, $src2" %}
17998 ins_cost(150);
17999 ins_encode %{
18000 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18001 %}
18002 ins_pipe(pipe_slow);
18003 %}
18004
18005 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18006 predicate(UseAVX > 0);
18007 match(Set dst (DivD src con));
18008
18009 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18010 ins_cost(150);
18011 ins_encode %{
18012 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18013 %}
18014 ins_pipe(pipe_slow);
18015 %}
18016
18017 instruct absF_reg(regF dst) %{
18018 predicate(UseAVX == 0);
18019 match(Set dst (AbsF dst));
18020 ins_cost(150);
18021 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18022 ins_encode %{
18023 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18024 %}
18025 ins_pipe(pipe_slow);
18026 %}
18027
18028 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18029 predicate(UseAVX > 0);
18030 match(Set dst (AbsF src));
18031 ins_cost(150);
18032 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18033 ins_encode %{
18034 int vlen_enc = Assembler::AVX_128bit;
18035 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18036 ExternalAddress(float_signmask()), vlen_enc);
18037 %}
18038 ins_pipe(pipe_slow);
18039 %}
18040
18041 instruct absD_reg(regD dst) %{
18042 predicate(UseAVX == 0);
18043 match(Set dst (AbsD dst));
18044 ins_cost(150);
18045 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18046 "# abs double by sign masking" %}
18047 ins_encode %{
18048 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18049 %}
18050 ins_pipe(pipe_slow);
18051 %}
18052
18053 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18054 predicate(UseAVX > 0);
18055 match(Set dst (AbsD src));
18056 ins_cost(150);
18057 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18058 "# abs double by sign masking" %}
18059 ins_encode %{
18060 int vlen_enc = Assembler::AVX_128bit;
18061 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18062 ExternalAddress(double_signmask()), vlen_enc);
18063 %}
18064 ins_pipe(pipe_slow);
18065 %}
18066
18067 instruct negF_reg(regF dst) %{
18068 predicate(UseAVX == 0);
18069 match(Set dst (NegF dst));
18070 ins_cost(150);
18071 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18072 ins_encode %{
18073 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18074 %}
18075 ins_pipe(pipe_slow);
18076 %}
18077
18078 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18079 predicate(UseAVX > 0);
18080 match(Set dst (NegF src));
18081 ins_cost(150);
18082 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18083 ins_encode %{
18084 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18085 ExternalAddress(float_signflip()));
18086 %}
18087 ins_pipe(pipe_slow);
18088 %}
18089
18090 instruct negD_reg(regD dst) %{
18091 predicate(UseAVX == 0);
18092 match(Set dst (NegD dst));
18093 ins_cost(150);
18094 format %{ "xorpd $dst, [0x8000000000000000]\t"
18095 "# neg double by sign flipping" %}
18096 ins_encode %{
18097 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18098 %}
18099 ins_pipe(pipe_slow);
18100 %}
18101
18102 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18103 predicate(UseAVX > 0);
18104 match(Set dst (NegD src));
18105 ins_cost(150);
18106 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18107 "# neg double by sign flipping" %}
18108 ins_encode %{
18109 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18110 ExternalAddress(double_signflip()));
18111 %}
18112 ins_pipe(pipe_slow);
18113 %}
18114
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
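// (sqrtss/sqrtsd only write the low element of dst and merge the rest, so an
// uninitialized dst would carry a false dependency on its previous contents;
// using dst as both input and output avoids that stall.)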
18117 instruct sqrtF_reg(regF dst) %{
18118 match(Set dst (SqrtF dst));
18119 format %{ "sqrtss $dst, $dst" %}
18120 ins_encode %{
18121 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18122 %}
18123 ins_pipe(pipe_slow);
18124 %}
18125
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18128 instruct sqrtD_reg(regD dst) %{
18129 match(Set dst (SqrtD dst));
18130 format %{ "sqrtsd $dst, $dst" %}
18131 ins_encode %{
18132 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18133 %}
18134 ins_pipe(pipe_slow);
18135 %}
18136
18137 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18138 effect(TEMP tmp);
18139 match(Set dst (ConvF2HF src));
18140 ins_cost(125);
18141 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18142 ins_encode %{
18143 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18144 %}
18145 ins_pipe( pipe_slow );
18146 %}
18147
18148 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18149 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18150 effect(TEMP ktmp, TEMP rtmp);
18151 match(Set mem (StoreC mem (ConvF2HF src)));
18152 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18153 ins_encode %{
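    // Build a one-bit write mask so that the masked evcvtps2ph below stores
    // only the single converted 16-bit result to memory.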
18154 __ movl($rtmp$$Register, 0x1);
18155 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18156 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18157 %}
18158 ins_pipe( pipe_slow );
18159 %}
18160
18161 instruct vconvF2HF(vec dst, vec src) %{
18162 match(Set dst (VectorCastF2HF src));
18163 format %{ "vector_conv_F2HF $dst $src" %}
18164 ins_encode %{
18165 int vlen_enc = vector_length_encoding(this, $src);
18166 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18167 %}
18168 ins_pipe( pipe_slow );
18169 %}
18170
18171 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18172 predicate(n->as_StoreVector()->memory_size() >= 16);
18173 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18174 format %{ "vcvtps2ph $mem,$src" %}
18175 ins_encode %{
18176 int vlen_enc = vector_length_encoding(this, $src);
18177 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18178 %}
18179 ins_pipe( pipe_slow );
18180 %}
18181
18182 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18183 match(Set dst (ConvHF2F src));
18184 format %{ "vcvtph2ps $dst,$src" %}
18185 ins_encode %{
18186 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18187 %}
18188 ins_pipe( pipe_slow );
18189 %}
18190
18191 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18192 match(Set dst (VectorCastHF2F (LoadVector mem)));
18193 format %{ "vcvtph2ps $dst,$mem" %}
18194 ins_encode %{
18195 int vlen_enc = vector_length_encoding(this);
18196 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18197 %}
18198 ins_pipe( pipe_slow );
18199 %}
18200
18201 instruct vconvHF2F(vec dst, vec src) %{
18202 match(Set dst (VectorCastHF2F src));
18203 ins_cost(125);
18204 format %{ "vector_conv_HF2F $dst,$src" %}
18205 ins_encode %{
18206 int vlen_enc = vector_length_encoding(this);
18207 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18208 %}
18209 ins_pipe( pipe_slow );
18210 %}
18211
18212 // ---------------------------------------- VectorReinterpret ------------------------------------
18213 instruct reinterpret_mask(kReg dst) %{
18214 predicate(n->bottom_type()->isa_vectmask() &&
18215 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18216 match(Set dst (VectorReinterpret dst));
18217 ins_cost(125);
18218 format %{ "vector_reinterpret $dst\t!" %}
18219 ins_encode %{
18220 // empty
18221 %}
18222 ins_pipe( pipe_slow );
18223 %}
18224
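// The three rules below reinterpret a vector mask at a narrower element
// granularity: evpmovm2w/d/q widens the opmask into a vector of all-ones or
// all-zero lanes, and evpmovb2m re-derives an opmask from the MSB of every
// byte, so an N-lane word/dword/qword mask becomes the 2N/4N/8N-bit byte mask
// covering the same bytes.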
18225 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18226 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18227 n->bottom_type()->isa_vectmask() &&
18228 n->in(1)->bottom_type()->isa_vectmask() &&
18229 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same number of bytes
18231 match(Set dst (VectorReinterpret src));
18232 effect(TEMP xtmp);
18233 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18234 ins_encode %{
18235 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18236 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18237 assert(src_sz == dst_sz , "src and dst size mismatch");
18238 int vlen_enc = vector_length_encoding(src_sz);
18239 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18240 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18241 %}
18242 ins_pipe( pipe_slow );
18243 %}
18244
18245 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18246 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18247 n->bottom_type()->isa_vectmask() &&
18248 n->in(1)->bottom_type()->isa_vectmask() &&
18249 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18250 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same number of bytes
18252 match(Set dst (VectorReinterpret src));
18253 effect(TEMP xtmp);
18254 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18255 ins_encode %{
18256 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18257 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18258 assert(src_sz == dst_sz , "src and dst size mismatch");
18259 int vlen_enc = vector_length_encoding(src_sz);
18260 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18261 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18262 %}
18263 ins_pipe( pipe_slow );
18264 %}
18265
18266 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18267 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18268 n->bottom_type()->isa_vectmask() &&
18269 n->in(1)->bottom_type()->isa_vectmask() &&
18270 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18271 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same number of bytes
18273 match(Set dst (VectorReinterpret src));
18274 effect(TEMP xtmp);
18275 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18276 ins_encode %{
18277 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18278 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18279 assert(src_sz == dst_sz , "src and dst size mismatch");
18280 int vlen_enc = vector_length_encoding(src_sz);
18281 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18282 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18283 %}
18284 ins_pipe( pipe_slow );
18285 %}
18286
18287 instruct reinterpret(vec dst) %{
18288 predicate(!n->bottom_type()->isa_vectmask() &&
18289 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18290 match(Set dst (VectorReinterpret dst));
18291 ins_cost(125);
18292 format %{ "vector_reinterpret $dst\t!" %}
18293 ins_encode %{
18294 // empty
18295 %}
18296 ins_pipe( pipe_slow );
18297 %}
18298
18299 instruct reinterpret_expand(vec dst, vec src) %{
18300 predicate(UseAVX == 0 &&
18301 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18302 match(Set dst (VectorReinterpret src));
18303 ins_cost(125);
18304 effect(TEMP dst);
18305 format %{ "vector_reinterpret_expand $dst,$src" %}
18306 ins_encode %{
18307 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18308 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18309
18310 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18311 if (src_vlen_in_bytes == 4) {
18312 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18313 } else {
18314 assert(src_vlen_in_bytes == 8, "");
18315 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18316 }
18317 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18318 %}
18319 ins_pipe( pipe_slow );
18320 %}
18321
18322 instruct vreinterpret_expand4(legVec dst, vec src) %{
18323 predicate(UseAVX > 0 &&
18324 !n->bottom_type()->isa_vectmask() &&
18325 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18326 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18327 match(Set dst (VectorReinterpret src));
18328 ins_cost(125);
18329 format %{ "vector_reinterpret_expand $dst,$src" %}
18330 ins_encode %{
18331 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18332 %}
18333 ins_pipe( pipe_slow );
18334 %}
18335
18336
18337 instruct vreinterpret_expand(legVec dst, vec src) %{
18338 predicate(UseAVX > 0 &&
18339 !n->bottom_type()->isa_vectmask() &&
18340 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18341 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18342 match(Set dst (VectorReinterpret src));
18343 ins_cost(125);
18344 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18345 ins_encode %{
18346 switch (Matcher::vector_length_in_bytes(this, $src)) {
18347 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18348 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18349 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18350 default: ShouldNotReachHere();
18351 }
18352 %}
18353 ins_pipe( pipe_slow );
18354 %}
18355
18356 instruct reinterpret_shrink(vec dst, legVec src) %{
18357 predicate(!n->bottom_type()->isa_vectmask() &&
18358 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18359 match(Set dst (VectorReinterpret src));
18360 ins_cost(125);
18361 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18362 ins_encode %{
18363 switch (Matcher::vector_length_in_bytes(this)) {
18364 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18365 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18366 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18367 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18368 default: ShouldNotReachHere();
18369 }
18370 %}
18371 ins_pipe( pipe_slow );
18372 %}
18373
18374 // ----------------------------------------------------------------------------------------------------
18375
18376 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18377 match(Set dst (RoundDoubleMode src rmode));
18378 format %{ "roundsd $dst,$src" %}
18379 ins_cost(150);
18380 ins_encode %{
18381 assert(UseSSE >= 4, "required");
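    // Without AVX, roundsd merges into dst (its upper bits are preserved), so
    // zero dst first to break the false dependency on its previous value.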
18382 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18383 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18384 }
18385 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18386 %}
18387 ins_pipe(pipe_slow);
18388 %}
18389
18390 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18391 match(Set dst (RoundDoubleMode con rmode));
18392 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18393 ins_cost(150);
18394 ins_encode %{
18395 assert(UseSSE >= 4, "required");
18396 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18397 %}
18398 ins_pipe(pipe_slow);
18399 %}
18400
18401 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18402 predicate(Matcher::vector_length(n) < 8);
18403 match(Set dst (RoundDoubleModeV src rmode));
18404 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18405 ins_encode %{
18406 assert(UseAVX > 0, "required");
18407 int vlen_enc = vector_length_encoding(this);
18408 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18409 %}
18410 ins_pipe( pipe_slow );
18411 %}
18412
18413 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18414 predicate(Matcher::vector_length(n) == 8);
18415 match(Set dst (RoundDoubleModeV src rmode));
18416 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18417 ins_encode %{
18418 assert(UseAVX > 2, "required");
18419 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18420 %}
18421 ins_pipe( pipe_slow );
18422 %}
18423
18424 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18425 predicate(Matcher::vector_length(n) < 8);
18426 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18427 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18428 ins_encode %{
18429 assert(UseAVX > 0, "required");
18430 int vlen_enc = vector_length_encoding(this);
18431 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18432 %}
18433 ins_pipe( pipe_slow );
18434 %}
18435
18436 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18437 predicate(Matcher::vector_length(n) == 8);
18438 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18439 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18440 ins_encode %{
18441 assert(UseAVX > 2, "required");
18442 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18443 %}
18444 ins_pipe( pipe_slow );
18445 %}
18446
18447 instruct onspinwait() %{
18448 match(OnSpinWait);
18449 ins_cost(200);
18450
18451 format %{
18452 $$template
18453 $$emit$$"pause\t! membar_onspinwait"
18454 %}
18455 ins_encode %{
18456 __ pause();
18457 %}
18458 ins_pipe(pipe_slow);
18459 %}
18460
18461 // a * b + c
18462 instruct fmaD_reg(regD a, regD b, regD c) %{
18463 match(Set c (FmaD c (Binary a b)));
18464 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18465 ins_cost(150);
18466 ins_encode %{
18467 assert(UseFMA, "Needs FMA instructions support.");
18468 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18469 %}
18470 ins_pipe( pipe_slow );
18471 %}
18472
18473 // a * b + c
18474 instruct fmaF_reg(regF a, regF b, regF c) %{
18475 match(Set c (FmaF c (Binary a b)));
18476 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18477 ins_cost(150);
18478 ins_encode %{
18479 assert(UseFMA, "Needs FMA instructions support.");
18480 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18481 %}
18482 ins_pipe( pipe_slow );
18483 %}
18484
18485 // ====================VECTOR INSTRUCTIONS=====================================
18486
18487 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18488 instruct MoveVec2Leg(legVec dst, vec src) %{
18489 match(Set dst src);
18490 format %{ "" %}
18491 ins_encode %{
18492 ShouldNotReachHere();
18493 %}
18494 ins_pipe( fpu_reg_reg );
18495 %}
18496
18497 instruct MoveLeg2Vec(vec dst, legVec src) %{
18498 match(Set dst src);
18499 format %{ "" %}
18500 ins_encode %{
18501 ShouldNotReachHere();
18502 %}
18503 ins_pipe( fpu_reg_reg );
18504 %}
18505
18506 // ============================================================================
18507
18508 // Load vectors generic operand pattern
18509 instruct loadV(vec dst, memory mem) %{
18510 match(Set dst (LoadVector mem));
18511 ins_cost(125);
18512 format %{ "load_vector $dst,$mem" %}
18513 ins_encode %{
18514 BasicType bt = Matcher::vector_element_basic_type(this);
18515 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18516 %}
18517 ins_pipe( pipe_slow );
18518 %}
18519
18520 // Store vectors generic operand pattern.
18521 instruct storeV(memory mem, vec src) %{
18522 match(Set mem (StoreVector mem src));
18523 ins_cost(145);
18524 format %{ "store_vector $mem,$src\n\t" %}
18525 ins_encode %{
18526 switch (Matcher::vector_length_in_bytes(this, $src)) {
18527 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18528 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18529 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18530 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18531 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18532 default: ShouldNotReachHere();
18533 }
18534 %}
18535 ins_pipe( pipe_slow );
18536 %}
18537
18538 // ---------------------------------------- Gather ------------------------------------
18539
18540 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
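//
// Conceptually, for each vector lane i: dst[i] = base[idx[i]], where base is
// the resolved memory operand and idx supplies per-lane element indices; the
// masked variants below load only the lanes whose mask bit is set.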
18541
18542 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18543 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18544 Matcher::vector_length_in_bytes(n) <= 32);
18545 match(Set dst (LoadVectorGather mem idx));
18546 effect(TEMP dst, TEMP tmp, TEMP mask);
18547 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18548 ins_encode %{
18549 int vlen_enc = vector_length_encoding(this);
18550 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18551 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
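    // vpcmpeqd of a register with itself produces all-ones, i.e. an
    // all-lanes mask: this non-predicated rule gathers every lane.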
18552 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18553 __ lea($tmp$$Register, $mem$$Address);
18554 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18555 %}
18556 ins_pipe( pipe_slow );
18557 %}
18558
18559
18560 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18561 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18562 !is_subword_type(Matcher::vector_element_basic_type(n)));
18563 match(Set dst (LoadVectorGather mem idx));
18564 effect(TEMP dst, TEMP tmp, TEMP ktmp);
18565 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18566 ins_encode %{
18567 int vlen_enc = vector_length_encoding(this);
18568 BasicType elem_bt = Matcher::vector_element_basic_type(this);
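    // kxnor of a register with itself produces an all-ones opmask: this
    // non-predicated rule gathers every lane.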
18569 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18570 __ lea($tmp$$Register, $mem$$Address);
18571 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18572 %}
18573 ins_pipe( pipe_slow );
18574 %}
18575
18576 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18577 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18578 !is_subword_type(Matcher::vector_element_basic_type(n)));
18579 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18580 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18581 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18582 ins_encode %{
18583 assert(UseAVX > 2, "sanity");
18584 int vlen_enc = vector_length_encoding(this);
18585 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18586 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register used
    // for predication, the mask operand is copied to a temporary.
18589 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18590 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18591 __ lea($tmp$$Register, $mem$$Address);
18592 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18593 %}
18594 ins_pipe( pipe_slow );
18595 %}
18596
18597 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18598 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18599 match(Set dst (LoadVectorGather mem idx_base));
18600 effect(TEMP tmp, TEMP rtmp);
18601 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18602 ins_encode %{
18603 int vlen_enc = vector_length_encoding(this);
18604 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18605 __ lea($tmp$$Register, $mem$$Address);
18606 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18607 %}
18608 ins_pipe( pipe_slow );
18609 %}
18610
18611 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18612 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18613 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18614 match(Set dst (LoadVectorGather mem idx_base));
18615 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18616 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18617 ins_encode %{
18618 int vlen_enc = vector_length_encoding(this);
18619 int vector_len = Matcher::vector_length(this);
18620 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18621 __ lea($tmp$$Register, $mem$$Address);
18622 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18623 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18624 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18625 %}
18626 ins_pipe( pipe_slow );
18627 %}
18628
18629 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18630 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18631 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18632 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18633 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18634 ins_encode %{
18635 int vlen_enc = vector_length_encoding(this);
18636 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18637 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18638 __ lea($tmp$$Register, $mem$$Address);
18639 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18640 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18641 %}
18642 ins_pipe( pipe_slow );
18643 %}
18644
18645 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18646 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18647 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18648 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18649 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18650 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18651 ins_encode %{
18652 int vlen_enc = vector_length_encoding(this);
18653 int vector_len = Matcher::vector_length(this);
18654 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18655 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18656 __ lea($tmp$$Register, $mem$$Address);
18657 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18658 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18659 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18660 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18661 %}
18662 ins_pipe( pipe_slow );
18663 %}
18664
18665 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18666 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18667 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18668 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18669 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18670 ins_encode %{
18671 int vlen_enc = vector_length_encoding(this);
18672 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18673 __ lea($tmp$$Register, $mem$$Address);
18674 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
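    // vpmovmskb produces one mask bit per byte; for T_SHORT each element spans
    // two bytes, so pext with the 0x55555555 selector keeps every other bit,
    // leaving one bit per 16-bit element.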
18675 if (elem_bt == T_SHORT) {
18676 __ movl($mask_idx$$Register, 0x55555555);
18677 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18678 }
18679 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18680 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18681 %}
18682 ins_pipe( pipe_slow );
18683 %}
18684
18685 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18686 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18687 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18688 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18689 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18690 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18691 ins_encode %{
18692 int vlen_enc = vector_length_encoding(this);
18693 int vector_len = Matcher::vector_length(this);
18694 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18695 __ lea($tmp$$Register, $mem$$Address);
18696 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18697 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
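// As above, compress the byte-granular mask to one bit per element for shorts.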
18698 if (elem_bt == T_SHORT) {
18699 __ movl($mask_idx$$Register, 0x55555555);
18700 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18701 }
18702 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18703 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18704 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18705 %}
18706 ins_pipe( pipe_slow );
18707 %}
18708
18709 // ====================Scatter=======================================
18710
18711 // Scatter INT, LONG, FLOAT, DOUBLE
18712
18713 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18714 predicate(UseAVX > 2);
18715 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18716 effect(TEMP tmp, TEMP ktmp);
format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18718 ins_encode %{
18719 int vlen_enc = vector_length_encoding(this, $src);
18720 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18721
18722 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18723 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18724
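// Unmasked scatter: load an all-ones predicate so every lane is written.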
18725 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18726 __ lea($tmp$$Register, $mem$$Address);
18727 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18728 %}
18729 ins_pipe( pipe_slow );
18730 %}
18731
18732 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18733 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18734 effect(TEMP tmp, TEMP ktmp);
18735 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18736 ins_encode %{
18737 int vlen_enc = vector_length_encoding(this, $src);
18738 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18739 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18740 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
// Note: The scatter instruction partially updates the opmask register used
// for predication, hence the mask operand is first copied to a temporary.
18743 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18744 __ lea($tmp$$Register, $mem$$Address);
18745 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18746 %}
18747 ins_pipe( pipe_slow );
18748 %}
18749
18750 // ====================REPLICATE=======================================
18751
18752 // Replicate byte scalar to be vector
18753 instruct vReplB_reg(vec dst, rRegI src) %{
18754 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18755 match(Set dst (Replicate src));
18756 format %{ "replicateB $dst,$src" %}
18757 ins_encode %{
18758 uint vlen = Matcher::vector_length(this);
18759 if (UseAVX >= 2) {
18760 int vlen_enc = vector_length_encoding(this);
18761 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18762 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18763 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18764 } else {
18765 __ movdl($dst$$XMMRegister, $src$$Register);
18766 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18767 }
18768 } else {
18769 assert(UseAVX < 2, "");
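// SSE-only fallback: widen the byte to a word, broadcast the word across the
// low 8 bytes, then duplicate into the upper half for 16-byte vectors.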
18770 __ movdl($dst$$XMMRegister, $src$$Register);
18771 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18772 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18773 if (vlen >= 16) {
18774 assert(vlen == 16, "");
18775 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18776 }
18777 }
18778 %}
18779 ins_pipe( pipe_slow );
18780 %}
18781
18782 instruct ReplB_mem(vec dst, memory mem) %{
18783 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18784 match(Set dst (Replicate (LoadB mem)));
18785 format %{ "replicateB $dst,$mem" %}
18786 ins_encode %{
18787 int vlen_enc = vector_length_encoding(this);
18788 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18789 %}
18790 ins_pipe( pipe_slow );
18791 %}
18792
18793 // ====================ReplicateS=======================================
18794
18795 instruct vReplS_reg(vec dst, rRegI src) %{
18796 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18797 match(Set dst (Replicate src));
18798 format %{ "replicateS $dst,$src" %}
18799 ins_encode %{
18800 uint vlen = Matcher::vector_length(this);
18801 int vlen_enc = vector_length_encoding(this);
18802 if (UseAVX >= 2) {
18803 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18804 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18805 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18806 } else {
18807 __ movdl($dst$$XMMRegister, $src$$Register);
18808 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18809 }
18810 } else {
18811 assert(UseAVX < 2, "");
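// SSE-only fallback: broadcast the word across the low 8 bytes, then duplicate
// into the upper half for 8-element vectors.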
18812 __ movdl($dst$$XMMRegister, $src$$Register);
18813 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18814 if (vlen >= 8) {
18815 assert(vlen == 8, "");
18816 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18817 }
18818 }
18819 %}
18820 ins_pipe( pipe_slow );
18821 %}
18822
18823 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18824 match(Set dst (Replicate con));
18825 effect(TEMP rtmp);
18826 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18827 ins_encode %{
18828 int vlen_enc = vector_length_encoding(this);
18829 BasicType bt = Matcher::vector_element_basic_type(this);
18830 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18831 __ movl($rtmp$$Register, $con$$constant);
18832 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18833 %}
18834 ins_pipe( pipe_slow );
18835 %}
18836
18837 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18838 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18839 match(Set dst (Replicate src));
18840 effect(TEMP rtmp);
18841 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18842 ins_encode %{
18843 int vlen_enc = vector_length_encoding(this);
18844 __ evmovw($rtmp$$Register, $src$$XMMRegister);
18845 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18846 %}
18847 ins_pipe( pipe_slow );
18848 %}
18849
18850 instruct ReplS_mem(vec dst, memory mem) %{
18851 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18852 match(Set dst (Replicate (LoadS mem)));
18853 format %{ "replicateS $dst,$mem" %}
18854 ins_encode %{
18855 int vlen_enc = vector_length_encoding(this);
18856 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18857 %}
18858 ins_pipe( pipe_slow );
18859 %}
18860
18861 // ====================ReplicateI=======================================
18862
18863 instruct ReplI_reg(vec dst, rRegI src) %{
18864 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18865 match(Set dst (Replicate src));
18866 format %{ "replicateI $dst,$src" %}
18867 ins_encode %{
18868 uint vlen = Matcher::vector_length(this);
18869 int vlen_enc = vector_length_encoding(this);
18870 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18871 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18872 } else if (VM_Version::supports_avx2()) {
18873 __ movdl($dst$$XMMRegister, $src$$Register);
18874 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18875 } else {
18876 __ movdl($dst$$XMMRegister, $src$$Register);
18877 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18878 }
18879 %}
18880 ins_pipe( pipe_slow );
18881 %}
18882
18883 instruct ReplI_mem(vec dst, memory mem) %{
18884 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18885 match(Set dst (Replicate (LoadI mem)));
18886 format %{ "replicateI $dst,$mem" %}
18887 ins_encode %{
18888 int vlen_enc = vector_length_encoding(this);
18889 if (VM_Version::supports_avx2()) {
18890 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18891 } else if (VM_Version::supports_avx()) {
18892 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18893 } else {
18894 __ movdl($dst$$XMMRegister, $mem$$Address);
18895 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18896 }
18897 %}
18898 ins_pipe( pipe_slow );
18899 %}
18900
18901 instruct ReplI_imm(vec dst, immI con) %{
18902 predicate(Matcher::is_non_long_integral_vector(n));
18903 match(Set dst (Replicate con));
18904 format %{ "replicateI $dst,$con" %}
18905 ins_encode %{
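// The replication count sizes the constant table entry (4 bytes with AVX,
// 8 bytes with SSE3, otherwise a full 16 bytes) so that load_constant_vector
// can expand it to the requested vector width.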
18906 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18907 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18908 type2aelembytes(Matcher::vector_element_basic_type(this))));
18909 BasicType bt = Matcher::vector_element_basic_type(this);
18910 int vlen = Matcher::vector_length_in_bytes(this);
18911 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18912 %}
18913 ins_pipe( pipe_slow );
18914 %}
18915
18916 // Replicate scalar zero to be vector
18917 instruct ReplI_zero(vec dst, immI_0 zero) %{
18918 predicate(Matcher::is_non_long_integral_vector(n));
18919 match(Set dst (Replicate zero));
18920 format %{ "replicateI $dst,$zero" %}
18921 ins_encode %{
18922 int vlen_enc = vector_length_encoding(this);
18923 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18924 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18925 } else {
18926 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18927 }
18928 %}
18929 ins_pipe( fpu_reg_reg );
18930 %}
18931
18932 instruct ReplI_M1(vec dst, immI_M1 con) %{
18933 predicate(Matcher::is_non_long_integral_vector(n));
18934 match(Set dst (Replicate con));
18935 format %{ "vallones $dst" %}
18936 ins_encode %{
18937 int vector_len = vector_length_encoding(this);
18938 __ vallones($dst$$XMMRegister, vector_len);
18939 %}
18940 ins_pipe( pipe_slow );
18941 %}
18942
18943 // ====================ReplicateL=======================================
18944
18945 // Replicate long (8 byte) scalar to be vector
18946 instruct ReplL_reg(vec dst, rRegL src) %{
18947 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18948 match(Set dst (Replicate src));
18949 format %{ "replicateL $dst,$src" %}
18950 ins_encode %{
18951 int vlen = Matcher::vector_length(this);
18952 int vlen_enc = vector_length_encoding(this);
18953 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18954 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18955 } else if (VM_Version::supports_avx2()) {
18956 __ movdq($dst$$XMMRegister, $src$$Register);
18957 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18958 } else {
18959 __ movdq($dst$$XMMRegister, $src$$Register);
18960 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18961 }
18962 %}
18963 ins_pipe( pipe_slow );
18964 %}
18965
18966 instruct ReplL_mem(vec dst, memory mem) %{
18967 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18968 match(Set dst (Replicate (LoadL mem)));
18969 format %{ "replicateL $dst,$mem" %}
18970 ins_encode %{
18971 int vlen_enc = vector_length_encoding(this);
18972 if (VM_Version::supports_avx2()) {
18973 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18974 } else if (VM_Version::supports_sse3()) {
18975 __ movddup($dst$$XMMRegister, $mem$$Address);
18976 } else {
18977 __ movq($dst$$XMMRegister, $mem$$Address);
18978 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18979 }
18980 %}
18981 ins_pipe( pipe_slow );
18982 %}
18983
18984 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18985 instruct ReplL_imm(vec dst, immL con) %{
18986 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18987 match(Set dst (Replicate con));
18988 format %{ "replicateL $dst,$con" %}
18989 ins_encode %{
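// With SSE3 a single 8-byte copy can be broadcast via movddup; otherwise both
// halves of the 16-byte constant are stored.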
18990 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18991 int vlen = Matcher::vector_length_in_bytes(this);
18992 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18993 %}
18994 ins_pipe( pipe_slow );
18995 %}
18996
18997 instruct ReplL_zero(vec dst, immL0 zero) %{
18998 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18999 match(Set dst (Replicate zero));
19000 format %{ "replicateL $dst,$zero" %}
19001 ins_encode %{
19002 int vlen_enc = vector_length_encoding(this);
19003 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19004 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19005 } else {
19006 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19007 }
19008 %}
19009 ins_pipe( fpu_reg_reg );
19010 %}
19011
19012 instruct ReplL_M1(vec dst, immL_M1 con) %{
19013 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19014 match(Set dst (Replicate con));
19015 format %{ "vallones $dst" %}
19016 ins_encode %{
19017 int vector_len = vector_length_encoding(this);
19018 __ vallones($dst$$XMMRegister, vector_len);
19019 %}
19020 ins_pipe( pipe_slow );
19021 %}
19022
19023 // ====================ReplicateF=======================================
19024
19025 instruct vReplF_reg(vec dst, vlRegF src) %{
19026 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19027 match(Set dst (Replicate src));
19028 format %{ "replicateF $dst,$src" %}
19029 ins_encode %{
19030 uint vlen = Matcher::vector_length(this);
19031 int vlen_enc = vector_length_encoding(this);
19032 if (vlen <= 4) {
19033 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19034 } else if (VM_Version::supports_avx2()) {
19035 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19036 } else {
19037 assert(vlen == 8, "sanity");
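// AVX1 only: broadcast within the low 128-bit lane, then copy it into the upper lane.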
19038 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19039 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19040 }
19041 %}
19042 ins_pipe( pipe_slow );
19043 %}
19044
19045 instruct ReplF_reg(vec dst, vlRegF src) %{
19046 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19047 match(Set dst (Replicate src));
19048 format %{ "replicateF $dst,$src" %}
19049 ins_encode %{
19050 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19051 %}
19052 ins_pipe( pipe_slow );
19053 %}
19054
19055 instruct ReplF_mem(vec dst, memory mem) %{
19056 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19057 match(Set dst (Replicate (LoadF mem)));
19058 format %{ "replicateF $dst,$mem" %}
19059 ins_encode %{
19060 int vlen_enc = vector_length_encoding(this);
19061 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19062 %}
19063 ins_pipe( pipe_slow );
19064 %}
19065
19066 // Replicate float scalar immediate to be vector by loading from const table.
19067 instruct ReplF_imm(vec dst, immF con) %{
19068 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19069 match(Set dst (Replicate con));
19070 format %{ "replicateF $dst,$con" %}
19071 ins_encode %{
19072 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19073 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19074 int vlen = Matcher::vector_length_in_bytes(this);
19075 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19076 %}
19077 ins_pipe( pipe_slow );
19078 %}
19079
19080 instruct ReplF_zero(vec dst, immF0 zero) %{
19081 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19082 match(Set dst (Replicate zero));
19083 format %{ "replicateF $dst,$zero" %}
19084 ins_encode %{
19085 int vlen_enc = vector_length_encoding(this);
19086 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19087 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19088 } else {
19089 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19090 }
19091 %}
19092 ins_pipe( fpu_reg_reg );
19093 %}
19094
19095 // ====================ReplicateD=======================================
19096
19097 // Replicate double (8 bytes) scalar to be vector
19098 instruct vReplD_reg(vec dst, vlRegD src) %{
19099 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19100 match(Set dst (Replicate src));
19101 format %{ "replicateD $dst,$src" %}
19102 ins_encode %{
19103 uint vlen = Matcher::vector_length(this);
19104 int vlen_enc = vector_length_encoding(this);
19105 if (vlen <= 2) {
19106 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19107 } else if (VM_Version::supports_avx2()) {
19108 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19109 } else {
19110 assert(vlen == 4, "sanity");
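// AVX1 only: duplicate within the low 128-bit lane, then copy it into the upper lane.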
19111 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19112 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19113 }
19114 %}
19115 ins_pipe( pipe_slow );
19116 %}
19117
19118 instruct ReplD_reg(vec dst, vlRegD src) %{
19119 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19120 match(Set dst (Replicate src));
19121 format %{ "replicateD $dst,$src" %}
19122 ins_encode %{
19123 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19124 %}
19125 ins_pipe( pipe_slow );
19126 %}
19127
19128 instruct ReplD_mem(vec dst, memory mem) %{
19129 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19130 match(Set dst (Replicate (LoadD mem)));
19131 format %{ "replicateD $dst,$mem" %}
19132 ins_encode %{
19133 if (Matcher::vector_length(this) >= 4) {
19134 int vlen_enc = vector_length_encoding(this);
19135 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19136 } else {
19137 __ movddup($dst$$XMMRegister, $mem$$Address);
19138 }
19139 %}
19140 ins_pipe( pipe_slow );
19141 %}
19142
19143 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19144 instruct ReplD_imm(vec dst, immD con) %{
19145 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19146 match(Set dst (Replicate con));
19147 format %{ "replicateD $dst,$con" %}
19148 ins_encode %{
19149 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19150 int vlen = Matcher::vector_length_in_bytes(this);
19151 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19152 %}
19153 ins_pipe( pipe_slow );
19154 %}
19155
19156 instruct ReplD_zero(vec dst, immD0 zero) %{
19157 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19158 match(Set dst (Replicate zero));
19159 format %{ "replicateD $dst,$zero" %}
19160 ins_encode %{
19161 int vlen_enc = vector_length_encoding(this);
19162 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19163 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19164 } else {
19165 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19166 }
19167 %}
19168 ins_pipe( fpu_reg_reg );
19169 %}
19170
19171 // ====================VECTOR INSERT=======================================
19172
19173 instruct insert(vec dst, rRegI val, immU8 idx) %{
19174 predicate(Matcher::vector_length_in_bytes(n) < 32);
19175 match(Set dst (VectorInsert (Binary dst val) idx));
19176 format %{ "vector_insert $dst,$val,$idx" %}
19177 ins_encode %{
19178 assert(UseSSE >= 4, "required");
19179 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19180
19181 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19182
19183 assert(is_integral_type(elem_bt), "");
19184 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19185
19186 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19187 %}
19188 ins_pipe( pipe_slow );
19189 %}
19190
19191 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19192 predicate(Matcher::vector_length_in_bytes(n) == 32);
19193 match(Set dst (VectorInsert (Binary src val) idx));
19194 effect(TEMP vtmp);
19195 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19196 ins_encode %{
19197 int vlen_enc = Assembler::AVX_256bit;
19198 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19199 int elem_per_lane = 16/type2aelembytes(elem_bt);
19200 int log2epr = log2(elem_per_lane);
19201
19202 assert(is_integral_type(elem_bt), "sanity");
19203 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19204
19205 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19206 uint y_idx = ($idx$$constant >> log2epr) & 1;
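// Pull out the 128-bit lane holding element $idx, insert the scalar at its
// in-lane position, then write the lane back into the destination.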
19207 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19208 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19209 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19210 %}
19211 ins_pipe( pipe_slow );
19212 %}
19213
19214 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19215 predicate(Matcher::vector_length_in_bytes(n) == 64);
19216 match(Set dst (VectorInsert (Binary src val) idx));
19217 effect(TEMP vtmp);
19218 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19219 ins_encode %{
19220 assert(UseAVX > 2, "sanity");
19221
19222 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19223 int elem_per_lane = 16/type2aelembytes(elem_bt);
19224 int log2epr = log2(elem_per_lane);
19225
19226 assert(is_integral_type(elem_bt), "");
19227 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19228
19229 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19230 uint y_idx = ($idx$$constant >> log2epr) & 3;
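// Same lane-wise approach, selecting one of the four 128-bit lanes of a 512-bit vector.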
19231 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19232 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19233 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19234 %}
19235 ins_pipe( pipe_slow );
19236 %}
19237
19238 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19239 predicate(Matcher::vector_length(n) == 2);
19240 match(Set dst (VectorInsert (Binary dst val) idx));
19241 format %{ "vector_insert $dst,$val,$idx" %}
19242 ins_encode %{
19243 assert(UseSSE >= 4, "required");
19244 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19245 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19246
19247 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19248 %}
19249 ins_pipe( pipe_slow );
19250 %}
19251
19252 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19253 predicate(Matcher::vector_length(n) == 4);
19254 match(Set dst (VectorInsert (Binary src val) idx));
19255 effect(TEMP vtmp);
19256 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19257 ins_encode %{
19258 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19259 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19260
19261 uint x_idx = $idx$$constant & right_n_bits(1);
19262 uint y_idx = ($idx$$constant >> 1) & 1;
19263 int vlen_enc = Assembler::AVX_256bit;
19264 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19265 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19266 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19267 %}
19268 ins_pipe( pipe_slow );
19269 %}
19270
19271 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19272 predicate(Matcher::vector_length(n) == 8);
19273 match(Set dst (VectorInsert (Binary src val) idx));
19274 effect(TEMP vtmp);
19275 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19276 ins_encode %{
19277 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19278 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19279
19280 uint x_idx = $idx$$constant & right_n_bits(1);
19281 uint y_idx = ($idx$$constant >> 1) & 3;
19282 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19283 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19284 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19285 %}
19286 ins_pipe( pipe_slow );
19287 %}
19288
19289 instruct insertF(vec dst, regF val, immU8 idx) %{
19290 predicate(Matcher::vector_length(n) < 8);
19291 match(Set dst (VectorInsert (Binary dst val) idx));
19292 format %{ "vector_insert $dst,$val,$idx" %}
19293 ins_encode %{
19294 assert(UseSSE >= 4, "sanity");
19295
19296 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19297 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19298
19299 uint x_idx = $idx$$constant & right_n_bits(2);
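// The insertps immediate selects the destination element in bits 5:4, so shift
// the index into that field.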
19300 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19301 %}
19302 ins_pipe( pipe_slow );
19303 %}
19304
19305 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19306 predicate(Matcher::vector_length(n) >= 8);
19307 match(Set dst (VectorInsert (Binary src val) idx));
19308 effect(TEMP vtmp);
19309 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19310 ins_encode %{
19311 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19312 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19313
19314 int vlen = Matcher::vector_length(this);
19315 uint x_idx = $idx$$constant & right_n_bits(2);
19316 if (vlen == 8) {
19317 uint y_idx = ($idx$$constant >> 2) & 1;
19318 int vlen_enc = Assembler::AVX_256bit;
19319 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19320 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19321 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19322 } else {
19323 assert(vlen == 16, "sanity");
19324 uint y_idx = ($idx$$constant >> 2) & 3;
19325 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19326 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19327 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19328 }
19329 %}
19330 ins_pipe( pipe_slow );
19331 %}
19332
19333 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19334 predicate(Matcher::vector_length(n) == 2);
19335 match(Set dst (VectorInsert (Binary dst val) idx));
19336 effect(TEMP tmp);
19337 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19338 ins_encode %{
19339 assert(UseSSE >= 4, "sanity");
19340 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19341 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19342
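// pinsrq takes its source from a GPR or memory, so bounce the double's bit
// pattern through $tmp.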
19343 __ movq($tmp$$Register, $val$$XMMRegister);
19344 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19345 %}
19346 ins_pipe( pipe_slow );
19347 %}
19348
19349 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19350 predicate(Matcher::vector_length(n) == 4);
19351 match(Set dst (VectorInsert (Binary src val) idx));
19352 effect(TEMP vtmp, TEMP tmp);
19353 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19354 ins_encode %{
19355 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19356 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19357
19358 uint x_idx = $idx$$constant & right_n_bits(1);
19359 uint y_idx = ($idx$$constant >> 1) & 1;
19360 int vlen_enc = Assembler::AVX_256bit;
19361 __ movq($tmp$$Register, $val$$XMMRegister);
19362 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19363 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19364 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19365 %}
19366 ins_pipe( pipe_slow );
19367 %}
19368
19369 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19370 predicate(Matcher::vector_length(n) == 8);
19371 match(Set dst (VectorInsert (Binary src val) idx));
19372 effect(TEMP tmp, TEMP vtmp);
format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19374 ins_encode %{
19375 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19376 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19377
19378 uint x_idx = $idx$$constant & right_n_bits(1);
19379 uint y_idx = ($idx$$constant >> 1) & 3;
19380 __ movq($tmp$$Register, $val$$XMMRegister);
19381 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19382 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19383 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19384 %}
19385 ins_pipe( pipe_slow );
19386 %}
19387
19388 // ====================REDUCTION ARITHMETIC=======================================
19389
19390 // =======================Int Reduction==========================================
19391
19392 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19393 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19394 match(Set dst (AddReductionVI src1 src2));
19395 match(Set dst (MulReductionVI src1 src2));
19396 match(Set dst (AndReductionV src1 src2));
19397 match(Set dst ( OrReductionV src1 src2));
19398 match(Set dst (XorReductionV src1 src2));
19399 match(Set dst (MinReductionV src1 src2));
19400 match(Set dst (MaxReductionV src1 src2));
19401 match(Set dst (UMinReductionV src1 src2));
19402 match(Set dst (UMaxReductionV src1 src2));
19403 effect(TEMP vtmp1, TEMP vtmp2);
19404 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19405 ins_encode %{
19406 int opcode = this->ideal_Opcode();
19407 int vlen = Matcher::vector_length(this, $src2);
19408 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19409 %}
19410 ins_pipe( pipe_slow );
19411 %}
19412
19413 // =======================Long Reduction==========================================
19414
19415 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19416 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19417 match(Set dst (AddReductionVL src1 src2));
19418 match(Set dst (MulReductionVL src1 src2));
19419 match(Set dst (AndReductionV src1 src2));
19420 match(Set dst ( OrReductionV src1 src2));
19421 match(Set dst (XorReductionV src1 src2));
19422 match(Set dst (MinReductionV src1 src2));
19423 match(Set dst (MaxReductionV src1 src2));
19424 match(Set dst (UMinReductionV src1 src2));
19425 match(Set dst (UMaxReductionV src1 src2));
19426 effect(TEMP vtmp1, TEMP vtmp2);
19427 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19428 ins_encode %{
19429 int opcode = this->ideal_Opcode();
19430 int vlen = Matcher::vector_length(this, $src2);
19431 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19432 %}
19433 ins_pipe( pipe_slow );
19434 %}
19435
19436 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19437 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19438 match(Set dst (AddReductionVL src1 src2));
19439 match(Set dst (MulReductionVL src1 src2));
19440 match(Set dst (AndReductionV src1 src2));
19441 match(Set dst ( OrReductionV src1 src2));
19442 match(Set dst (XorReductionV src1 src2));
19443 match(Set dst (MinReductionV src1 src2));
19444 match(Set dst (MaxReductionV src1 src2));
19445 match(Set dst (UMinReductionV src1 src2));
19446 match(Set dst (UMaxReductionV src1 src2));
19447 effect(TEMP vtmp1, TEMP vtmp2);
19448 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19449 ins_encode %{
19450 int opcode = this->ideal_Opcode();
19451 int vlen = Matcher::vector_length(this, $src2);
19452 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19453 %}
19454 ins_pipe( pipe_slow );
19455 %}
19456
19457 // =======================Float Reduction==========================================
19458
19459 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19460 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19461 match(Set dst (AddReductionVF dst src));
19462 match(Set dst (MulReductionVF dst src));
19463 effect(TEMP dst, TEMP vtmp);
19464 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19465 ins_encode %{
19466 int opcode = this->ideal_Opcode();
19467 int vlen = Matcher::vector_length(this, $src);
19468 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19469 %}
19470 ins_pipe( pipe_slow );
19471 %}
19472
19473 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19474 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19475 match(Set dst (AddReductionVF dst src));
19476 match(Set dst (MulReductionVF dst src));
19477 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19478 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19479 ins_encode %{
19480 int opcode = this->ideal_Opcode();
19481 int vlen = Matcher::vector_length(this, $src);
19482 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19483 %}
19484 ins_pipe( pipe_slow );
19485 %}
19486
19487 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19488 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19489 match(Set dst (AddReductionVF dst src));
19490 match(Set dst (MulReductionVF dst src));
19491 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19492 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19493 ins_encode %{
19494 int opcode = this->ideal_Opcode();
19495 int vlen = Matcher::vector_length(this, $src);
19496 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19497 %}
19498 ins_pipe( pipe_slow );
19499 %}
19500
19501
19502 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19503 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19504 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19505 // src1 contains reduction identity
19506 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19507 match(Set dst (AddReductionVF src1 src2));
19508 match(Set dst (MulReductionVF src1 src2));
19509 effect(TEMP dst);
19510 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19511 ins_encode %{
19512 int opcode = this->ideal_Opcode();
19513 int vlen = Matcher::vector_length(this, $src2);
19514 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19515 %}
19516 ins_pipe( pipe_slow );
19517 %}
19518
19519 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19520 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19521 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19522 // src1 contains reduction identity
19523 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19524 match(Set dst (AddReductionVF src1 src2));
19525 match(Set dst (MulReductionVF src1 src2));
19526 effect(TEMP dst, TEMP vtmp);
19527 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19528 ins_encode %{
19529 int opcode = this->ideal_Opcode();
19530 int vlen = Matcher::vector_length(this, $src2);
19531 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19532 %}
19533 ins_pipe( pipe_slow );
19534 %}
19535
19536 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19537 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19538 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19539 // src1 contains reduction identity
19540 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19541 match(Set dst (AddReductionVF src1 src2));
19542 match(Set dst (MulReductionVF src1 src2));
19543 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19544 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19545 ins_encode %{
19546 int opcode = this->ideal_Opcode();
19547 int vlen = Matcher::vector_length(this, $src2);
19548 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19549 %}
19550 ins_pipe( pipe_slow );
19551 %}
19552
19553 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19554 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19555 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19556 // src1 contains reduction identity
19557 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19558 match(Set dst (AddReductionVF src1 src2));
19559 match(Set dst (MulReductionVF src1 src2));
19560 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19561 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19562 ins_encode %{
19563 int opcode = this->ideal_Opcode();
19564 int vlen = Matcher::vector_length(this, $src2);
19565 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19566 %}
19567 ins_pipe( pipe_slow );
19568 %}
19569
19570 // =======================Double Reduction==========================================
19571
19572 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19573 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19574 match(Set dst (AddReductionVD dst src));
19575 match(Set dst (MulReductionVD dst src));
19576 effect(TEMP dst, TEMP vtmp);
19577 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19578 ins_encode %{
19579 int opcode = this->ideal_Opcode();
19580 int vlen = Matcher::vector_length(this, $src);
19581 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19582 %}
19583 ins_pipe( pipe_slow );
19584 %}
19585
19586 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19587 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19588 match(Set dst (AddReductionVD dst src));
19589 match(Set dst (MulReductionVD dst src));
19590 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19591 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19592 ins_encode %{
19593 int opcode = this->ideal_Opcode();
19594 int vlen = Matcher::vector_length(this, $src);
19595 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19596 %}
19597 ins_pipe( pipe_slow );
19598 %}
19599
19600 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19601 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19602 match(Set dst (AddReductionVD dst src));
19603 match(Set dst (MulReductionVD dst src));
19604 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19605 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19606 ins_encode %{
19607 int opcode = this->ideal_Opcode();
19608 int vlen = Matcher::vector_length(this, $src);
19609 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19610 %}
19611 ins_pipe( pipe_slow );
19612 %}
19613
19614 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19615 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19616 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19617 // src1 contains reduction identity
19618 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19619 match(Set dst (AddReductionVD src1 src2));
19620 match(Set dst (MulReductionVD src1 src2));
19621 effect(TEMP dst);
19622 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19623 ins_encode %{
19624 int opcode = this->ideal_Opcode();
19625 int vlen = Matcher::vector_length(this, $src2);
19626 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19627 %}
19628 ins_pipe( pipe_slow );
19629 %}
19630
19631 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19632 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19633 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19634 // src1 contains reduction identity
19635 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19636 match(Set dst (AddReductionVD src1 src2));
19637 match(Set dst (MulReductionVD src1 src2));
19638 effect(TEMP dst, TEMP vtmp);
19639 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19640 ins_encode %{
19641 int opcode = this->ideal_Opcode();
19642 int vlen = Matcher::vector_length(this, $src2);
19643 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19644 %}
19645 ins_pipe( pipe_slow );
19646 %}
19647
19648 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19649 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19650 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19651 // src1 contains reduction identity
19652 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19653 match(Set dst (AddReductionVD src1 src2));
19654 match(Set dst (MulReductionVD src1 src2));
19655 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19656 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19657 ins_encode %{
19658 int opcode = this->ideal_Opcode();
19659 int vlen = Matcher::vector_length(this, $src2);
19660 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19661 %}
19662 ins_pipe( pipe_slow );
19663 %}
19664
19665 // =======================Byte Reduction==========================================
19666
19667 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19668 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19669 match(Set dst (AddReductionVI src1 src2));
19670 match(Set dst (AndReductionV src1 src2));
19671 match(Set dst ( OrReductionV src1 src2));
19672 match(Set dst (XorReductionV src1 src2));
19673 match(Set dst (MinReductionV src1 src2));
19674 match(Set dst (MaxReductionV src1 src2));
19675 match(Set dst (UMinReductionV src1 src2));
19676 match(Set dst (UMaxReductionV src1 src2));
19677 effect(TEMP vtmp1, TEMP vtmp2);
19678 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19679 ins_encode %{
19680 int opcode = this->ideal_Opcode();
19681 int vlen = Matcher::vector_length(this, $src2);
19682 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19683 %}
19684 ins_pipe( pipe_slow );
19685 %}
19686
19687 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19688 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19689 match(Set dst (AddReductionVI src1 src2));
19690 match(Set dst (AndReductionV src1 src2));
19691 match(Set dst ( OrReductionV src1 src2));
19692 match(Set dst (XorReductionV src1 src2));
19693 match(Set dst (MinReductionV src1 src2));
19694 match(Set dst (MaxReductionV src1 src2));
19695 match(Set dst (UMinReductionV src1 src2));
19696 match(Set dst (UMaxReductionV src1 src2));
19697 effect(TEMP vtmp1, TEMP vtmp2);
19698 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19699 ins_encode %{
19700 int opcode = this->ideal_Opcode();
19701 int vlen = Matcher::vector_length(this, $src2);
19702 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19703 %}
19704 ins_pipe( pipe_slow );
19705 %}
19706
19707 // =======================Short Reduction==========================================
19708
19709 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19710 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19711 match(Set dst (AddReductionVI src1 src2));
19712 match(Set dst (MulReductionVI src1 src2));
19713 match(Set dst (AndReductionV src1 src2));
19714 match(Set dst ( OrReductionV src1 src2));
19715 match(Set dst (XorReductionV src1 src2));
19716 match(Set dst (MinReductionV src1 src2));
19717 match(Set dst (MaxReductionV src1 src2));
19718 match(Set dst (UMinReductionV src1 src2));
19719 match(Set dst (UMaxReductionV src1 src2));
19720 effect(TEMP vtmp1, TEMP vtmp2);
19721 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19722 ins_encode %{
19723 int opcode = this->ideal_Opcode();
19724 int vlen = Matcher::vector_length(this, $src2);
19725 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19726 %}
19727 ins_pipe( pipe_slow );
19728 %}
19729
19730 // =======================Mul Reduction==========================================
19731
19732 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19733 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19734 Matcher::vector_length(n->in(2)) <= 32); // src2
19735 match(Set dst (MulReductionVI src1 src2));
19736 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19737 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19738 ins_encode %{
19739 int opcode = this->ideal_Opcode();
19740 int vlen = Matcher::vector_length(this, $src2);
19741 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19742 %}
19743 ins_pipe( pipe_slow );
19744 %}
19745
19746 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19747 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19748 Matcher::vector_length(n->in(2)) == 64); // src2
19749 match(Set dst (MulReductionVI src1 src2));
19750 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19751 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19752 ins_encode %{
19753 int opcode = this->ideal_Opcode();
19754 int vlen = Matcher::vector_length(this, $src2);
19755 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19756 %}
19757 ins_pipe( pipe_slow );
19758 %}
19759
19760 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
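// The immF src1 variants apply only when src1 is the reduction identity
// (+Inf for min, -Inf for max, enforced by the predicates), so only the vector
// lanes need to be reduced.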
19762 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19763 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19764 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19765 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19766 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19767 Matcher::vector_length(n->in(2)) == 2);
19768 match(Set dst (MinReductionV src1 src2));
19769 match(Set dst (MaxReductionV src1 src2));
19770 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19771 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19772 ins_encode %{
19773 assert(UseAVX > 0, "sanity");
19774
19775 int opcode = this->ideal_Opcode();
19776 int vlen = Matcher::vector_length(this, $src2);
19777 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19778 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19779 %}
19780 ins_pipe( pipe_slow );
19781 %}
19782
19783 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19784 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19785 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19786 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19787 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19788 Matcher::vector_length(n->in(2)) >= 4);
19789 match(Set dst (MinReductionV src1 src2));
19790 match(Set dst (MaxReductionV src1 src2));
19791 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19792 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19793 ins_encode %{
19794 assert(UseAVX > 0, "sanity");
19795
19796 int opcode = this->ideal_Opcode();
19797 int vlen = Matcher::vector_length(this, $src2);
19798 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19799 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19800 %}
19801 ins_pipe( pipe_slow );
19802 %}
19803
19804 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19805 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19806 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19807 Matcher::vector_length(n->in(2)) == 2);
19808 match(Set dst (MinReductionV dst src));
19809 match(Set dst (MaxReductionV dst src));
19810 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19811 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19812 ins_encode %{
19813 assert(UseAVX > 0, "sanity");
19814
19815 int opcode = this->ideal_Opcode();
19816 int vlen = Matcher::vector_length(this, $src);
19817 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19818 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19819 %}
19820 ins_pipe( pipe_slow );
19821 %}
19822
19823
19824 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19825 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19826 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19827 Matcher::vector_length(n->in(2)) >= 4);
19828 match(Set dst (MinReductionV dst src));
19829 match(Set dst (MaxReductionV dst src));
19830 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19831 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19832 ins_encode %{
19833 assert(UseAVX > 0, "sanity");
19834
19835 int opcode = this->ideal_Opcode();
19836 int vlen = Matcher::vector_length(this, $src);
19837 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19838 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19839 %}
19840 ins_pipe( pipe_slow );
19841 %}
19842
19843 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19844 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19845 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19846 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19847 Matcher::vector_length(n->in(2)) == 2);
19848 match(Set dst (MinReductionV src1 src2));
19849 match(Set dst (MaxReductionV src1 src2));
19850 effect(TEMP dst, TEMP xtmp1);
19851 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19852 ins_encode %{
19853 int opcode = this->ideal_Opcode();
19854 int vlen = Matcher::vector_length(this, $src2);
19855 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19856 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19857 %}
19858 ins_pipe( pipe_slow );
19859 %}
19860
19861 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19862 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19863 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19864 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19865 Matcher::vector_length(n->in(2)) >= 4);
19866 match(Set dst (MinReductionV src1 src2));
19867 match(Set dst (MaxReductionV src1 src2));
19868 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19869 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19870 ins_encode %{
19871 int opcode = this->ideal_Opcode();
19872 int vlen = Matcher::vector_length(this, $src2);
19873 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19874 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19875 %}
19876 ins_pipe( pipe_slow );
19877 %}
19878
19879 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19880 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19881 Matcher::vector_length(n->in(2)) == 2);
19882 match(Set dst (MinReductionV dst src));
19883 match(Set dst (MaxReductionV dst src));
19884 effect(TEMP dst, TEMP xtmp1);
19885 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19886 ins_encode %{
19887 int opcode = this->ideal_Opcode();
19888 int vlen = Matcher::vector_length(this, $src);
19889 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19890 $xtmp1$$XMMRegister);
19891 %}
19892 ins_pipe( pipe_slow );
19893 %}
19894
19895 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19896 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19897 Matcher::vector_length(n->in(2)) >= 4);
19898 match(Set dst (MinReductionV dst src));
19899 match(Set dst (MaxReductionV dst src));
19900 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19902 ins_encode %{
19903 int opcode = this->ideal_Opcode();
19904 int vlen = Matcher::vector_length(this, $src);
19905 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19906 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19907 %}
19908 ins_pipe( pipe_slow );
19909 %}
19910
//--------------------Min/Max Double Reduction --------------------
19912 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19913 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19914 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19915 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19916 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19917 Matcher::vector_length(n->in(2)) == 2);
19918 match(Set dst (MinReductionV src1 src2));
19919 match(Set dst (MaxReductionV src1 src2));
19920 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19921 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19922 ins_encode %{
19923 assert(UseAVX > 0, "sanity");
19924
19925 int opcode = this->ideal_Opcode();
19926 int vlen = Matcher::vector_length(this, $src2);
19927 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19928 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19929 %}
19930 ins_pipe( pipe_slow );
19931 %}
19932
19933 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19934 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19935 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19936 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19937 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19938 Matcher::vector_length(n->in(2)) >= 4);
19939 match(Set dst (MinReductionV src1 src2));
19940 match(Set dst (MaxReductionV src1 src2));
19941 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19942 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19943 ins_encode %{
19944 assert(UseAVX > 0, "sanity");
19945
19946 int opcode = this->ideal_Opcode();
19947 int vlen = Matcher::vector_length(this, $src2);
19948 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19949 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19950 %}
19951 ins_pipe( pipe_slow );
19952 %}
19953
19954
19955 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19956 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19957 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19958 Matcher::vector_length(n->in(2)) == 2);
19959 match(Set dst (MinReductionV dst src));
19960 match(Set dst (MaxReductionV dst src));
19961 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19962 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19963 ins_encode %{
19964 assert(UseAVX > 0, "sanity");
19965
19966 int opcode = this->ideal_Opcode();
19967 int vlen = Matcher::vector_length(this, $src);
19968 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19969 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19970 %}
19971 ins_pipe( pipe_slow );
19972 %}
19973
19974 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19975 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19976 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19977 Matcher::vector_length(n->in(2)) >= 4);
19978 match(Set dst (MinReductionV dst src));
19979 match(Set dst (MaxReductionV dst src));
19980 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19981 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19982 ins_encode %{
19983 assert(UseAVX > 0, "sanity");
19984
19985 int opcode = this->ideal_Opcode();
19986 int vlen = Matcher::vector_length(this, $src);
19987 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19988 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19989 %}
19990 ins_pipe( pipe_slow );
19991 %}
19992
19993 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19994 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19995 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19996 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19997 Matcher::vector_length(n->in(2)) == 2);
19998 match(Set dst (MinReductionV src1 src2));
19999 match(Set dst (MaxReductionV src1 src2));
20000 effect(TEMP dst, TEMP xtmp1);
20001 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20002 ins_encode %{
20003 int opcode = this->ideal_Opcode();
20004 int vlen = Matcher::vector_length(this, $src2);
20005 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20006 xnoreg, xnoreg, $xtmp1$$XMMRegister);
20007 %}
20008 ins_pipe( pipe_slow );
20009 %}
20010
20011 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20012 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20013 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20014 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20015 Matcher::vector_length(n->in(2)) >= 4);
20016 match(Set dst (MinReductionV src1 src2));
20017 match(Set dst (MaxReductionV src1 src2));
20018 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20019 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20020 ins_encode %{
20021 int opcode = this->ideal_Opcode();
20022 int vlen = Matcher::vector_length(this, $src2);
20023 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20024 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20025 %}
20026 ins_pipe( pipe_slow );
20027 %}
20028
20029
20030 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20031 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20032 Matcher::vector_length(n->in(2)) == 2);
20033 match(Set dst (MinReductionV dst src));
20034 match(Set dst (MaxReductionV dst src));
20035 effect(TEMP dst, TEMP xtmp1);
20036 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20037 ins_encode %{
20038 int opcode = this->ideal_Opcode();
20039 int vlen = Matcher::vector_length(this, $src);
20040 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20041 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20042 %}
20043 ins_pipe( pipe_slow );
20044 %}
20045
20046 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20047 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20048 Matcher::vector_length(n->in(2)) >= 4);
20049 match(Set dst (MinReductionV dst src));
20050 match(Set dst (MaxReductionV dst src));
20051 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20052 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20053 ins_encode %{
20054 int opcode = this->ideal_Opcode();
20055 int vlen = Matcher::vector_length(this, $src);
20056 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20057 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20058 %}
20059 ins_pipe( pipe_slow );
20060 %}
20061
20062 // ====================VECTOR ARITHMETIC=======================================
20063
20064 // --------------------------------- ADD --------------------------------------
20065
20066 // Bytes vector add
20067 instruct vaddB(vec dst, vec src) %{
20068 predicate(UseAVX == 0);
20069 match(Set dst (AddVB dst src));
20070 format %{ "paddb $dst,$src\t! add packedB" %}
20071 ins_encode %{
20072 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20073 %}
20074 ins_pipe( pipe_slow );
20075 %}
20076
20077 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20078 predicate(UseAVX > 0);
20079 match(Set dst (AddVB src1 src2));
20080 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20081 ins_encode %{
20082 int vlen_enc = vector_length_encoding(this);
20083 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20084 %}
20085 ins_pipe( pipe_slow );
20086 %}
20087
20088 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20089 predicate((UseAVX > 0) &&
20090 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20091 match(Set dst (AddVB src (LoadVector mem)));
20092 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20093 ins_encode %{
20094 int vlen_enc = vector_length_encoding(this);
20095 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20096 %}
20097 ins_pipe( pipe_slow );
20098 %}
20099
20100 // Shorts/Chars vector add
20101 instruct vaddS(vec dst, vec src) %{
20102 predicate(UseAVX == 0);
20103 match(Set dst (AddVS dst src));
20104 format %{ "paddw $dst,$src\t! add packedS" %}
20105 ins_encode %{
20106 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20107 %}
20108 ins_pipe( pipe_slow );
20109 %}
20110
20111 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20112 predicate(UseAVX > 0);
20113 match(Set dst (AddVS src1 src2));
20114 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20115 ins_encode %{
20116 int vlen_enc = vector_length_encoding(this);
20117 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20118 %}
20119 ins_pipe( pipe_slow );
20120 %}
20121
20122 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20123 predicate((UseAVX > 0) &&
20124 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20125 match(Set dst (AddVS src (LoadVector mem)));
20126 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20127 ins_encode %{
20128 int vlen_enc = vector_length_encoding(this);
20129 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20130 %}
20131 ins_pipe( pipe_slow );
20132 %}
20133
20134 // Integers vector add
20135 instruct vaddI(vec dst, vec src) %{
20136 predicate(UseAVX == 0);
20137 match(Set dst (AddVI dst src));
20138 format %{ "paddd $dst,$src\t! add packedI" %}
20139 ins_encode %{
20140 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20141 %}
20142 ins_pipe( pipe_slow );
20143 %}
20144
20145 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20146 predicate(UseAVX > 0);
20147 match(Set dst (AddVI src1 src2));
20148 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20149 ins_encode %{
20150 int vlen_enc = vector_length_encoding(this);
20151 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20152 %}
20153 ins_pipe( pipe_slow );
20154 %}
20155
20156
20157 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20158 predicate((UseAVX > 0) &&
20159 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20160 match(Set dst (AddVI src (LoadVector mem)));
20161 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20162 ins_encode %{
20163 int vlen_enc = vector_length_encoding(this);
20164 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20165 %}
20166 ins_pipe( pipe_slow );
20167 %}
20168
20169 // Longs vector add
20170 instruct vaddL(vec dst, vec src) %{
20171 predicate(UseAVX == 0);
20172 match(Set dst (AddVL dst src));
20173 format %{ "paddq $dst,$src\t! add packedL" %}
20174 ins_encode %{
20175 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20176 %}
20177 ins_pipe( pipe_slow );
20178 %}
20179
20180 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20181 predicate(UseAVX > 0);
20182 match(Set dst (AddVL src1 src2));
20183 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20184 ins_encode %{
20185 int vlen_enc = vector_length_encoding(this);
20186 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20187 %}
20188 ins_pipe( pipe_slow );
20189 %}
20190
20191 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20192 predicate((UseAVX > 0) &&
20193 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20194 match(Set dst (AddVL src (LoadVector mem)));
20195 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20196 ins_encode %{
20197 int vlen_enc = vector_length_encoding(this);
20198 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20199 %}
20200 ins_pipe( pipe_slow );
20201 %}
20202
20203 // Floats vector add
20204 instruct vaddF(vec dst, vec src) %{
20205 predicate(UseAVX == 0);
20206 match(Set dst (AddVF dst src));
20207 format %{ "addps $dst,$src\t! add packedF" %}
20208 ins_encode %{
20209 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20210 %}
20211 ins_pipe( pipe_slow );
20212 %}
20213
20214 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20215 predicate(UseAVX > 0);
20216 match(Set dst (AddVF src1 src2));
20217 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20218 ins_encode %{
20219 int vlen_enc = vector_length_encoding(this);
20220 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20221 %}
20222 ins_pipe( pipe_slow );
20223 %}
20224
20225 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20226 predicate((UseAVX > 0) &&
20227 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20228 match(Set dst (AddVF src (LoadVector mem)));
20229 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20230 ins_encode %{
20231 int vlen_enc = vector_length_encoding(this);
20232 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20233 %}
20234 ins_pipe( pipe_slow );
20235 %}
20236
20237 // Doubles vector add
20238 instruct vaddD(vec dst, vec src) %{
20239 predicate(UseAVX == 0);
20240 match(Set dst (AddVD dst src));
20241 format %{ "addpd $dst,$src\t! add packedD" %}
20242 ins_encode %{
20243 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20244 %}
20245 ins_pipe( pipe_slow );
20246 %}
20247
20248 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20249 predicate(UseAVX > 0);
20250 match(Set dst (AddVD src1 src2));
20251 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20252 ins_encode %{
20253 int vlen_enc = vector_length_encoding(this);
20254 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20255 %}
20256 ins_pipe( pipe_slow );
20257 %}
20258
20259 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20260 predicate((UseAVX > 0) &&
20261 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20262 match(Set dst (AddVD src (LoadVector mem)));
20263 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20264 ins_encode %{
20265 int vlen_enc = vector_length_encoding(this);
20266 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20267 %}
20268 ins_pipe( pipe_slow );
20269 %}
20270
20271 // --------------------------------- SUB --------------------------------------
20272
20273 // Bytes vector sub
20274 instruct vsubB(vec dst, vec src) %{
20275 predicate(UseAVX == 0);
20276 match(Set dst (SubVB dst src));
20277 format %{ "psubb $dst,$src\t! sub packedB" %}
20278 ins_encode %{
20279 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20280 %}
20281 ins_pipe( pipe_slow );
20282 %}
20283
20284 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20285 predicate(UseAVX > 0);
20286 match(Set dst (SubVB src1 src2));
20287 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20288 ins_encode %{
20289 int vlen_enc = vector_length_encoding(this);
20290 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20291 %}
20292 ins_pipe( pipe_slow );
20293 %}
20294
20295 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20296 predicate((UseAVX > 0) &&
20297 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20298 match(Set dst (SubVB src (LoadVector mem)));
20299 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20300 ins_encode %{
20301 int vlen_enc = vector_length_encoding(this);
20302 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20303 %}
20304 ins_pipe( pipe_slow );
20305 %}
20306
20307 // Shorts/Chars vector sub
20308 instruct vsubS(vec dst, vec src) %{
20309 predicate(UseAVX == 0);
20310 match(Set dst (SubVS dst src));
20311 format %{ "psubw $dst,$src\t! sub packedS" %}
20312 ins_encode %{
20313 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20314 %}
20315 ins_pipe( pipe_slow );
20316 %}
20317
20318
20319 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20320 predicate(UseAVX > 0);
20321 match(Set dst (SubVS src1 src2));
20322 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20323 ins_encode %{
20324 int vlen_enc = vector_length_encoding(this);
20325 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20326 %}
20327 ins_pipe( pipe_slow );
20328 %}
20329
20330 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20331 predicate((UseAVX > 0) &&
20332 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20333 match(Set dst (SubVS src (LoadVector mem)));
20334 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20335 ins_encode %{
20336 int vlen_enc = vector_length_encoding(this);
20337 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20338 %}
20339 ins_pipe( pipe_slow );
20340 %}
20341
20342 // Integers vector sub
20343 instruct vsubI(vec dst, vec src) %{
20344 predicate(UseAVX == 0);
20345 match(Set dst (SubVI dst src));
20346 format %{ "psubd $dst,$src\t! sub packedI" %}
20347 ins_encode %{
20348 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20349 %}
20350 ins_pipe( pipe_slow );
20351 %}
20352
20353 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20354 predicate(UseAVX > 0);
20355 match(Set dst (SubVI src1 src2));
20356 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20357 ins_encode %{
20358 int vlen_enc = vector_length_encoding(this);
20359 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20360 %}
20361 ins_pipe( pipe_slow );
20362 %}
20363
20364 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20365 predicate((UseAVX > 0) &&
20366 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20367 match(Set dst (SubVI src (LoadVector mem)));
20368 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20369 ins_encode %{
20370 int vlen_enc = vector_length_encoding(this);
20371 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20372 %}
20373 ins_pipe( pipe_slow );
20374 %}
20375
20376 // Longs vector sub
20377 instruct vsubL(vec dst, vec src) %{
20378 predicate(UseAVX == 0);
20379 match(Set dst (SubVL dst src));
20380 format %{ "psubq $dst,$src\t! sub packedL" %}
20381 ins_encode %{
20382 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20383 %}
20384 ins_pipe( pipe_slow );
20385 %}
20386
20387 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20388 predicate(UseAVX > 0);
20389 match(Set dst (SubVL src1 src2));
20390 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20391 ins_encode %{
20392 int vlen_enc = vector_length_encoding(this);
20393 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20394 %}
20395 ins_pipe( pipe_slow );
20396 %}
20397
20398
20399 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20400 predicate((UseAVX > 0) &&
20401 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20402 match(Set dst (SubVL src (LoadVector mem)));
20403 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20404 ins_encode %{
20405 int vlen_enc = vector_length_encoding(this);
20406 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20407 %}
20408 ins_pipe( pipe_slow );
20409 %}
20410
20411 // Floats vector sub
20412 instruct vsubF(vec dst, vec src) %{
20413 predicate(UseAVX == 0);
20414 match(Set dst (SubVF dst src));
20415 format %{ "subps $dst,$src\t! sub packedF" %}
20416 ins_encode %{
20417 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20418 %}
20419 ins_pipe( pipe_slow );
20420 %}
20421
20422 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20423 predicate(UseAVX > 0);
20424 match(Set dst (SubVF src1 src2));
20425 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20426 ins_encode %{
20427 int vlen_enc = vector_length_encoding(this);
20428 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20429 %}
20430 ins_pipe( pipe_slow );
20431 %}
20432
20433 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20434 predicate((UseAVX > 0) &&
20435 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20436 match(Set dst (SubVF src (LoadVector mem)));
20437 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20438 ins_encode %{
20439 int vlen_enc = vector_length_encoding(this);
20440 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20441 %}
20442 ins_pipe( pipe_slow );
20443 %}
20444
20445 // Doubles vector sub
20446 instruct vsubD(vec dst, vec src) %{
20447 predicate(UseAVX == 0);
20448 match(Set dst (SubVD dst src));
20449 format %{ "subpd $dst,$src\t! sub packedD" %}
20450 ins_encode %{
20451 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20452 %}
20453 ins_pipe( pipe_slow );
20454 %}
20455
20456 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20457 predicate(UseAVX > 0);
20458 match(Set dst (SubVD src1 src2));
20459 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20460 ins_encode %{
20461 int vlen_enc = vector_length_encoding(this);
20462 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20463 %}
20464 ins_pipe( pipe_slow );
20465 %}
20466
20467 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20468 predicate((UseAVX > 0) &&
20469 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20470 match(Set dst (SubVD src (LoadVector mem)));
20471 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20472 ins_encode %{
20473 int vlen_enc = vector_length_encoding(this);
20474 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20475 %}
20476 ins_pipe( pipe_slow );
20477 %}
20478
20479 // --------------------------------- MUL --------------------------------------
20480
20481 // Byte vector mul
20482 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20483 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20484 match(Set dst (MulVB src1 src2));
20485 effect(TEMP dst, TEMP xtmp);
20486 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20487 ins_encode %{
20488 assert(UseSSE > 3, "required");
20489 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20490 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20491 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20492 __ psllw($dst$$XMMRegister, 8);
20493 __ psrlw($dst$$XMMRegister, 8);
20494 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20495 %}
20496 ins_pipe( pipe_slow );
20497 %}
20498
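// Note: SSE/AVX provide no byte-granularity multiply instruction, so the two
// rules below multiply the odd and even byte lanes separately as 16-bit lanes
// and merge the halves. Per lane the effect is simply (illustration only):
//   result_byte = (uint8_t)((uint16_t)(uint8_t)a * (uint8_t)b)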
20499 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20500 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20501 match(Set dst (MulVB src1 src2));
20502 effect(TEMP dst, TEMP xtmp);
20503 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20504 ins_encode %{
20505 assert(UseSSE > 3, "required");
20506 // Odd-index elements
20507 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20508 __ psrlw($dst$$XMMRegister, 8);
20509 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20510 __ psrlw($xtmp$$XMMRegister, 8);
20511 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20512 __ psllw($dst$$XMMRegister, 8);
20513 // Even-index elements
20514 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20515 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20516 __ psllw($xtmp$$XMMRegister, 8);
20517 __ psrlw($xtmp$$XMMRegister, 8);
20518 // Combine
20519 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20520 %}
20521 ins_pipe( pipe_slow );
20522 %}
20523
20524 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20525 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20526 match(Set dst (MulVB src1 src2));
20527 effect(TEMP xtmp1, TEMP xtmp2);
20528 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20529 ins_encode %{
20530 int vlen_enc = vector_length_encoding(this);
20531 // Odd-index elements
20532 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20533 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20534 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20535 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20536 // Even-index elements
20537 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20538 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20539 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20540 // Combine
20541 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20542 %}
20543 ins_pipe( pipe_slow );
20544 %}
20545
20546 // Shorts/Chars vector mul
20547 instruct vmulS(vec dst, vec src) %{
20548 predicate(UseAVX == 0);
20549 match(Set dst (MulVS dst src));
20550 format %{ "pmullw $dst,$src\t! mul packedS" %}
20551 ins_encode %{
20552 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20553 %}
20554 ins_pipe( pipe_slow );
20555 %}
20556
20557 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20558 predicate(UseAVX > 0);
20559 match(Set dst (MulVS src1 src2));
20560 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20561 ins_encode %{
20562 int vlen_enc = vector_length_encoding(this);
20563 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20564 %}
20565 ins_pipe( pipe_slow );
20566 %}
20567
20568 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20569 predicate((UseAVX > 0) &&
20570 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20571 match(Set dst (MulVS src (LoadVector mem)));
20572 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20573 ins_encode %{
20574 int vlen_enc = vector_length_encoding(this);
20575 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20576 %}
20577 ins_pipe( pipe_slow );
20578 %}
20579
20580 // Integers vector mul
20581 instruct vmulI(vec dst, vec src) %{
20582 predicate(UseAVX == 0);
20583 match(Set dst (MulVI dst src));
20584 format %{ "pmulld $dst,$src\t! mul packedI" %}
20585 ins_encode %{
20586 assert(UseSSE > 3, "required");
20587 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20588 %}
20589 ins_pipe( pipe_slow );
20590 %}
20591
20592 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20593 predicate(UseAVX > 0);
20594 match(Set dst (MulVI src1 src2));
20595 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20596 ins_encode %{
20597 int vlen_enc = vector_length_encoding(this);
20598 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20599 %}
20600 ins_pipe( pipe_slow );
20601 %}
20602
20603 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20604 predicate((UseAVX > 0) &&
20605 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20606 match(Set dst (MulVI src (LoadVector mem)));
20607 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20608 ins_encode %{
20609 int vlen_enc = vector_length_encoding(this);
20610 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20611 %}
20612 ins_pipe( pipe_slow );
20613 %}
20614
20615 // Longs vector mul
20616 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20617 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20618 VM_Version::supports_avx512dq()) ||
20619 VM_Version::supports_avx512vldq());
20620 match(Set dst (MulVL src1 src2));
20621 ins_cost(500);
20622 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20623 ins_encode %{
20624 assert(UseAVX > 2, "required");
20625 int vlen_enc = vector_length_encoding(this);
20626 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20627 %}
20628 ins_pipe( pipe_slow );
20629 %}
20630
20631 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20632 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20633 VM_Version::supports_avx512dq()) ||
20634 (Matcher::vector_length_in_bytes(n) > 8 &&
20635 VM_Version::supports_avx512vldq()));
20636 match(Set dst (MulVL src (LoadVector mem)));
20637 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20638 ins_cost(500);
20639 ins_encode %{
20640 assert(UseAVX > 2, "required");
20641 int vlen_enc = vector_length_encoding(this);
20642 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20643 %}
20644 ins_pipe( pipe_slow );
20645 %}
20646
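// Without AVX-512DQ there is no packed 64x64->64 multiply, so the two rules
// below assemble the low 64 bits of each product from 32x32-bit multiplies.
// A minimal scalar sketch of the identity they rely on (plain C, names are
// illustrative only):
//
//   uint64_t mul64lo(uint64_t a, uint64_t b) {
//     uint64_t a_lo = (uint32_t)a, a_hi = a >> 32;
//     uint64_t b_lo = (uint32_t)b, b_hi = b >> 32;
//     // the a_hi*b_hi term only affects bits >= 64 and can be dropped
//     return a_lo * b_lo + ((a_lo * b_hi + a_hi * b_lo) << 32);
//   }
//
// pmuludq/vpmuludq supplies the a_lo*b_lo terms; the pshufd/pmulld sequence
// computes and sums the two cross products before shifting them up by 32.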
20647 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20648 predicate(UseAVX == 0);
20649 match(Set dst (MulVL src1 src2));
20650 ins_cost(500);
20651 effect(TEMP dst, TEMP xtmp);
20652 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20653 ins_encode %{
20654 assert(VM_Version::supports_sse4_1(), "required");
20655     // Get the lo-hi products; only their lower 32 bits are of concern
20656 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20657 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20658 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20659 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20660 __ psllq($dst$$XMMRegister, 32);
20661 // Get the lo-lo products
20662 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20663 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20664 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20665 %}
20666 ins_pipe( pipe_slow );
20667 %}
20668
20669 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20670 predicate(UseAVX > 0 &&
20671 ((Matcher::vector_length_in_bytes(n) == 64 &&
20672 !VM_Version::supports_avx512dq()) ||
20673 (Matcher::vector_length_in_bytes(n) < 64 &&
20674 !VM_Version::supports_avx512vldq())));
20675 match(Set dst (MulVL src1 src2));
20676 effect(TEMP xtmp1, TEMP xtmp2);
20677 ins_cost(500);
20678 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20679 ins_encode %{
20680 int vlen_enc = vector_length_encoding(this);
20681     // Get the lo-hi products; only their lower 32 bits are of concern
20682 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20683 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20684 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20685 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20686 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20687 // Get the lo-lo products
20688 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20689 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20690 %}
20691 ins_pipe( pipe_slow );
20692 %}
20693
20694 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20695 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20696 match(Set dst (MulVL src1 src2));
20697 ins_cost(100);
20698 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20699 ins_encode %{
20700 int vlen_enc = vector_length_encoding(this);
20701 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20702 %}
20703 ins_pipe( pipe_slow );
20704 %}
20705
20706 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20707 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20708 match(Set dst (MulVL src1 src2));
20709 ins_cost(100);
20710 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20711 ins_encode %{
20712 int vlen_enc = vector_length_encoding(this);
20713 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20714 %}
20715 ins_pipe( pipe_slow );
20716 %}
20717
20718 // Floats vector mul
20719 instruct vmulF(vec dst, vec src) %{
20720 predicate(UseAVX == 0);
20721 match(Set dst (MulVF dst src));
20722 format %{ "mulps $dst,$src\t! mul packedF" %}
20723 ins_encode %{
20724 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20725 %}
20726 ins_pipe( pipe_slow );
20727 %}
20728
20729 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20730 predicate(UseAVX > 0);
20731 match(Set dst (MulVF src1 src2));
20732 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20733 ins_encode %{
20734 int vlen_enc = vector_length_encoding(this);
20735 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20736 %}
20737 ins_pipe( pipe_slow );
20738 %}
20739
20740 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20741 predicate((UseAVX > 0) &&
20742 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20743 match(Set dst (MulVF src (LoadVector mem)));
20744 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20745 ins_encode %{
20746 int vlen_enc = vector_length_encoding(this);
20747 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20748 %}
20749 ins_pipe( pipe_slow );
20750 %}
20751
20752 // Doubles vector mul
20753 instruct vmulD(vec dst, vec src) %{
20754 predicate(UseAVX == 0);
20755 match(Set dst (MulVD dst src));
20756 format %{ "mulpd $dst,$src\t! mul packedD" %}
20757 ins_encode %{
20758 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20759 %}
20760 ins_pipe( pipe_slow );
20761 %}
20762
20763 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20764 predicate(UseAVX > 0);
20765 match(Set dst (MulVD src1 src2));
20766 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20767 ins_encode %{
20768 int vlen_enc = vector_length_encoding(this);
20769 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20770 %}
20771 ins_pipe( pipe_slow );
20772 %}
20773
20774 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20775 predicate((UseAVX > 0) &&
20776 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20777 match(Set dst (MulVD src (LoadVector mem)));
20778 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20779 ins_encode %{
20780 int vlen_enc = vector_length_encoding(this);
20781 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20782 %}
20783 ins_pipe( pipe_slow );
20784 %}
20785
20786 // --------------------------------- DIV --------------------------------------
20787
20788 // Floats vector div
20789 instruct vdivF(vec dst, vec src) %{
20790 predicate(UseAVX == 0);
20791 match(Set dst (DivVF dst src));
20792 format %{ "divps $dst,$src\t! div packedF" %}
20793 ins_encode %{
20794 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20795 %}
20796 ins_pipe( pipe_slow );
20797 %}
20798
20799 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20800 predicate(UseAVX > 0);
20801 match(Set dst (DivVF src1 src2));
20802 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20803 ins_encode %{
20804 int vlen_enc = vector_length_encoding(this);
20805 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20806 %}
20807 ins_pipe( pipe_slow );
20808 %}
20809
20810 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20811 predicate((UseAVX > 0) &&
20812 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20813 match(Set dst (DivVF src (LoadVector mem)));
20814 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20815 ins_encode %{
20816 int vlen_enc = vector_length_encoding(this);
20817 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20818 %}
20819 ins_pipe( pipe_slow );
20820 %}
20821
20822 // Doubles vector div
20823 instruct vdivD(vec dst, vec src) %{
20824 predicate(UseAVX == 0);
20825 match(Set dst (DivVD dst src));
20826 format %{ "divpd $dst,$src\t! div packedD" %}
20827 ins_encode %{
20828 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20829 %}
20830 ins_pipe( pipe_slow );
20831 %}
20832
20833 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20834 predicate(UseAVX > 0);
20835 match(Set dst (DivVD src1 src2));
20836 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20837 ins_encode %{
20838 int vlen_enc = vector_length_encoding(this);
20839 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20840 %}
20841 ins_pipe( pipe_slow );
20842 %}
20843
20844 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20845 predicate((UseAVX > 0) &&
20846 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20847 match(Set dst (DivVD src (LoadVector mem)));
20848 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20849 ins_encode %{
20850 int vlen_enc = vector_length_encoding(this);
20851 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20852 %}
20853 ins_pipe( pipe_slow );
20854 %}
20855
20856 // ------------------------------ MinMax ---------------------------------------
20857
20858 // Byte, Short, Int vector Min/Max
20859 instruct minmax_reg_sse(vec dst, vec src) %{
20860 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20861 UseAVX == 0);
20862 match(Set dst (MinV dst src));
20863 match(Set dst (MaxV dst src));
20864 format %{ "vector_minmax $dst,$src\t! " %}
20865 ins_encode %{
20866 assert(UseSSE >= 4, "required");
20867
20868 int opcode = this->ideal_Opcode();
20869 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20870 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20871 %}
20872 ins_pipe( pipe_slow );
20873 %}
20874
20875 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20876 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20877 UseAVX > 0);
20878 match(Set dst (MinV src1 src2));
20879 match(Set dst (MaxV src1 src2));
20880 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20881 ins_encode %{
20882 int opcode = this->ideal_Opcode();
20883 int vlen_enc = vector_length_encoding(this);
20884 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20885
20886 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20887 %}
20888 ins_pipe( pipe_slow );
20889 %}
20890
20891 // Long vector Min/Max
20892 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20893 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20894 UseAVX == 0);
20895 match(Set dst (MinV dst src));
20896 match(Set dst (MaxV src dst));
20897 effect(TEMP dst, TEMP tmp);
20898 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20899 ins_encode %{
20900 assert(UseSSE >= 4, "required");
20901
20902 int opcode = this->ideal_Opcode();
20903 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20904 assert(elem_bt == T_LONG, "sanity");
20905
20906 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20907 %}
20908 ins_pipe( pipe_slow );
20909 %}
20910
20911 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20912 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20913 UseAVX > 0 && !VM_Version::supports_avx512vl());
20914 match(Set dst (MinV src1 src2));
20915 match(Set dst (MaxV src1 src2));
20916 effect(TEMP dst);
20917 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20918 ins_encode %{
20919 int vlen_enc = vector_length_encoding(this);
20920 int opcode = this->ideal_Opcode();
20921 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20922 assert(elem_bt == T_LONG, "sanity");
20923
20924 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20925 %}
20926 ins_pipe( pipe_slow );
20927 %}
20928
20929 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20930 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20931 Matcher::vector_element_basic_type(n) == T_LONG);
20932 match(Set dst (MinV src1 src2));
20933 match(Set dst (MaxV src1 src2));
20934   format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20935 ins_encode %{
20936 assert(UseAVX > 2, "required");
20937
20938 int vlen_enc = vector_length_encoding(this);
20939 int opcode = this->ideal_Opcode();
20940 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20941 assert(elem_bt == T_LONG, "sanity");
20942
20943 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20944 %}
20945 ins_pipe( pipe_slow );
20946 %}
20947
20948 // Float/Double vector Min/Max
20949 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20950 predicate(VM_Version::supports_avx10_2() &&
20951 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20952 match(Set dst (MinV a b));
20953 match(Set dst (MaxV a b));
20954 format %{ "vector_minmaxFP $dst, $a, $b" %}
20955 ins_encode %{
20956 int vlen_enc = vector_length_encoding(this);
20957 int opcode = this->ideal_Opcode();
20958 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20959 __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20960 %}
20961 ins_pipe( pipe_slow );
20962 %}
20963
20964 // Float/Double vector Min/Max
20965 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20966 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20967 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20968 UseAVX > 0);
20969 match(Set dst (MinV a b));
20970 match(Set dst (MaxV a b));
20971 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20972 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20973 ins_encode %{
20974 assert(UseAVX > 0, "required");
20975
20976 int opcode = this->ideal_Opcode();
20977 int vlen_enc = vector_length_encoding(this);
20978 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20979
20980 __ vminmax_fp(opcode, elem_bt,
20981 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20982 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20983 %}
20984 ins_pipe( pipe_slow );
20985 %}
20986
20987 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20988 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20989 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20990 match(Set dst (MinV a b));
20991 match(Set dst (MaxV a b));
20992 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20993 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20994 ins_encode %{
20995 assert(UseAVX > 2, "required");
20996
20997 int opcode = this->ideal_Opcode();
20998 int vlen_enc = vector_length_encoding(this);
20999 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21000
21001 __ evminmax_fp(opcode, elem_bt,
21002 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21003 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21004 %}
21005 ins_pipe( pipe_slow );
21006 %}
21007
21008 // ------------------------------ Unsigned vector Min/Max ----------------------
21009
21010 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21011 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21012 match(Set dst (UMinV a b));
21013 match(Set dst (UMaxV a b));
21014 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21015 ins_encode %{
21016 int opcode = this->ideal_Opcode();
21017 int vlen_enc = vector_length_encoding(this);
21018 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21019 assert(is_integral_type(elem_bt), "");
21020 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21021 %}
21022 ins_pipe( pipe_slow );
21023 %}
21024
21025 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21026 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21027 match(Set dst (UMinV a (LoadVector b)));
21028 match(Set dst (UMaxV a (LoadVector b)));
21029 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21030 ins_encode %{
21031 int opcode = this->ideal_Opcode();
21032 int vlen_enc = vector_length_encoding(this);
21033 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21034 assert(is_integral_type(elem_bt), "");
21035 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21036 %}
21037 ins_pipe( pipe_slow );
21038 %}
21039
21040 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21041 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21042 match(Set dst (UMinV a b));
21043 match(Set dst (UMaxV a b));
21044 effect(TEMP xtmp1, TEMP xtmp2);
21045   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21046 ins_encode %{
21047 int opcode = this->ideal_Opcode();
21048 int vlen_enc = vector_length_encoding(this);
21049 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21050 %}
21051 ins_pipe( pipe_slow );
21052 %}
21053
21054 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21055 match(Set dst (UMinV (Binary dst src2) mask));
21056 match(Set dst (UMaxV (Binary dst src2) mask));
21057 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21058 ins_encode %{
21059 int vlen_enc = vector_length_encoding(this);
21060 BasicType bt = Matcher::vector_element_basic_type(this);
21061 int opc = this->ideal_Opcode();
21062 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21063 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21064 %}
21065 ins_pipe( pipe_slow );
21066 %}
21067
21068 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21069 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21070 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21071 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21072 ins_encode %{
21073 int vlen_enc = vector_length_encoding(this);
21074 BasicType bt = Matcher::vector_element_basic_type(this);
21075 int opc = this->ideal_Opcode();
21076 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21077 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21078 %}
21079 ins_pipe( pipe_slow );
21080 %}
21081
21082 // --------------------------------- Signum/CopySign ---------------------------
21083
21084 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21085 match(Set dst (SignumF dst (Binary zero one)));
21086 effect(KILL cr);
21087 format %{ "signumF $dst, $dst" %}
21088 ins_encode %{
21089 int opcode = this->ideal_Opcode();
21090 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21091 %}
21092 ins_pipe( pipe_slow );
21093 %}
21094
21095 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21096 match(Set dst (SignumD dst (Binary zero one)));
21097 effect(KILL cr);
21098 format %{ "signumD $dst, $dst" %}
21099 ins_encode %{
21100 int opcode = this->ideal_Opcode();
21101 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21102 %}
21103 ins_pipe( pipe_slow );
21104 %}
21105
21106 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21107 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21108 match(Set dst (SignumVF src (Binary zero one)));
21109 match(Set dst (SignumVD src (Binary zero one)));
21110 effect(TEMP dst, TEMP xtmp1);
21111 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21112 ins_encode %{
21113 int opcode = this->ideal_Opcode();
21114 int vec_enc = vector_length_encoding(this);
21115 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21116 $xtmp1$$XMMRegister, vec_enc);
21117 %}
21118 ins_pipe( pipe_slow );
21119 %}
21120
21121 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21122 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21123 match(Set dst (SignumVF src (Binary zero one)));
21124 match(Set dst (SignumVD src (Binary zero one)));
21125 effect(TEMP dst, TEMP ktmp1);
21126 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21127 ins_encode %{
21128 int opcode = this->ideal_Opcode();
21129 int vec_enc = vector_length_encoding(this);
21130 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21131 $ktmp1$$KRegister, vec_enc);
21132 %}
21133 ins_pipe( pipe_slow );
21134 %}
21135
21136 // ---------------------------------------
21137 // For copySign use 0xE4 as the truth-table immediate for vpternlog
21138 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21139 // C (xmm2) is set to 0x7FFFFFFF
21140 // Wherever xmm2 is 0, we want to pick from B (sign)
21141 // Wherever xmm2 is 1, we want to pick from A (src)
21142 //
21143 // A B C Result
21144 // 0 0 0 0
21145 // 0 0 1 0
21146 // 0 1 0 1
21147 // 0 1 1 0
21148 // 1 0 0 0
21149 // 1 0 1 1
21150 // 1 1 0 1
21151 // 1 1 1 1
21152 //
21153 // Result going from high bit to low bit is 0b11100100 = 0xE4
21154 // ---------------------------------------
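// A minimal sketch (plain C, illustration only) of how that immediate follows
// from the select-on-C behaviour described above:
//
//   unsigned imm = 0;
//   for (int a = 0; a <= 1; a++)
//     for (int b = 0; b <= 1; b++)
//       for (int c = 0; c <= 1; c++) {
//         int bit = c ? a : b;                      // C=1 picks A (src), C=0 picks B (sign)
//         imm |= bit << ((a << 2) | (b << 1) | c);  // table is indexed by the A:B:C bits
//       }
//   // imm == 0xE4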
21155
21156 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21157 match(Set dst (CopySignF dst src));
21158 effect(TEMP tmp1, TEMP tmp2);
21159 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21160 ins_encode %{
21161 __ movl($tmp2$$Register, 0x7FFFFFFF);
21162 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21163 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21164 %}
21165 ins_pipe( pipe_slow );
21166 %}
21167
21168 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21169 match(Set dst (CopySignD dst (Binary src zero)));
21170 ins_cost(100);
21171 effect(TEMP tmp1, TEMP tmp2);
21172 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21173 ins_encode %{
21174 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21175 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21176 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21177 %}
21178 ins_pipe( pipe_slow );
21179 %}
21180
21181 //----------------------------- CompressBits/ExpandBits ------------------------
21182
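// CompressBits maps to the BMI2 pext instruction (gather the src bits selected
// by the mask into the low bits of dst) and ExpandBits maps to pdep (scatter
// the low bits of src into the mask-selected positions). A small worked
// example (illustration only):
//   pext(0b10110110, 0b01100101) == 0b0110
//   pdep(0b0110,     0b01100101) == 0b00100100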
21183 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21184 predicate(n->bottom_type()->isa_int());
21185 match(Set dst (CompressBits src mask));
21186 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21187 ins_encode %{
21188 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21189 %}
21190 ins_pipe( pipe_slow );
21191 %}
21192
21193 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21194 predicate(n->bottom_type()->isa_int());
21195 match(Set dst (ExpandBits src mask));
21196 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21197 ins_encode %{
21198 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21199 %}
21200 ins_pipe( pipe_slow );
21201 %}
21202
21203 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21204 predicate(n->bottom_type()->isa_int());
21205 match(Set dst (CompressBits src (LoadI mask)));
21206 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21207 ins_encode %{
21208 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21209 %}
21210 ins_pipe( pipe_slow );
21211 %}
21212
21213 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21214 predicate(n->bottom_type()->isa_int());
21215 match(Set dst (ExpandBits src (LoadI mask)));
21216 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21217 ins_encode %{
21218 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21219 %}
21220 ins_pipe( pipe_slow );
21221 %}
21222
21223 // --------------------------------- Sqrt --------------------------------------
21224
21225 instruct vsqrtF_reg(vec dst, vec src) %{
21226 match(Set dst (SqrtVF src));
21227 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21228 ins_encode %{
21229 assert(UseAVX > 0, "required");
21230 int vlen_enc = vector_length_encoding(this);
21231 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21232 %}
21233 ins_pipe( pipe_slow );
21234 %}
21235
21236 instruct vsqrtF_mem(vec dst, memory mem) %{
21237 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21238 match(Set dst (SqrtVF (LoadVector mem)));
21239 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21240 ins_encode %{
21241 assert(UseAVX > 0, "required");
21242 int vlen_enc = vector_length_encoding(this);
21243 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21244 %}
21245 ins_pipe( pipe_slow );
21246 %}
21247
21248 // Floating point vector sqrt
21249 instruct vsqrtD_reg(vec dst, vec src) %{
21250 match(Set dst (SqrtVD src));
21251 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21252 ins_encode %{
21253 assert(UseAVX > 0, "required");
21254 int vlen_enc = vector_length_encoding(this);
21255 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21256 %}
21257 ins_pipe( pipe_slow );
21258 %}
21259
21260 instruct vsqrtD_mem(vec dst, memory mem) %{
21261 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21262 match(Set dst (SqrtVD (LoadVector mem)));
21263 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21264 ins_encode %{
21265 assert(UseAVX > 0, "required");
21266 int vlen_enc = vector_length_encoding(this);
21267 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21268 %}
21269 ins_pipe( pipe_slow );
21270 %}
21271
21272 // ------------------------------ Shift ---------------------------------------
21273
21274 // Left and right shift count vectors are the same on x86
21275 // (only the lowest bits of the xmm register are used for the count).
21276 instruct vshiftcnt(vec dst, rRegI cnt) %{
21277 match(Set dst (LShiftCntV cnt));
21278 match(Set dst (RShiftCntV cnt));
21279 format %{ "movdl $dst,$cnt\t! load shift count" %}
21280 ins_encode %{
21281 __ movdl($dst$$XMMRegister, $cnt$$Register);
21282 %}
21283 ins_pipe( pipe_slow );
21284 %}
21285
21286 // Byte vector shift
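// There is no byte-granularity shift instruction in SSE/AVX, so the byte-shift
// rules below widen the bytes to 16-bit lanes (vextendbw), shift at word
// granularity, mask the results back to 8 bits and re-pack them.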
21287 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21288 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21289 match(Set dst ( LShiftVB src shift));
21290 match(Set dst ( RShiftVB src shift));
21291 match(Set dst (URShiftVB src shift));
21292 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21293 format %{"vector_byte_shift $dst,$src,$shift" %}
21294 ins_encode %{
21295 assert(UseSSE > 3, "required");
21296 int opcode = this->ideal_Opcode();
21297 bool sign = (opcode != Op_URShiftVB);
21298 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21299 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21300 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21301 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21302 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21303 %}
21304 ins_pipe( pipe_slow );
21305 %}
21306
21307 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21308 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21309 UseAVX <= 1);
21310 match(Set dst ( LShiftVB src shift));
21311 match(Set dst ( RShiftVB src shift));
21312 match(Set dst (URShiftVB src shift));
21313 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21314 format %{"vector_byte_shift $dst,$src,$shift" %}
21315 ins_encode %{
21316 assert(UseSSE > 3, "required");
21317 int opcode = this->ideal_Opcode();
21318 bool sign = (opcode != Op_URShiftVB);
21319 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21320 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21321 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21322 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21323 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21324 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21325 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21326 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21327 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21328 %}
21329 ins_pipe( pipe_slow );
21330 %}
21331
21332 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21333 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21334 UseAVX > 1);
21335 match(Set dst ( LShiftVB src shift));
21336 match(Set dst ( RShiftVB src shift));
21337 match(Set dst (URShiftVB src shift));
21338 effect(TEMP dst, TEMP tmp);
21339 format %{"vector_byte_shift $dst,$src,$shift" %}
21340 ins_encode %{
21341 int opcode = this->ideal_Opcode();
21342 bool sign = (opcode != Op_URShiftVB);
21343 int vlen_enc = Assembler::AVX_256bit;
21344 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21345 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21346 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21347 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21348 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21349 %}
21350 ins_pipe( pipe_slow );
21351 %}
21352
21353 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21354 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21355 match(Set dst ( LShiftVB src shift));
21356 match(Set dst ( RShiftVB src shift));
21357 match(Set dst (URShiftVB src shift));
21358 effect(TEMP dst, TEMP tmp);
21359 format %{"vector_byte_shift $dst,$src,$shift" %}
21360 ins_encode %{
21361 assert(UseAVX > 1, "required");
21362 int opcode = this->ideal_Opcode();
21363 bool sign = (opcode != Op_URShiftVB);
21364 int vlen_enc = Assembler::AVX_256bit;
21365 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21366 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21367 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21368 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21369 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21370 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21371 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21372 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); // vpackuswb packs within 128-bit lanes; restore element order
21374 %}
21375 ins_pipe( pipe_slow );
21376 %}
21377
21378 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21379 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21380 match(Set dst ( LShiftVB src shift));
21381 match(Set dst (RShiftVB src shift));
21382 match(Set dst (URShiftVB src shift));
21383 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21384 format %{"vector_byte_shift $dst,$src,$shift" %}
21385 ins_encode %{
21386 assert(UseAVX > 2, "required");
21387 int opcode = this->ideal_Opcode();
21388 bool sign = (opcode != Op_URShiftVB);
21389 int vlen_enc = Assembler::AVX_512bit;
21390 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21391 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21392 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21393 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21394 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21395 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21396 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21397 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21398 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21399 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21400 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21401 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21402 %}
21403 ins_pipe( pipe_slow );
21404 %}
21405
// A lanewise logical right shift of a short vector produces an incorrect Java
// result for negative data, because Java code converts the short value to an
// int with sign extension before shifting (see the scalar sketch below). Char
// vectors are fine since chars are unsigned values.
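//
// Illustrative scalar sketch (not part of this file): for a negative short the
// bits shifted in come from the sign extension, so the scalar Java result
// differs from a 16-bit lanewise logical shift.
//
//   short s = -1;                 // 0xFFFF
//   short r = (short)(s >>> 2);   // s widens to 0xFFFFFFFF, >>> 2 gives 0x3FFFFFFF,
//                                 // truncated back to short: 0xFFFF, i.e. still -1
//   // a 16-bit lanewise logical shift (psrlw) would instead produce 0x3FFF
//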
// Shorts/Chars vector shift
21411 instruct vshiftS(vec dst, vec src, vec shift) %{
21412 predicate(!n->as_ShiftV()->is_var_shift());
21413 match(Set dst ( LShiftVS src shift));
21414 match(Set dst ( RShiftVS src shift));
21415 match(Set dst (URShiftVS src shift));
21416 effect(TEMP dst, USE src, USE shift);
21417 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21418 ins_encode %{
21419 int opcode = this->ideal_Opcode();
21420 if (UseAVX > 0) {
21421 int vlen_enc = vector_length_encoding(this);
21422 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21423 } else {
21424 int vlen = Matcher::vector_length(this);
21425 if (vlen == 2) {
21426 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21427 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21428 } else if (vlen == 4) {
21429 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21430 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21431 } else {
21432 assert (vlen == 8, "sanity");
21433 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21434 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21435 }
21436 }
21437 %}
21438 ins_pipe( pipe_slow );
21439 %}
21440
// Integers vector shift
21442 instruct vshiftI(vec dst, vec src, vec shift) %{
21443 predicate(!n->as_ShiftV()->is_var_shift());
21444 match(Set dst ( LShiftVI src shift));
21445 match(Set dst ( RShiftVI src shift));
21446 match(Set dst (URShiftVI src shift));
21447 effect(TEMP dst, USE src, USE shift);
21448 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21449 ins_encode %{
21450 int opcode = this->ideal_Opcode();
21451 if (UseAVX > 0) {
21452 int vlen_enc = vector_length_encoding(this);
21453 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21454 } else {
21455 int vlen = Matcher::vector_length(this);
21456 if (vlen == 2) {
21457 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21458 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21459 } else {
21460 assert(vlen == 4, "sanity");
21461 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21462 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21463 }
21464 }
21465 %}
21466 ins_pipe( pipe_slow );
21467 %}
21468
// Integers vector constant shift
21470 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21471 match(Set dst (LShiftVI src (LShiftCntV shift)));
21472 match(Set dst (RShiftVI src (RShiftCntV shift)));
21473 match(Set dst (URShiftVI src (RShiftCntV shift)));
21474 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21475 ins_encode %{
21476 int opcode = this->ideal_Opcode();
21477 if (UseAVX > 0) {
21478 int vector_len = vector_length_encoding(this);
21479 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21480 } else {
21481 int vlen = Matcher::vector_length(this);
21482 if (vlen == 2) {
21483 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21484 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21485 } else {
21486 assert(vlen == 4, "sanity");
21487 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21488 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21489 }
21490 }
21491 %}
21492 ins_pipe( pipe_slow );
21493 %}
21494
21495 // Longs vector shift
21496 instruct vshiftL(vec dst, vec src, vec shift) %{
21497 predicate(!n->as_ShiftV()->is_var_shift());
21498 match(Set dst ( LShiftVL src shift));
21499 match(Set dst (URShiftVL src shift));
21500 effect(TEMP dst, USE src, USE shift);
21501 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21502 ins_encode %{
21503 int opcode = this->ideal_Opcode();
21504 if (UseAVX > 0) {
21505 int vlen_enc = vector_length_encoding(this);
21506 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21507 } else {
21508 assert(Matcher::vector_length(this) == 2, "");
21509 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21510 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21511 }
21512 %}
21513 ins_pipe( pipe_slow );
21514 %}
21515
21516 // Longs vector constant shift
21517 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21518 match(Set dst (LShiftVL src (LShiftCntV shift)));
21519 match(Set dst (URShiftVL src (RShiftCntV shift)));
21520 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21521 ins_encode %{
21522 int opcode = this->ideal_Opcode();
21523 if (UseAVX > 0) {
21524 int vector_len = vector_length_encoding(this);
21525 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21526 } else {
21527 assert(Matcher::vector_length(this) == 2, "");
21528 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21529 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21530 }
21531 %}
21532 ins_pipe( pipe_slow );
21533 %}
21534
// ------------------- Arithmetic Right Shift ---------------------------------
21536 // Long vector arithmetic right shift
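// There is no packed 64-bit arithmetic right shift (vpsraq) before AVX-512, so it
// is emulated below from the logical shift: with m = the long sign-bit mask and
// t = m >>> s, sra(x, s) == (srl(x, s) ^ t) - t.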
21537 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21538 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21539 match(Set dst (RShiftVL src shift));
21540 effect(TEMP dst, TEMP tmp);
21541 format %{ "vshiftq $dst,$src,$shift" %}
21542 ins_encode %{
21543 uint vlen = Matcher::vector_length(this);
21544 if (vlen == 2) {
21545 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21546 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21547 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21548 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21549 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21550 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21551 } else {
21552 assert(vlen == 4, "sanity");
21553 assert(UseAVX > 1, "required");
21554 int vlen_enc = Assembler::AVX_256bit;
21555 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21556 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21557 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21558 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21559 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21560 }
21561 %}
21562 ins_pipe( pipe_slow );
21563 %}
21564
21565 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21566 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21567 match(Set dst (RShiftVL src shift));
21568 format %{ "vshiftq $dst,$src,$shift" %}
21569 ins_encode %{
21570 int vlen_enc = vector_length_encoding(this);
21571 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21572 %}
21573 ins_pipe( pipe_slow );
21574 %}
21575
21576 // ------------------- Variable Shift -----------------------------
21577 // Byte variable shift
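// Without AVX-512BW there is no variable per-lane shift for sub-word elements.
// The varshiftbw helper (assumed here to shift the data at a wider element size)
// leaves word-sized results, which are packed back down to bytes with vpackuswb.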
21578 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21579 predicate(Matcher::vector_length(n) <= 8 &&
21580 n->as_ShiftV()->is_var_shift() &&
21581 !VM_Version::supports_avx512bw());
21582 match(Set dst ( LShiftVB src shift));
21583 match(Set dst ( RShiftVB src shift));
21584 match(Set dst (URShiftVB src shift));
21585 effect(TEMP dst, TEMP vtmp);
21586 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21587 ins_encode %{
21588 assert(UseAVX >= 2, "required");
21589
21590 int opcode = this->ideal_Opcode();
21591 int vlen_enc = Assembler::AVX_128bit;
21592 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21593 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21594 %}
21595 ins_pipe( pipe_slow );
21596 %}
21597
21598 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21599 predicate(Matcher::vector_length(n) == 16 &&
21600 n->as_ShiftV()->is_var_shift() &&
21601 !VM_Version::supports_avx512bw());
21602 match(Set dst ( LShiftVB src shift));
21603 match(Set dst ( RShiftVB src shift));
21604 match(Set dst (URShiftVB src shift));
21605 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21606 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21607 ins_encode %{
21608 assert(UseAVX >= 2, "required");
21609
21610 int opcode = this->ideal_Opcode();
21611 int vlen_enc = Assembler::AVX_128bit;
21612 // Shift lower half and get word result in dst
21613 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21614
21615 // Shift upper half and get word result in vtmp1
21616 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21617 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21618 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21619
21620 // Merge and down convert the two word results to byte in dst
21621 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21622 %}
21623 ins_pipe( pipe_slow );
21624 %}
21625
21626 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21627 predicate(Matcher::vector_length(n) == 32 &&
21628 n->as_ShiftV()->is_var_shift() &&
21629 !VM_Version::supports_avx512bw());
21630 match(Set dst ( LShiftVB src shift));
21631 match(Set dst ( RShiftVB src shift));
21632 match(Set dst (URShiftVB src shift));
21633 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21635 ins_encode %{
21636 assert(UseAVX >= 2, "required");
21637
21638 int opcode = this->ideal_Opcode();
21639 int vlen_enc = Assembler::AVX_128bit;
21640 // Process lower 128 bits and get result in dst
21641 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21642 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21643 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21644 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21645 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21646
21647 // Process higher 128 bits and get result in vtmp3
21648 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21649 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21650 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21651 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21652 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21653 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21654 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21655
21656 // Merge the two results in dst
21657 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21658 %}
21659 ins_pipe( pipe_slow );
21660 %}
21661
21662 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21663 predicate(Matcher::vector_length(n) <= 32 &&
21664 n->as_ShiftV()->is_var_shift() &&
21665 VM_Version::supports_avx512bw());
21666 match(Set dst ( LShiftVB src shift));
21667 match(Set dst ( RShiftVB src shift));
21668 match(Set dst (URShiftVB src shift));
21669 effect(TEMP dst, TEMP vtmp);
21670 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21671 ins_encode %{
21672 assert(UseAVX > 2, "required");
21673
21674 int opcode = this->ideal_Opcode();
21675 int vlen_enc = vector_length_encoding(this);
21676 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21677 %}
21678 ins_pipe( pipe_slow );
21679 %}
21680
21681 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21682 predicate(Matcher::vector_length(n) == 64 &&
21683 n->as_ShiftV()->is_var_shift() &&
21684 VM_Version::supports_avx512bw());
21685 match(Set dst ( LShiftVB src shift));
21686 match(Set dst ( RShiftVB src shift));
21687 match(Set dst (URShiftVB src shift));
21688 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21689 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21690 ins_encode %{
21691 assert(UseAVX > 2, "required");
21692
21693 int opcode = this->ideal_Opcode();
21694 int vlen_enc = Assembler::AVX_256bit;
21695 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21696 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21697 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21698 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21699 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21700 %}
21701 ins_pipe( pipe_slow );
21702 %}
21703
21704 // Short variable shift
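// Without AVX-512BW there is no variable per-lane word shift (vpsllvw/vpsravw/vpsrlvw),
// so the shorts are widened to ints (vextendwd), the shift counts are zero-extended
// (vpmovzxwd), the ints are shifted with the variable dword shifts (varshiftd), masked
// back to 16 bits and re-packed (vpackusdw).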
21705 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21706 predicate(Matcher::vector_length(n) <= 8 &&
21707 n->as_ShiftV()->is_var_shift() &&
21708 !VM_Version::supports_avx512bw());
21709 match(Set dst ( LShiftVS src shift));
21710 match(Set dst ( RShiftVS src shift));
21711 match(Set dst (URShiftVS src shift));
21712 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp as TEMP" %}
21714 ins_encode %{
21715 assert(UseAVX >= 2, "required");
21716
21717 int opcode = this->ideal_Opcode();
21718 bool sign = (opcode != Op_URShiftVS);
21719 int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21722 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21723 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21724 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21725 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21726 %}
21727 ins_pipe( pipe_slow );
21728 %}
21729
21730 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21731 predicate(Matcher::vector_length(n) == 16 &&
21732 n->as_ShiftV()->is_var_shift() &&
21733 !VM_Version::supports_avx512bw());
21734 match(Set dst ( LShiftVS src shift));
21735 match(Set dst ( RShiftVS src shift));
21736 match(Set dst (URShiftVS src shift));
21737 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp1, $vtmp2 as TEMP" %}
21739 ins_encode %{
21740 assert(UseAVX >= 2, "required");
21741
21742 int opcode = this->ideal_Opcode();
21743 bool sign = (opcode != Op_URShiftVS);
21744 int vlen_enc = Assembler::AVX_256bit;
21745 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21746 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21747 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21748 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21749 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21750
21751 // Shift upper half, with result in dst using vtmp1 as TEMP
21752 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21753 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21754 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21755 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21756 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21757 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21758
21759 // Merge lower and upper half result into dst
21760 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); // vpackusdw packs within 128-bit lanes; restore element order
21762 %}
21763 ins_pipe( pipe_slow );
21764 %}
21765
21766 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21767 predicate(n->as_ShiftV()->is_var_shift() &&
21768 VM_Version::supports_avx512bw());
21769 match(Set dst ( LShiftVS src shift));
21770 match(Set dst ( RShiftVS src shift));
21771 match(Set dst (URShiftVS src shift));
21772 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21773 ins_encode %{
21774 assert(UseAVX > 2, "required");
21775
21776 int opcode = this->ideal_Opcode();
21777 int vlen_enc = vector_length_encoding(this);
21778 if (!VM_Version::supports_avx512vl()) {
21779 vlen_enc = Assembler::AVX_512bit;
21780 }
21781 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21782 %}
21783 ins_pipe( pipe_slow );
21784 %}
21785
// Integer variable shift
21787 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21788 predicate(n->as_ShiftV()->is_var_shift());
21789 match(Set dst ( LShiftVI src shift));
21790 match(Set dst ( RShiftVI src shift));
21791 match(Set dst (URShiftVI src shift));
21792 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21793 ins_encode %{
21794 assert(UseAVX >= 2, "required");
21795
21796 int opcode = this->ideal_Opcode();
21797 int vlen_enc = vector_length_encoding(this);
21798 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21799 %}
21800 ins_pipe( pipe_slow );
21801 %}
21802
// Long variable shift
21804 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21805 predicate(n->as_ShiftV()->is_var_shift());
21806 match(Set dst ( LShiftVL src shift));
21807 match(Set dst (URShiftVL src shift));
21808 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21809 ins_encode %{
21810 assert(UseAVX >= 2, "required");
21811
21812 int opcode = this->ideal_Opcode();
21813 int vlen_enc = vector_length_encoding(this);
21814 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21815 %}
21816 ins_pipe( pipe_slow );
21817 %}
21818
// Long variable arithmetic right shift
21820 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21821 predicate(Matcher::vector_length(n) <= 4 &&
21822 n->as_ShiftV()->is_var_shift() &&
21823 UseAVX == 2);
21824 match(Set dst (RShiftVL src shift));
21825 effect(TEMP dst, TEMP vtmp);
21826 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21827 ins_encode %{
21828 int opcode = this->ideal_Opcode();
21829 int vlen_enc = vector_length_encoding(this);
21830 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21831 $vtmp$$XMMRegister);
21832 %}
21833 ins_pipe( pipe_slow );
21834 %}
21835
21836 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21837 predicate(n->as_ShiftV()->is_var_shift() &&
21838 UseAVX > 2);
21839 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21841 ins_encode %{
21842 int opcode = this->ideal_Opcode();
21843 int vlen_enc = vector_length_encoding(this);
21844 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21845 %}
21846 ins_pipe( pipe_slow );
21847 %}
21848
21849 // --------------------------------- AND --------------------------------------
21850
21851 instruct vand(vec dst, vec src) %{
21852 predicate(UseAVX == 0);
21853 match(Set dst (AndV dst src));
21854 format %{ "pand $dst,$src\t! and vectors" %}
21855 ins_encode %{
21856 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21857 %}
21858 ins_pipe( pipe_slow );
21859 %}
21860
21861 instruct vand_reg(vec dst, vec src1, vec src2) %{
21862 predicate(UseAVX > 0);
21863 match(Set dst (AndV src1 src2));
21864 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21865 ins_encode %{
21866 int vlen_enc = vector_length_encoding(this);
21867 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21868 %}
21869 ins_pipe( pipe_slow );
21870 %}
21871
21872 instruct vand_mem(vec dst, vec src, memory mem) %{
21873 predicate((UseAVX > 0) &&
21874 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21875 match(Set dst (AndV src (LoadVector mem)));
21876 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21877 ins_encode %{
21878 int vlen_enc = vector_length_encoding(this);
21879 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21880 %}
21881 ins_pipe( pipe_slow );
21882 %}
21883
21884 // --------------------------------- OR ---------------------------------------
21885
21886 instruct vor(vec dst, vec src) %{
21887 predicate(UseAVX == 0);
21888 match(Set dst (OrV dst src));
21889 format %{ "por $dst,$src\t! or vectors" %}
21890 ins_encode %{
21891 __ por($dst$$XMMRegister, $src$$XMMRegister);
21892 %}
21893 ins_pipe( pipe_slow );
21894 %}
21895
21896 instruct vor_reg(vec dst, vec src1, vec src2) %{
21897 predicate(UseAVX > 0);
21898 match(Set dst (OrV src1 src2));
21899 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21900 ins_encode %{
21901 int vlen_enc = vector_length_encoding(this);
21902 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21903 %}
21904 ins_pipe( pipe_slow );
21905 %}
21906
21907 instruct vor_mem(vec dst, vec src, memory mem) %{
21908 predicate((UseAVX > 0) &&
21909 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21910 match(Set dst (OrV src (LoadVector mem)));
21911 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21912 ins_encode %{
21913 int vlen_enc = vector_length_encoding(this);
21914 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21915 %}
21916 ins_pipe( pipe_slow );
21917 %}
21918
21919 // --------------------------------- XOR --------------------------------------
21920
21921 instruct vxor(vec dst, vec src) %{
21922 predicate(UseAVX == 0);
21923 match(Set dst (XorV dst src));
21924 format %{ "pxor $dst,$src\t! xor vectors" %}
21925 ins_encode %{
21926 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21927 %}
21928 ins_pipe( pipe_slow );
21929 %}
21930
21931 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21932 predicate(UseAVX > 0);
21933 match(Set dst (XorV src1 src2));
21934 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21935 ins_encode %{
21936 int vlen_enc = vector_length_encoding(this);
21937 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21938 %}
21939 ins_pipe( pipe_slow );
21940 %}
21941
21942 instruct vxor_mem(vec dst, vec src, memory mem) %{
21943 predicate((UseAVX > 0) &&
21944 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21945 match(Set dst (XorV src (LoadVector mem)));
21946 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21947 ins_encode %{
21948 int vlen_enc = vector_length_encoding(this);
21949 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21950 %}
21951 ins_pipe( pipe_slow );
21952 %}
21953
21954 // --------------------------------- VectorCast --------------------------------------
21955
21956 instruct vcastBtoX(vec dst, vec src) %{
21957 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21958 match(Set dst (VectorCastB2X src));
21959 format %{ "vector_cast_b2x $dst,$src\t!" %}
21960 ins_encode %{
21961 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21962 int vlen_enc = vector_length_encoding(this);
21963 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21964 %}
21965 ins_pipe( pipe_slow );
21966 %}
21967
21968 instruct vcastBtoD(legVec dst, legVec src) %{
21969 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21970 match(Set dst (VectorCastB2X src));
21971 format %{ "vector_cast_b2x $dst,$src\t!" %}
21972 ins_encode %{
21973 int vlen_enc = vector_length_encoding(this);
21974 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21975 %}
21976 ins_pipe( pipe_slow );
21977 %}
21978
21979 instruct castStoX(vec dst, vec src) %{
21980 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21981 Matcher::vector_length(n->in(1)) <= 8 && // src
21982 Matcher::vector_element_basic_type(n) == T_BYTE);
21983 match(Set dst (VectorCastS2X src));
21984 format %{ "vector_cast_s2x $dst,$src" %}
21985 ins_encode %{
21986 assert(UseAVX > 0, "required");
21987
21988 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21989 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21990 %}
21991 ins_pipe( pipe_slow );
21992 %}
21993
21994 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21995 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21996 Matcher::vector_length(n->in(1)) == 16 && // src
21997 Matcher::vector_element_basic_type(n) == T_BYTE);
21998 effect(TEMP dst, TEMP vtmp);
21999 match(Set dst (VectorCastS2X src));
22000 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22001 ins_encode %{
22002 assert(UseAVX > 0, "required");
22003
22004 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22005 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22006 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22007 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22008 %}
22009 ins_pipe( pipe_slow );
22010 %}
22011
22012 instruct vcastStoX_evex(vec dst, vec src) %{
22013 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22014 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22015 match(Set dst (VectorCastS2X src));
22016 format %{ "vector_cast_s2x $dst,$src\t!" %}
22017 ins_encode %{
22018 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22019 int src_vlen_enc = vector_length_encoding(this, $src);
22020 int vlen_enc = vector_length_encoding(this);
22021 switch (to_elem_bt) {
22022 case T_BYTE:
22023 if (!VM_Version::supports_avx512vl()) {
22024 vlen_enc = Assembler::AVX_512bit;
22025 }
22026 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22027 break;
22028 case T_INT:
22029 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22030 break;
22031 case T_FLOAT:
22032 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22033 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22034 break;
22035 case T_LONG:
22036 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22037 break;
22038 case T_DOUBLE: {
22039 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22040 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22041 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22042 break;
22043 }
22044 default:
22045 ShouldNotReachHere();
22046 }
22047 %}
22048 ins_pipe( pipe_slow );
22049 %}
22050
22051 instruct castItoX(vec dst, vec src) %{
22052 predicate(UseAVX <= 2 &&
22053 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22054 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22055 match(Set dst (VectorCastI2X src));
22056 format %{ "vector_cast_i2x $dst,$src" %}
22057 ins_encode %{
22058 assert(UseAVX > 0, "required");
22059
22060 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22061 int vlen_enc = vector_length_encoding(this, $src);
22062
22063 if (to_elem_bt == T_BYTE) {
22064 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22065 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22066 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22067 } else {
22068 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22069 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22070 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22071 }
22072 %}
22073 ins_pipe( pipe_slow );
22074 %}
22075
22076 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22077 predicate(UseAVX <= 2 &&
22078 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22079 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22080 match(Set dst (VectorCastI2X src));
22081 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22082 effect(TEMP dst, TEMP vtmp);
22083 ins_encode %{
22084 assert(UseAVX > 0, "required");
22085
22086 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22087 int vlen_enc = vector_length_encoding(this, $src);
22088
22089 if (to_elem_bt == T_BYTE) {
22090 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22091 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22092 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22093 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22094 } else {
22095 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22096 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22097 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22098 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22099 }
22100 %}
22101 ins_pipe( pipe_slow );
22102 %}
22103
22104 instruct vcastItoX_evex(vec dst, vec src) %{
22105 predicate(UseAVX > 2 ||
22106 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22107 match(Set dst (VectorCastI2X src));
22108 format %{ "vector_cast_i2x $dst,$src\t!" %}
22109 ins_encode %{
22110 assert(UseAVX > 0, "required");
22111
22112 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22113 int src_vlen_enc = vector_length_encoding(this, $src);
22114 int dst_vlen_enc = vector_length_encoding(this);
22115 switch (dst_elem_bt) {
22116 case T_BYTE:
22117 if (!VM_Version::supports_avx512vl()) {
22118 src_vlen_enc = Assembler::AVX_512bit;
22119 }
22120 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22121 break;
22122 case T_SHORT:
22123 if (!VM_Version::supports_avx512vl()) {
22124 src_vlen_enc = Assembler::AVX_512bit;
22125 }
22126 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22127 break;
22128 case T_FLOAT:
22129 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22130 break;
22131 case T_LONG:
22132 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22133 break;
22134 case T_DOUBLE:
22135 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22136 break;
22137 default:
22138 ShouldNotReachHere();
22139 }
22140 %}
22141 ins_pipe( pipe_slow );
22142 %}
22143
22144 instruct vcastLtoBS(vec dst, vec src) %{
22145 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22146 UseAVX <= 2);
22147 match(Set dst (VectorCastL2X src));
22148 format %{ "vector_cast_l2x $dst,$src" %}
22149 ins_encode %{
22150 assert(UseAVX > 0, "required");
22151
22152 int vlen = Matcher::vector_length_in_bytes(this, $src);
22153 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22154 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22155 : ExternalAddress(vector_int_to_short_mask());
22156 if (vlen <= 16) {
22157 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22158 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22159 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22160 } else {
22161 assert(vlen <= 32, "required");
22162 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22163 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22164 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22165 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22166 }
22167 if (to_elem_bt == T_BYTE) {
22168 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22169 }
22170 %}
22171 ins_pipe( pipe_slow );
22172 %}
22173
22174 instruct vcastLtoX_evex(vec dst, vec src) %{
22175 predicate(UseAVX > 2 ||
22176 (Matcher::vector_element_basic_type(n) == T_INT ||
22177 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22178 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22179 match(Set dst (VectorCastL2X src));
22180 format %{ "vector_cast_l2x $dst,$src\t!" %}
22181 ins_encode %{
22182 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22183 int vlen = Matcher::vector_length_in_bytes(this, $src);
22184 int vlen_enc = vector_length_encoding(this, $src);
22185 switch (to_elem_bt) {
22186 case T_BYTE:
22187 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22188 vlen_enc = Assembler::AVX_512bit;
22189 }
22190 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22191 break;
22192 case T_SHORT:
22193 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22194 vlen_enc = Assembler::AVX_512bit;
22195 }
22196 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22197 break;
22198 case T_INT:
22199 if (vlen == 8) {
22200 if ($dst$$XMMRegister != $src$$XMMRegister) {
22201 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22202 }
22203 } else if (vlen == 16) {
22204 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22205 } else if (vlen == 32) {
22206 if (UseAVX > 2) {
22207 if (!VM_Version::supports_avx512vl()) {
22208 vlen_enc = Assembler::AVX_512bit;
22209 }
22210 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22211 } else {
22212 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22213 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22214 }
22215 } else { // vlen == 64
22216 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22217 }
22218 break;
22219 case T_FLOAT:
22220 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22221 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22222 break;
22223 case T_DOUBLE:
22224 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22225 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22226 break;
22227
22228 default: assert(false, "%s", type2name(to_elem_bt));
22229 }
22230 %}
22231 ins_pipe( pipe_slow );
22232 %}
22233
22234 instruct vcastFtoD_reg(vec dst, vec src) %{
22235 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22236 match(Set dst (VectorCastF2X src));
22237 format %{ "vector_cast_f2d $dst,$src\t!" %}
22238 ins_encode %{
22239 int vlen_enc = vector_length_encoding(this);
22240 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22241 %}
22242 ins_pipe( pipe_slow );
22243 %}
22244
22245
22246 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22247 predicate(!VM_Version::supports_avx10_2() &&
22248 !VM_Version::supports_avx512vl() &&
22249 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22250 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22251 is_integral_type(Matcher::vector_element_basic_type(n)));
22252 match(Set dst (VectorCastF2X src));
22253 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22254 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22255 ins_encode %{
22256 int vlen_enc = vector_length_encoding(this, $src);
22257 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load addresses wider
    // than 32 bits in register-indirect addressing mode, since stub constants are part of the
    // code cache and ReservedCodeCacheSize is currently capped at 2G. Targets are free to
    // raise this limit, but a code cache larger than 2G is unrealistic in practice; on the
    // other hand, with the given cap we save a temporary register allocation, which in the
    // limiting case can prevent spilling in blocks with high register pressure.
22265 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22266 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22267 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22268 %}
22269 ins_pipe( pipe_slow );
22270 %}
22271
22272 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22273 predicate(!VM_Version::supports_avx10_2() &&
22274 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22275 is_integral_type(Matcher::vector_element_basic_type(n)));
22276 match(Set dst (VectorCastF2X src));
22277 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22278 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22279 ins_encode %{
22280 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22281 if (to_elem_bt == T_LONG) {
22282 int vlen_enc = vector_length_encoding(this);
22283 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22284 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22285 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22286 } else {
22287 int vlen_enc = vector_length_encoding(this, $src);
22288 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22289 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22290 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22291 }
22292 %}
22293 ins_pipe( pipe_slow );
22294 %}
22295
22296 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22297 predicate(VM_Version::supports_avx10_2() &&
22298 is_integral_type(Matcher::vector_element_basic_type(n)));
22299 match(Set dst (VectorCastF2X src));
22300 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22301 ins_encode %{
22302 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22303 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22304 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22305 %}
22306 ins_pipe( pipe_slow );
22307 %}
22308
22309 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22310 predicate(VM_Version::supports_avx10_2() &&
22311 is_integral_type(Matcher::vector_element_basic_type(n)));
22312 match(Set dst (VectorCastF2X (LoadVector src)));
22313 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22314 ins_encode %{
22315 int vlen = Matcher::vector_length(this);
22316 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22317 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22318 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22319 %}
22320 ins_pipe( pipe_slow );
22321 %}
22322
22323 instruct vcastDtoF_reg(vec dst, vec src) %{
22324 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22325 match(Set dst (VectorCastD2X src));
22326 format %{ "vector_cast_d2x $dst,$src\t!" %}
22327 ins_encode %{
22328 int vlen_enc = vector_length_encoding(this, $src);
22329 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22330 %}
22331 ins_pipe( pipe_slow );
22332 %}
22333
22334 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22335 predicate(!VM_Version::supports_avx10_2() &&
22336 !VM_Version::supports_avx512vl() &&
22337 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22338 is_integral_type(Matcher::vector_element_basic_type(n)));
22339 match(Set dst (VectorCastD2X src));
22340 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22341 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22342 ins_encode %{
22343 int vlen_enc = vector_length_encoding(this, $src);
22344 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22345 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22346 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22347 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22348 %}
22349 ins_pipe( pipe_slow );
22350 %}
22351
22352 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22353 predicate(!VM_Version::supports_avx10_2() &&
22354 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22355 is_integral_type(Matcher::vector_element_basic_type(n)));
22356 match(Set dst (VectorCastD2X src));
22357 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22358 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22359 ins_encode %{
22360 int vlen_enc = vector_length_encoding(this, $src);
22361 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22362 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22363 ExternalAddress(vector_float_signflip());
22364 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22365 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22366 %}
22367 ins_pipe( pipe_slow );
22368 %}
22369
22370 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22371 predicate(VM_Version::supports_avx10_2() &&
22372 is_integral_type(Matcher::vector_element_basic_type(n)));
22373 match(Set dst (VectorCastD2X src));
22374 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22375 ins_encode %{
22376 int vlen_enc = vector_length_encoding(this, $src);
22377 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22378 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22379 %}
22380 ins_pipe( pipe_slow );
22381 %}
22382
22383 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22384 predicate(VM_Version::supports_avx10_2() &&
22385 is_integral_type(Matcher::vector_element_basic_type(n)));
22386 match(Set dst (VectorCastD2X (LoadVector src)));
22387 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22388 ins_encode %{
22389 int vlen = Matcher::vector_length(this);
22390 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22391 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22392 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22393 %}
22394 ins_pipe( pipe_slow );
22395 %}
22396
22397 instruct vucast(vec dst, vec src) %{
22398 match(Set dst (VectorUCastB2X src));
22399 match(Set dst (VectorUCastS2X src));
22400 match(Set dst (VectorUCastI2X src));
22401 format %{ "vector_ucast $dst,$src\t!" %}
22402 ins_encode %{
22403 assert(UseAVX > 0, "required");
22404
22405 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22406 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22407 int vlen_enc = vector_length_encoding(this);
22408 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22409 %}
22410 ins_pipe( pipe_slow );
22411 %}
22412
22413 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22414 predicate(!VM_Version::supports_avx512vl() &&
22415 Matcher::vector_length_in_bytes(n) < 64 &&
22416 Matcher::vector_element_basic_type(n) == T_INT);
22417 match(Set dst (RoundVF src));
22418 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22419 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22420 ins_encode %{
22421 int vlen_enc = vector_length_encoding(this);
22422 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22423 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22424 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22425 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22426 %}
22427 ins_pipe( pipe_slow );
22428 %}
22429
22430 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22431 predicate((VM_Version::supports_avx512vl() ||
22432 Matcher::vector_length_in_bytes(n) == 64) &&
22433 Matcher::vector_element_basic_type(n) == T_INT);
22434 match(Set dst (RoundVF src));
22435 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22436 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22437 ins_encode %{
22438 int vlen_enc = vector_length_encoding(this);
22439 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22440 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22441 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22442 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22443 %}
22444 ins_pipe( pipe_slow );
22445 %}
22446
22447 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22448 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22449 match(Set dst (RoundVD src));
22450 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22451 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22452 ins_encode %{
22453 int vlen_enc = vector_length_encoding(this);
22454 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22455 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22456 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22457 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22458 %}
22459 ins_pipe( pipe_slow );
22460 %}
22461
22462 // --------------------------------- VectorMaskCmp --------------------------------------
22463
22464 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22465 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22466 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22467 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22468 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22469 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22470 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22471 ins_encode %{
22472 int vlen_enc = vector_length_encoding(this, $src1);
22473 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22474 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22475 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22476 } else {
22477 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22478 }
22479 %}
22480 ins_pipe( pipe_slow );
22481 %}
22482
22483 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22484 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22485 n->bottom_type()->isa_vectmask() == nullptr &&
22486 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22487 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22488 effect(TEMP ktmp);
22489 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22490 ins_encode %{
22491 int vlen_enc = Assembler::AVX_512bit;
22492 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22493 KRegister mask = k0; // The comparison itself is not being masked.
22494 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22495 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22496 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22497 } else {
22498 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22499 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22500 }
22501 %}
22502 ins_pipe( pipe_slow );
22503 %}
22504
22505 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22506 predicate(n->bottom_type()->isa_vectmask() &&
22507 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22508 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22509 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22510 ins_encode %{
22511 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22512 int vlen_enc = vector_length_encoding(this, $src1);
22513 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22514 KRegister mask = k0; // The comparison itself is not being masked.
22515 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22516 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22517 } else {
22518 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22519 }
22520 %}
22521 ins_pipe( pipe_slow );
22522 %}
22523
22524 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22525 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22526 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22527 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22528 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22529 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22530 (n->in(2)->get_int() == BoolTest::eq ||
22531 n->in(2)->get_int() == BoolTest::lt ||
22532 n->in(2)->get_int() == BoolTest::gt)); // cond
22533 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22534 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22535 ins_encode %{
22536 int vlen_enc = vector_length_encoding(this, $src1);
22537 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22538 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22539 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22540 %}
22541 ins_pipe( pipe_slow );
22542 %}
22543
22544 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22545 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22546 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22547 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22548 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22549 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22550 (n->in(2)->get_int() == BoolTest::ne ||
22551 n->in(2)->get_int() == BoolTest::le ||
22552 n->in(2)->get_int() == BoolTest::ge)); // cond
22553 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22554 effect(TEMP dst, TEMP xtmp);
22555 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22556 ins_encode %{
22557 int vlen_enc = vector_length_encoding(this, $src1);
22558 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22559 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22560 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22561 %}
22562 ins_pipe( pipe_slow );
22563 %}
22564
22565 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22566 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22567 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22568 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22569 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22570 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22571 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22572 effect(TEMP dst, TEMP xtmp);
22573 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22574 ins_encode %{
22575 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22576 int vlen_enc = vector_length_encoding(this, $src1);
22577 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22578 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22579
22580 if (vlen_enc == Assembler::AVX_128bit) {
22581 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22582 } else {
22583 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22584 }
22585 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22586 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22587 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22588 %}
22589 ins_pipe( pipe_slow );
22590 %}
22591
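// For 512-bit integer vectors the compare produces its result in a mask
// register; the mask is then expanded back into a lane-wide -1/0 vector by a
// zero-masked load of the all-bits-set constant.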
22592 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22593 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22594 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22595 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22596 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22597 effect(TEMP ktmp);
22598 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22599 ins_encode %{
22600 assert(UseAVX > 2, "required");
22601
22602 int vlen_enc = vector_length_encoding(this, $src1);
22603 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22604 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22605 KRegister mask = k0; // The comparison itself is not being masked.
22606 bool merge = false;
22607 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22608
22609 switch (src1_elem_bt) {
22610 case T_INT: {
22611 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22612 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22613 break;
22614 }
22615 case T_LONG: {
22616 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22617 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22618 break;
22619 }
22620 default: assert(false, "%s", type2name(src1_elem_bt));
22621 }
22622 %}
22623 ins_pipe( pipe_slow );
22624 %}
22625
22626
22627 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22628 predicate(n->bottom_type()->isa_vectmask() &&
22629 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22630 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22631 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22632 ins_encode %{
22633 assert(UseAVX > 2, "required");
22634 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22635
22636 int vlen_enc = vector_length_encoding(this, $src1);
22637 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22638 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22639 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22640
22641 // Comparison result is produced directly in the destination mask (k) register.
22642 switch (src1_elem_bt) {
22643 case T_BYTE: {
22644 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22645 break;
22646 }
22647 case T_SHORT: {
22648 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22649 break;
22650 }
22651 case T_INT: {
22652 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22653 break;
22654 }
22655 case T_LONG: {
22656 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22657 break;
22658 }
22659 default: assert(false, "%s", type2name(src1_elem_bt));
22660 }
22661 %}
22662 ins_pipe( pipe_slow );
22663 %}
22664
22665 // Extract
22666
22667 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22668 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22669 match(Set dst (ExtractI src idx));
22670 match(Set dst (ExtractS src idx));
22671 match(Set dst (ExtractB src idx));
22672 format %{ "extractI $dst,$src,$idx\t!" %}
22673 ins_encode %{
22674 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22675
22676 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22677 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22678 %}
22679 ins_pipe( pipe_slow );
22680 %}
22681
22682 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22683 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22684 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22685 match(Set dst (ExtractI src idx));
22686 match(Set dst (ExtractS src idx));
22687 match(Set dst (ExtractB src idx));
22688 effect(TEMP vtmp);
22689 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22690 ins_encode %{
22691 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22692
22693 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22694 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22695 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22696 %}
22697 ins_pipe( pipe_slow );
22698 %}
22699
22700 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22701 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22702 match(Set dst (ExtractL src idx));
22703 format %{ "extractL $dst,$src,$idx\t!" %}
22704 ins_encode %{
22705 assert(UseSSE >= 4, "required");
22706 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22707
22708 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22709 %}
22710 ins_pipe( pipe_slow );
22711 %}
22712
22713 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22714 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22715 Matcher::vector_length(n->in(1)) == 8); // src
22716 match(Set dst (ExtractL src idx));
22717 effect(TEMP vtmp);
22718 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22719 ins_encode %{
22720 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22721
22722 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22723 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22724 %}
22725 ins_pipe( pipe_slow );
22726 %}
22727
22728 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22729 predicate(Matcher::vector_length(n->in(1)) <= 4);
22730 match(Set dst (ExtractF src idx));
22731 effect(TEMP dst, TEMP vtmp);
22732 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22733 ins_encode %{
22734 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22735
22736 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22737 %}
22738 ins_pipe( pipe_slow );
22739 %}
22740
22741 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22742 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22743 Matcher::vector_length(n->in(1)/*src*/) == 16);
22744 match(Set dst (ExtractF src idx));
22745 effect(TEMP vtmp);
22746 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22747 ins_encode %{
22748 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22749
22750 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22751 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22752 %}
22753 ins_pipe( pipe_slow );
22754 %}
22755
22756 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22757 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22758 match(Set dst (ExtractD src idx));
22759 format %{ "extractD $dst,$src,$idx\t!" %}
22760 ins_encode %{
22761 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22762
22763 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22764 %}
22765 ins_pipe( pipe_slow );
22766 %}
22767
22768 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22769 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22770 Matcher::vector_length(n->in(1)) == 8); // src
22771 match(Set dst (ExtractD src idx));
22772 effect(TEMP vtmp);
22773 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22774 ins_encode %{
22775 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22776
22777 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22778 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22779 %}
22780 ins_pipe( pipe_slow );
22781 %}
22782
22783 // --------------------------------- Vector Blend --------------------------------------
22784
22785 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22786 predicate(UseAVX == 0);
22787 match(Set dst (VectorBlend (Binary dst src) mask));
22788 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22789 effect(TEMP tmp);
22790 ins_encode %{
22791 assert(UseSSE >= 4, "required");
22792
22793 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22794 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22795 }
22796 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22797 %}
22798 ins_pipe( pipe_slow );
22799 %}
22800
22801 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22802 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22803 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22804 Matcher::vector_length_in_bytes(n) <= 32 &&
22805 is_integral_type(Matcher::vector_element_basic_type(n)));
22806 match(Set dst (VectorBlend (Binary src1 src2) mask));
22807 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22808 ins_encode %{
22809 int vlen_enc = vector_length_encoding(this);
22810 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22811 %}
22812 ins_pipe( pipe_slow );
22813 %}
22814
22815 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22816 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22817 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22818 Matcher::vector_length_in_bytes(n) <= 32 &&
22819 !is_integral_type(Matcher::vector_element_basic_type(n)));
22820 match(Set dst (VectorBlend (Binary src1 src2) mask));
22821 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22822 ins_encode %{
22823 int vlen_enc = vector_length_encoding(this);
22824 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22825 %}
22826 ins_pipe( pipe_slow );
22827 %}
22828
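// On targets where EnableX86ECoreOpts is set the variable blend instructions
// are avoided and the blend is composed from bitwise operations instead:
//   dst = (mask & src2) | (~mask & src1)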
22829 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22830 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22831 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22832 Matcher::vector_length_in_bytes(n) <= 32);
22833 match(Set dst (VectorBlend (Binary src1 src2) mask));
22834 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22835 effect(TEMP vtmp, TEMP dst);
22836 ins_encode %{
22837 int vlen_enc = vector_length_encoding(this);
22838 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22839 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22840 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22841 %}
22842 ins_pipe( pipe_slow );
22843 %}
22844
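// 512-bit blend with a vector (non-opmask) mask: the mask vector is first
// turned into a k-register by comparing it against the all-bits-set constant,
// then a merge-masked blend selects between src1 and src2.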
22845 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22846 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22847 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22848 match(Set dst (VectorBlend (Binary src1 src2) mask));
22849 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22850 effect(TEMP ktmp);
22851 ins_encode %{
22852 int vlen_enc = Assembler::AVX_512bit;
22853 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22854 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22855 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22856 %}
22857 ins_pipe( pipe_slow );
22858 %}
22859
22860
22861 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22862 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22863 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22864 VM_Version::supports_avx512bw()));
22865 match(Set dst (VectorBlend (Binary src1 src2) mask));
22866 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22867 ins_encode %{
22868 int vlen_enc = vector_length_encoding(this);
22869 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22870 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22871 %}
22872 ins_pipe( pipe_slow );
22873 %}
22874
22875 // --------------------------------- ABS --------------------------------------
22876 // a = |a|
22877 instruct vabsB_reg(vec dst, vec src) %{
22878 match(Set dst (AbsVB src));
22879 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22880 ins_encode %{
22881 uint vlen = Matcher::vector_length(this);
22882 if (vlen <= 16) {
22883 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22884 } else {
22885 int vlen_enc = vector_length_encoding(this);
22886 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22887 }
22888 %}
22889 ins_pipe( pipe_slow );
22890 %}
22891
22892 instruct vabsS_reg(vec dst, vec src) %{
22893 match(Set dst (AbsVS src));
22894 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22895 ins_encode %{
22896 uint vlen = Matcher::vector_length(this);
22897 if (vlen <= 8) {
22898 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22899 } else {
22900 int vlen_enc = vector_length_encoding(this);
22901 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22902 }
22903 %}
22904 ins_pipe( pipe_slow );
22905 %}
22906
22907 instruct vabsI_reg(vec dst, vec src) %{
22908 match(Set dst (AbsVI src));
22909 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22910 ins_encode %{
22911 uint vlen = Matcher::vector_length(this);
22912 if (vlen <= 4) {
22913 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22914 } else {
22915 int vlen_enc = vector_length_encoding(this);
22916 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22917 }
22918 %}
22919 ins_pipe( pipe_slow );
22920 %}
22921
22922 instruct vabsL_reg(vec dst, vec src) %{
22923 match(Set dst (AbsVL src));
22924 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22925 ins_encode %{
22926 assert(UseAVX > 2, "required");
22927 int vlen_enc = vector_length_encoding(this);
22928 if (!VM_Version::supports_avx512vl()) {
22929 vlen_enc = Assembler::AVX_512bit;
22930 }
22931 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22932 %}
22933 ins_pipe( pipe_slow );
22934 %}
22935
22936 // --------------------------------- ABSNEG --------------------------------------
22937
22938 instruct vabsnegF(vec dst, vec src) %{
22939 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22940 match(Set dst (AbsVF src));
22941 match(Set dst (NegVF src));
22942 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22943 ins_cost(150);
22944 ins_encode %{
22945 int opcode = this->ideal_Opcode();
22946 int vlen = Matcher::vector_length(this);
22947 if (vlen == 2) {
22948 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22949 } else {
22950 assert(vlen == 8 || vlen == 16, "required");
22951 int vlen_enc = vector_length_encoding(this);
22952 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22953 }
22954 %}
22955 ins_pipe( pipe_slow );
22956 %}
22957
22958 instruct vabsneg4F(vec dst) %{
22959 predicate(Matcher::vector_length(n) == 4);
22960 match(Set dst (AbsVF dst));
22961 match(Set dst (NegVF dst));
22962 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22963 ins_cost(150);
22964 ins_encode %{
22965 int opcode = this->ideal_Opcode();
22966 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22967 %}
22968 ins_pipe( pipe_slow );
22969 %}
22970
22971 instruct vabsnegD(vec dst, vec src) %{
22972 match(Set dst (AbsVD src));
22973 match(Set dst (NegVD src));
22974 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22975 ins_encode %{
22976 int opcode = this->ideal_Opcode();
22977 uint vlen = Matcher::vector_length(this);
22978 if (vlen == 2) {
22979 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22980 } else {
22981 int vlen_enc = vector_length_encoding(this);
22982 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22983 }
22984 %}
22985 ins_pipe( pipe_slow );
22986 %}
22987
22988 //------------------------------------- VectorTest --------------------------------------------
22989
22990 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22991 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22992 match(Set cr (VectorTest src1 src2));
22993 effect(TEMP vtmp);
22994 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22995 ins_encode %{
22996 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22997 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22998 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22999 %}
23000 ins_pipe( pipe_slow );
23001 %}
23002
23003 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23004 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23005 match(Set cr (VectorTest src1 src2));
23006 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23007 ins_encode %{
23008 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23009 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23010 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23011 %}
23012 ins_pipe( pipe_slow );
23013 %}
23014
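// For mask lengths below what kortest can test directly (or when the byte form
// would require an unsupported AVX512DQ), the mask is copied into a general
// purpose register, truncated to masklen bits, and then either compared against
// the all-true pattern (alltrue) or simply tested for any set bit (anytrue).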
23015 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23016 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23017 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23018 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23019 match(Set cr (VectorTest src1 src2));
23020 effect(TEMP tmp);
23021 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23022 ins_encode %{
23023 uint masklen = Matcher::vector_length(this, $src1);
23024 __ kmovwl($tmp$$Register, $src1$$KRegister);
23025 __ andl($tmp$$Register, (1 << masklen) - 1);
23026 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23027 %}
23028 ins_pipe( pipe_slow );
23029 %}
23030
23031 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23032 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23033 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23034 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23035 match(Set cr (VectorTest src1 src2));
23036 effect(TEMP tmp);
23037 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23038 ins_encode %{
23039 uint masklen = Matcher::vector_length(this, $src1);
23040 __ kmovwl($tmp$$Register, $src1$$KRegister);
23041 __ andl($tmp$$Register, (1 << masklen) - 1);
23042 %}
23043 ins_pipe( pipe_slow );
23044 %}
23045
23046 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23047 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23048 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23049 match(Set cr (VectorTest src1 src2));
23050 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23051 ins_encode %{
23052 uint masklen = Matcher::vector_length(this, $src1);
23053 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23054 %}
23055 ins_pipe( pipe_slow );
23056 %}
23057
23058 //------------------------------------- LoadMask --------------------------------------------
23059
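// VectorLoadMask widens a vector of boolean bytes (0 or 1 per element) into a
// per-lane mask: either a vector whose true lanes have all bits set, or an
// opmask (k) register on AVX-512 targets.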
23060 instruct loadMask(legVec dst, legVec src) %{
23061 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23062 match(Set dst (VectorLoadMask src));
23063 effect(TEMP dst);
23064 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23065 ins_encode %{
23066 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23067 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23068 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23069 %}
23070 ins_pipe( pipe_slow );
23071 %}
23072
23073 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23074 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23075 match(Set dst (VectorLoadMask src));
23076 effect(TEMP xtmp);
23077 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23078 ins_encode %{
23079 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23080 true, Assembler::AVX_512bit);
23081 %}
23082 ins_pipe( pipe_slow );
23083 %}
23084
23085 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23086 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23087 match(Set dst (VectorLoadMask src));
23088 effect(TEMP xtmp);
23089 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23090 ins_encode %{
23091 int vlen_enc = vector_length_encoding(in(1));
23092 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23093 false, vlen_enc);
23094 %}
23095 ins_pipe( pipe_slow );
23096 %}
23097
23098 //------------------------------------- StoreMask --------------------------------------------
23099
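// VectorStoreMask performs the inverse operation: each lane of a vector mask
// (-1/0, or an opmask register) is narrowed to a single byte holding 1 or 0;
// the absolute-value instructions (pabsb/pabsw/pabsd) turn -1 lanes into 1.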
23100 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23101 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23102 match(Set dst (VectorStoreMask src size));
23103 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23104 ins_encode %{
23105 int vlen = Matcher::vector_length(this);
23106 if (vlen <= 16 && UseAVX <= 2) {
23107 assert(UseSSE >= 3, "required");
23108 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23109 } else {
23110 assert(UseAVX > 0, "required");
23111 int src_vlen_enc = vector_length_encoding(this, $src);
23112 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23113 }
23114 %}
23115 ins_pipe( pipe_slow );
23116 %}
23117
23118 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23119 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23120 match(Set dst (VectorStoreMask src size));
23121 effect(TEMP_DEF dst, TEMP xtmp);
23122 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23123 ins_encode %{
23124 int vlen_enc = Assembler::AVX_128bit;
23125 int vlen = Matcher::vector_length(this);
23126 if (vlen <= 8) {
23127 assert(UseSSE >= 3, "required");
23128 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23129 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23130 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23131 } else {
23132 assert(UseAVX > 0, "required");
23133 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23134 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23135 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23136 }
23137 %}
23138 ins_pipe( pipe_slow );
23139 %}
23140
23141 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23142 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23143 match(Set dst (VectorStoreMask src size));
23144 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23145 effect(TEMP_DEF dst, TEMP xtmp);
23146 ins_encode %{
23147 int vlen_enc = Assembler::AVX_128bit;
23148 int vlen = Matcher::vector_length(this);
23149 if (vlen <= 4) {
23150 assert(UseSSE >= 3, "required");
23151 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23152 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23153 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23154 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23155 } else {
23156 assert(UseAVX > 0, "required");
23157 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23158 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23159 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23160 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23161 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23162 }
23163 %}
23164 ins_pipe( pipe_slow );
23165 %}
23166
23167 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23168 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23169 match(Set dst (VectorStoreMask src size));
23170 effect(TEMP_DEF dst, TEMP xtmp);
23171 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23172 ins_encode %{
23173 assert(UseSSE >= 3, "required");
23174 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23175 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23176 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23177 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23178 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23179 %}
23180 ins_pipe( pipe_slow );
23181 %}
23182
23183 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23184 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23185 match(Set dst (VectorStoreMask src size));
23186 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23187 effect(TEMP_DEF dst, TEMP vtmp);
23188 ins_encode %{
23189 int vlen_enc = Assembler::AVX_128bit;
23190 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23191 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23192 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23193 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23194 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23195 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23196 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23197 %}
23198 ins_pipe( pipe_slow );
23199 %}
23200
23201 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23202 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23203 match(Set dst (VectorStoreMask src size));
23204 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23205 ins_encode %{
23206 int src_vlen_enc = vector_length_encoding(this, $src);
23207 int dst_vlen_enc = vector_length_encoding(this);
23208 if (!VM_Version::supports_avx512vl()) {
23209 src_vlen_enc = Assembler::AVX_512bit;
23210 }
23211 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23212 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23213 %}
23214 ins_pipe( pipe_slow );
23215 %}
23216
23217 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23218 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23219 match(Set dst (VectorStoreMask src size));
23220 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23221 ins_encode %{
23222 int src_vlen_enc = vector_length_encoding(this, $src);
23223 int dst_vlen_enc = vector_length_encoding(this);
23224 if (!VM_Version::supports_avx512vl()) {
23225 src_vlen_enc = Assembler::AVX_512bit;
23226 }
23227 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23228 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23229 %}
23230 ins_pipe( pipe_slow );
23231 %}
23232
23233 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23234 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23235 match(Set dst (VectorStoreMask mask size));
23236 effect(TEMP_DEF dst);
23237 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23238 ins_encode %{
23239 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23240 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23241 false, Assembler::AVX_512bit, noreg);
23242 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23243 %}
23244 ins_pipe( pipe_slow );
23245 %}
23246
23247 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23248 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23249 match(Set dst (VectorStoreMask mask size));
23250 effect(TEMP_DEF dst);
23251 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23252 ins_encode %{
23253 int dst_vlen_enc = vector_length_encoding(this);
23254 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23255 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23256 %}
23257 ins_pipe( pipe_slow );
23258 %}
23259
23260 instruct vmaskcast_evex(kReg dst) %{
23261 match(Set dst (VectorMaskCast dst));
23262 ins_cost(0);
23263 format %{ "vector_mask_cast $dst" %}
23264 ins_encode %{
23265 // empty
23266 %}
23267 ins_pipe(empty);
23268 %}
23269
23270 instruct vmaskcast(vec dst) %{
23271 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23272 match(Set dst (VectorMaskCast dst));
23273 ins_cost(0);
23274 format %{ "vector_mask_cast $dst" %}
23275 ins_encode %{
23276 // empty
23277 %}
23278 ins_pipe(empty);
23279 %}
23280
23281 instruct vmaskcast_avx(vec dst, vec src) %{
23282 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23283 match(Set dst (VectorMaskCast src));
23284 format %{ "vector_mask_cast $dst, $src" %}
23285 ins_encode %{
23286 int vlen = Matcher::vector_length(this);
23287 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23288 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23289 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23290 %}
23291 ins_pipe(pipe_slow);
23292 %}
23293
23294 //-------------------------------- Load Iota Indices ----------------------------------
23295
23296 instruct loadIotaIndices(vec dst, immI_0 src) %{
23297 match(Set dst (VectorLoadConst src));
23298 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23299 ins_encode %{
23300 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23301 BasicType bt = Matcher::vector_element_basic_type(this);
23302 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23303 %}
23304 ins_pipe( pipe_slow );
23305 %}
23306
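// PopulateIndex produces the vector {start, start+1, start+2, ...}: the scalar
// start value in $src1 is broadcast and the iota constant is added to it
// (the stride $src2 is currently restricted to 1).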
23307 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23308 match(Set dst (PopulateIndex src1 src2));
23309 effect(TEMP dst, TEMP vtmp);
23310 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23311 ins_encode %{
23312 assert($src2$$constant == 1, "required");
23313 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23314 int vlen_enc = vector_length_encoding(this);
23315 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23316 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23317 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23318 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23319 %}
23320 ins_pipe( pipe_slow );
23321 %}
23322
23323 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23324 match(Set dst (PopulateIndex src1 src2));
23325 effect(TEMP dst, TEMP vtmp);
23326 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23327 ins_encode %{
23328 assert($src2$$constant == 1, "required");
23329 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23330 int vlen_enc = vector_length_encoding(this);
23331 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23332 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23333 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23334 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23335 %}
23336 ins_pipe( pipe_slow );
23337 %}
23338
23339 //-------------------------------- Rearrange ----------------------------------
23340
23341 // LoadShuffle/Rearrange for Byte
23342 instruct rearrangeB(vec dst, vec shuffle) %{
23343 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23344 Matcher::vector_length(n) < 32);
23345 match(Set dst (VectorRearrange dst shuffle));
23346 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23347 ins_encode %{
23348 assert(UseSSE >= 4, "required");
23349 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23350 %}
23351 ins_pipe( pipe_slow );
23352 %}
23353
23354 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23355 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23356 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23357 match(Set dst (VectorRearrange src shuffle));
23358 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23359 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23360 ins_encode %{
23361 assert(UseAVX >= 2, "required");
23362 // Swap src into vtmp1
23363 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23364 // Shuffle swapped src to get entries from other 128 bit lane
23365 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23366 // Shuffle original src to get entries from self 128 bit lane
23367 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23368 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23369 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23370 // Perform the blend
23371 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23372 %}
23373 ins_pipe( pipe_slow );
23374 %}
23375
23376
23377 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23378 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23379 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23380 match(Set dst (VectorRearrange src shuffle));
23381 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23382 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23383 ins_encode %{
23384 int vlen_enc = vector_length_encoding(this);
23385 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23386 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23387 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23388 %}
23389 ins_pipe( pipe_slow );
23390 %}
23391
23392 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23393 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23394 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23395 match(Set dst (VectorRearrange src shuffle));
23396 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23397 ins_encode %{
23398 int vlen_enc = vector_length_encoding(this);
23399 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23400 %}
23401 ins_pipe( pipe_slow );
23402 %}
23403
23404 // LoadShuffle/Rearrange for Short
23405
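// pshufb/vpshufb only shuffles bytes, so each short shuffle index i must be
// expanded into the byte index pair (2*i, 2*i+1); e.g. short index 3 becomes
// byte indices 6 and 7. loadShuffleS below builds that byte-index vector.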
23406 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23407 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23408 !VM_Version::supports_avx512bw());
23409 match(Set dst (VectorLoadShuffle src));
23410 effect(TEMP dst, TEMP vtmp);
23411 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23412 ins_encode %{
23413 // Create a byte shuffle mask from the short shuffle mask, since only a
23414 // byte shuffle instruction (pshufb) is available on these platforms.
23415 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23416 if (UseAVX == 0) {
23417 assert(vlen_in_bytes <= 16, "required");
23418 // Multiply each shuffle by two to get byte index
23419 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23420 __ psllw($vtmp$$XMMRegister, 1);
23421
23422 // Duplicate to create 2 copies of byte index
23423 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23424 __ psllw($dst$$XMMRegister, 8);
23425 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23426
23427 // Add one to get alternate byte index
23428 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23429 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23430 } else {
23431 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23432 int vlen_enc = vector_length_encoding(this);
23433 // Multiply each shuffle by two to get byte index
23434 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23435
23436 // Duplicate to create 2 copies of byte index
23437 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23438 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23439
23440 // Add one to get alternate byte index
23441 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23442 }
23443 %}
23444 ins_pipe( pipe_slow );
23445 %}
23446
23447 instruct rearrangeS(vec dst, vec shuffle) %{
23448 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23449 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23450 match(Set dst (VectorRearrange dst shuffle));
23451 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23452 ins_encode %{
23453 assert(UseSSE >= 4, "required");
23454 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23455 %}
23456 ins_pipe( pipe_slow );
23457 %}
23458
23459 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23460 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23461 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23462 match(Set dst (VectorRearrange src shuffle));
23463 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23464 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23465 ins_encode %{
23466 assert(UseAVX >= 2, "required");
23467 // Swap src into vtmp1
23468 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23469 // Shuffle swapped src to get entries from other 128 bit lane
23470 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23471 // Shuffle original src to get entries from self 128 bit lane
23472 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23473 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23474 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23475 // Perform the blend
23476 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23477 %}
23478 ins_pipe( pipe_slow );
23479 %}
23480
23481 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23482 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23483 VM_Version::supports_avx512bw());
23484 match(Set dst (VectorRearrange src shuffle));
23485 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23486 ins_encode %{
23487 int vlen_enc = vector_length_encoding(this);
23488 if (!VM_Version::supports_avx512vl()) {
23489 vlen_enc = Assembler::AVX_512bit;
23490 }
23491 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23492 %}
23493 ins_pipe( pipe_slow );
23494 %}
23495
23496 // LoadShuffle/Rearrange for Integer and Float
23497
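// On SSE-only targets each 32-bit shuffle index i is expanded to the four byte
// indices 4*i .. 4*i+3 so that pshufb can be used; AVX targets shuffle the
// 32-bit lanes directly (see rearrangeI_avx).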
23498 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23499 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23500 Matcher::vector_length(n) == 4 && UseAVX == 0);
23501 match(Set dst (VectorLoadShuffle src));
23502 effect(TEMP dst, TEMP vtmp);
23503 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23504 ins_encode %{
23505 assert(UseSSE >= 4, "required");
23506
23507 // Create a byte shuffle mask from the int shuffle mask, since only a
23508 // byte shuffle instruction (pshufb) is available on these platforms.
23509
23510 // Duplicate and multiply each shuffle by 4
23511 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23512 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23513 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23514 __ psllw($vtmp$$XMMRegister, 2);
23515
23516 // Duplicate again to create 4 copies of byte index
23517 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23518 __ psllw($dst$$XMMRegister, 8);
23519 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23520
23521 // Add 3,2,1,0 to get alternate byte index
23522 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23523 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23524 %}
23525 ins_pipe( pipe_slow );
23526 %}
23527
23528 instruct rearrangeI(vec dst, vec shuffle) %{
23529 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23530 UseAVX == 0);
23531 match(Set dst (VectorRearrange dst shuffle));
23532 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23533 ins_encode %{
23534 assert(UseSSE >= 4, "required");
23535 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23536 %}
23537 ins_pipe( pipe_slow );
23538 %}
23539
23540 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23541 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23542 UseAVX > 0);
23543 match(Set dst (VectorRearrange src shuffle));
23544 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23545 ins_encode %{
23546 int vlen_enc = vector_length_encoding(this);
23547 BasicType bt = Matcher::vector_element_basic_type(this);
23548 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23549 %}
23550 ins_pipe( pipe_slow );
23551 %}
23552
23553 // LoadShuffle/Rearrange for Long and Double
23554
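// Without AVX512VL there is no variable cross-lane 64-bit permute for vectors
// shorter than 512 bits, so each long shuffle index i is expanded into the
// double word pair (2*i, 2*i+1) and vpermd is used instead (see rearrangeL).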
23555 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23556 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23557 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23558 match(Set dst (VectorLoadShuffle src));
23559 effect(TEMP dst, TEMP vtmp);
23560 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23561 ins_encode %{
23562 assert(UseAVX >= 2, "required");
23563
23564 int vlen_enc = vector_length_encoding(this);
23565 // Create a double word shuffle mask from the long shuffle mask, since only
23566 // a double word shuffle instruction is available on these platforms.
23567
23568 // Multiply each shuffle by two to get double word index
23569 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23570
23571 // Duplicate each double word shuffle
23572 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23573 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23574
23575 // Add one to get alternate double word index
23576 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23577 %}
23578 ins_pipe( pipe_slow );
23579 %}
23580
23581 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23582 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23583 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23584 match(Set dst (VectorRearrange src shuffle));
23585 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23586 ins_encode %{
23587 assert(UseAVX >= 2, "required");
23588
23589 int vlen_enc = vector_length_encoding(this);
23590 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23591 %}
23592 ins_pipe( pipe_slow );
23593 %}
23594
23595 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23596 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23597 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23598 match(Set dst (VectorRearrange src shuffle));
23599 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23600 ins_encode %{
23601 assert(UseAVX > 2, "required");
23602
23603 int vlen_enc = vector_length_encoding(this);
23604 if (vlen_enc == Assembler::AVX_128bit) {
23605 vlen_enc = Assembler::AVX_256bit;
23606 }
23607 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23608 %}
23609 ins_pipe( pipe_slow );
23610 %}
23611
23612 // --------------------------------- FMA --------------------------------------
23613 // a * b + c
23614
23615 instruct vfmaF_reg(vec a, vec b, vec c) %{
23616 match(Set c (FmaVF c (Binary a b)));
23617 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23618 ins_cost(150);
23619 ins_encode %{
23620 assert(UseFMA, "not enabled");
23621 int vlen_enc = vector_length_encoding(this);
23622 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23623 %}
23624 ins_pipe( pipe_slow );
23625 %}
23626
23627 instruct vfmaF_mem(vec a, memory b, vec c) %{
23628 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23629 match(Set c (FmaVF c (Binary a (LoadVector b))));
23630 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23631 ins_cost(150);
23632 ins_encode %{
23633 assert(UseFMA, "not enabled");
23634 int vlen_enc = vector_length_encoding(this);
23635 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23636 %}
23637 ins_pipe( pipe_slow );
23638 %}
23639
23640 instruct vfmaD_reg(vec a, vec b, vec c) %{
23641 match(Set c (FmaVD c (Binary a b)));
23642 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23643 ins_cost(150);
23644 ins_encode %{
23645 assert(UseFMA, "not enabled");
23646 int vlen_enc = vector_length_encoding(this);
23647 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23648 %}
23649 ins_pipe( pipe_slow );
23650 %}
23651
23652 instruct vfmaD_mem(vec a, memory b, vec c) %{
23653 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23654 match(Set c (FmaVD c (Binary a (LoadVector b))));
23655 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23656 ins_cost(150);
23657 ins_encode %{
23658 assert(UseFMA, "not enabled");
23659 int vlen_enc = vector_length_encoding(this);
23660 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23661 %}
23662 ins_pipe( pipe_slow );
23663 %}
23664
23665 // --------------------------------- Vector Multiply Add --------------------------------------
23666
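// pmaddwd/vpmaddwd multiplies adjacent pairs of signed 16-bit elements and adds
// each pair of 32-bit products:
//   dst[i] = src1[2*i]*src2[2*i] + src1[2*i+1]*src2[2*i+1]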
23667 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23668 predicate(UseAVX == 0);
23669 match(Set dst (MulAddVS2VI dst src1));
23670 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23671 ins_encode %{
23672 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23673 %}
23674 ins_pipe( pipe_slow );
23675 %}
23676
23677 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23678 predicate(UseAVX > 0);
23679 match(Set dst (MulAddVS2VI src1 src2));
23680 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23681 ins_encode %{
23682 int vlen_enc = vector_length_encoding(this);
23683 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23684 %}
23685 ins_pipe( pipe_slow );
23686 %}
23687
23688 // --------------------------------- Vector Multiply Add Add ----------------------------------
23689
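// With AVX512_VNNI the multiply-add and the subsequent vector add can be fused
// into a single evpdpwssd (word dot product with accumulate) instruction.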
23690 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23691 predicate(VM_Version::supports_avx512_vnni());
23692 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23693 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23694 ins_encode %{
23695 assert(UseAVX > 2, "required");
23696 int vlen_enc = vector_length_encoding(this);
23697 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23698 %}
23699 ins_pipe( pipe_slow );
23700 ins_cost(10);
23701 %}
23702
23703 // --------------------------------- PopCount --------------------------------------
23704
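// When the target supports the vector popcount extensions (checked by
// is_vector_popcount_predicate) a single instruction per element type is used;
// otherwise the count is emulated, typically via a pshufb-based nibble lookup
// inside vector_popcount_integral.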
23705 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23706 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23707 match(Set dst (PopCountVI src));
23708 match(Set dst (PopCountVL src));
23709 format %{ "vector_popcount_integral $dst, $src" %}
23710 ins_encode %{
23711 int opcode = this->ideal_Opcode();
23712 int vlen_enc = vector_length_encoding(this, $src);
23713 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23714 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23715 %}
23716 ins_pipe( pipe_slow );
23717 %}
23718
23719 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23720 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23721 match(Set dst (PopCountVI src mask));
23722 match(Set dst (PopCountVL src mask));
23723 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23724 ins_encode %{
23725 int vlen_enc = vector_length_encoding(this, $src);
23726 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23727 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23728 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23729 %}
23730 ins_pipe( pipe_slow );
23731 %}
23732
23733 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23734 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23735 match(Set dst (PopCountVI src));
23736 match(Set dst (PopCountVL src));
23737 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23738 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23739 ins_encode %{
23740 int opcode = this->ideal_Opcode();
23741 int vlen_enc = vector_length_encoding(this, $src);
23742 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23743 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23744 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23745 %}
23746 ins_pipe( pipe_slow );
23747 %}
23748
23749 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23750
23751 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23752 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23753 Matcher::vector_length_in_bytes(n->in(1))));
23754 match(Set dst (CountTrailingZerosV src));
23755 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23756 ins_cost(400);
23757 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23758 ins_encode %{
23759 int vlen_enc = vector_length_encoding(this, $src);
23760 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23761 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23762 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23763 %}
23764 ins_pipe( pipe_slow );
23765 %}
23766
23767 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23768 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23769 VM_Version::supports_avx512cd() &&
23770 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23771 match(Set dst (CountTrailingZerosV src));
23772 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23773 ins_cost(400);
23774 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23775 ins_encode %{
23776 int vlen_enc = vector_length_encoding(this, $src);
23777 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23778 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23779 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23780 %}
23781 ins_pipe( pipe_slow );
23782 %}
23783
23784 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23785 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23786 match(Set dst (CountTrailingZerosV src));
23787 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23788 ins_cost(400);
23789 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23790 ins_encode %{
23791 int vlen_enc = vector_length_encoding(this, $src);
23792 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23793 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23794 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23795 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23796 %}
23797 ins_pipe( pipe_slow );
23798 %}
23799
23800 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23801 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23802 match(Set dst (CountTrailingZerosV src));
23803 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23804 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23805 ins_encode %{
23806 int vlen_enc = vector_length_encoding(this, $src);
23807 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23808 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23809 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23810 %}
23811 ins_pipe( pipe_slow );
23812 %}
23813
23814
23815 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23816
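// vpternlogd combines three operands with an arbitrary boolean function; the
// 8-bit immediate $func is the truth table of that function, indexed by the
// corresponding bits of dst, src2 and src3.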
23817 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23818 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23819 effect(TEMP dst);
23820 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23821 ins_encode %{
23822 int vector_len = vector_length_encoding(this);
23823 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23824 %}
23825 ins_pipe( pipe_slow );
23826 %}
23827
23828 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23829 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23830 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23831 effect(TEMP dst);
23832 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23833 ins_encode %{
23834 int vector_len = vector_length_encoding(this);
23835 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23836 %}
23837 ins_pipe( pipe_slow );
23838 %}
23839
23840 // --------------------------------- Rotation Operations ----------------------------------
23841 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23842 match(Set dst (RotateLeftV src shift));
23843 match(Set dst (RotateRightV src shift));
23844 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23845 ins_encode %{
23846 int opcode = this->ideal_Opcode();
23847 int vector_len = vector_length_encoding(this);
23848 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23849 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23850 %}
23851 ins_pipe( pipe_slow );
23852 %}
23853
23854 instruct vprorate(vec dst, vec src, vec shift) %{
23855 match(Set dst (RotateLeftV src shift));
23856 match(Set dst (RotateRightV src shift));
23857 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23858 ins_encode %{
23859 int opcode = this->ideal_Opcode();
23860 int vector_len = vector_length_encoding(this);
23861 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23862 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23863 %}
23864 ins_pipe( pipe_slow );
23865 %}
23866
23867 // ---------------------------------- Masked Operations ------------------------------------
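// Two flavours are provided below: AVX targets keep the mask in an ordinary vector register and
// use vmaskmov-style loads/stores, while EVEX targets use an opmask (k) register with merge- or
// zero-masking.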
23868 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23869 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23870 match(Set dst (LoadVectorMasked mem mask));
23871 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23872 ins_encode %{
23873 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23874 int vlen_enc = vector_length_encoding(this);
23875 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23876 %}
23877 ins_pipe( pipe_slow );
23878 %}
23879
23880
23881 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23882 predicate(n->in(3)->bottom_type()->isa_vectmask());
23883 match(Set dst (LoadVectorMasked mem mask));
23884 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23885 ins_encode %{
23886 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23887 int vector_len = vector_length_encoding(this);
23888 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23889 %}
23890 ins_pipe( pipe_slow );
23891 %}
23892
23893 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23894 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23895 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23896 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23897 ins_encode %{
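    // This node produces a memory state, so the vector length and element type must be taken
    // from the stored-value operand rather than from the node itself.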
23898 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23899 int vlen_enc = vector_length_encoding(src_node);
23900 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23901 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23902 %}
23903 ins_pipe( pipe_slow );
23904 %}
23905
23906 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23907 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23908 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23909 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23910 ins_encode %{
23911 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23912 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23913 int vlen_enc = vector_length_encoding(src_node);
23914 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23915 %}
23916 ins_pipe( pipe_slow );
23917 %}
23918
23919 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23920 match(Set addr (VerifyVectorAlignment addr mask));
23921 effect(KILL cr);
23922 format %{ "verify_vector_alignment $addr, $mask \t! verify alignment" %}
23923 ins_encode %{
23924 Label Lskip;
23925 // check if masked bits of addr are zero
23926 __ testq($addr$$Register, $mask$$constant);
23927 __ jccb(Assembler::equal, Lskip);
23928 __ stop("verify_vector_alignment found a misaligned vector memory access");
23929 __ bind(Lskip);
23930 %}
23931 ins_pipe(pipe_slow);
23932 %}
23933
23934 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23935 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23936 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23937 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23938 ins_encode %{
23939 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23940 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23941
23942 Label DONE;
23943 int vlen_enc = vector_length_encoding(this, $src1);
23944 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23945
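    // Result is -1 when every lane selected by $mask compares equal, otherwise the index of the
    // first differing lane. ktmp2 = ~mask treats unselected lanes as matching; the masked compare
    // leaves those lanes zero in ktmp1, so kortest sets CF exactly when all selected lanes match.
    // The tzcnt path assumes the mask is a contiguous prefix (as produced by VectorMaskGen), so
    // the first zero bit of ktmp1 is the first real mismatch.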
23946 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23947 __ mov64($dst$$Register, -1L);
23948 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23949 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23950 __ jccb(Assembler::carrySet, DONE);
23951 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23952 __ notq($dst$$Register);
23953 __ tzcntq($dst$$Register, $dst$$Register);
23954 __ bind(DONE);
23955 %}
23956 ins_pipe( pipe_slow );
23957 %}
23958
23959
23960 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23961 match(Set dst (VectorMaskGen len));
23962 effect(TEMP temp, KILL cr);
23963 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23964 ins_encode %{
23965 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23966 %}
23967 ins_pipe( pipe_slow );
23968 %}
23969
23970 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23971 match(Set dst (VectorMaskGen len));
23972 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23973 effect(TEMP temp);
23974 ins_encode %{
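    // A positive constant length builds a prefix mask: (1 << len) - 1 is materialized in $temp
    // and moved into the mask register; a zero length produces an all-zero mask.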
23975 if ($len$$constant > 0) {
23976 __ mov64($temp$$Register, right_n_bits($len$$constant));
23977 __ kmovql($dst$$KRegister, $temp$$Register);
23978 } else {
23979 __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
23980 }
23981 %}
23982 ins_pipe( pipe_slow );
23983 %}
23984
23985 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23986 predicate(n->in(1)->bottom_type()->isa_vectmask());
23987 match(Set dst (VectorMaskToLong mask));
23988 effect(TEMP dst, KILL cr);
23989 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23990 ins_encode %{
23991 int opcode = this->ideal_Opcode();
23992 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23993 int mask_len = Matcher::vector_length(this, $mask);
23994 int mask_size = mask_len * type2aelembytes(mbt);
23995 int vlen_enc = vector_length_encoding(this, $mask);
23996 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23997 $dst$$Register, mask_len, mask_size, vlen_enc);
23998 %}
23999 ins_pipe( pipe_slow );
24000 %}
24001
24002 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24003 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24004 match(Set dst (VectorMaskToLong mask));
24005 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24006 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24007 ins_encode %{
24008 int opcode = this->ideal_Opcode();
24009 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24010 int mask_len = Matcher::vector_length(this, $mask);
24011 int vlen_enc = vector_length_encoding(this, $mask);
24012 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24013 $dst$$Register, mask_len, mbt, vlen_enc);
24014 %}
24015 ins_pipe( pipe_slow );
24016 %}
24017
24018 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24019 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24020 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24021 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24022 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24023 ins_encode %{
24024 int opcode = this->ideal_Opcode();
24025 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24026 int mask_len = Matcher::vector_length(this, $mask);
24027 int vlen_enc = vector_length_encoding(this, $mask);
24028 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24029 $dst$$Register, mask_len, mbt, vlen_enc);
24030 %}
24031 ins_pipe( pipe_slow );
24032 %}
24033
24034 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24035 predicate(n->in(1)->bottom_type()->isa_vectmask());
24036 match(Set dst (VectorMaskTrueCount mask));
24037 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24038 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24039 ins_encode %{
24040 int opcode = this->ideal_Opcode();
24041 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24042 int mask_len = Matcher::vector_length(this, $mask);
24043 int mask_size = mask_len * type2aelembytes(mbt);
24044 int vlen_enc = vector_length_encoding(this, $mask);
24045 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24046 $tmp$$Register, mask_len, mask_size, vlen_enc);
24047 %}
24048 ins_pipe( pipe_slow );
24049 %}
24050
24051 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24052 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24053 match(Set dst (VectorMaskTrueCount mask));
24054 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24055 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24056 ins_encode %{
24057 int opcode = this->ideal_Opcode();
24058 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24059 int mask_len = Matcher::vector_length(this, $mask);
24060 int vlen_enc = vector_length_encoding(this, $mask);
24061 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24062 $tmp$$Register, mask_len, mbt, vlen_enc);
24063 %}
24064 ins_pipe( pipe_slow );
24065 %}
24066
24067 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24068 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24069 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24070 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24071 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24072 ins_encode %{
24073 int opcode = this->ideal_Opcode();
24074 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24075 int mask_len = Matcher::vector_length(this, $mask);
24076 int vlen_enc = vector_length_encoding(this, $mask);
24077 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24078 $tmp$$Register, mask_len, mbt, vlen_enc);
24079 %}
24080 ins_pipe( pipe_slow );
24081 %}
24082
24083 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24084 predicate(n->in(1)->bottom_type()->isa_vectmask());
24085 match(Set dst (VectorMaskFirstTrue mask));
24086 match(Set dst (VectorMaskLastTrue mask));
24087 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24088 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24089 ins_encode %{
24090 int opcode = this->ideal_Opcode();
24091 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24092 int mask_len = Matcher::vector_length(this, $mask);
24093 int mask_size = mask_len * type2aelembytes(mbt);
24094 int vlen_enc = vector_length_encoding(this, $mask);
24095 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24096 $tmp$$Register, mask_len, mask_size, vlen_enc);
24097 %}
24098 ins_pipe( pipe_slow );
24099 %}
24100
24101 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24102 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24103 match(Set dst (VectorMaskFirstTrue mask));
24104 match(Set dst (VectorMaskLastTrue mask));
24105 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24106 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24107 ins_encode %{
24108 int opcode = this->ideal_Opcode();
24109 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24110 int mask_len = Matcher::vector_length(this, $mask);
24111 int vlen_enc = vector_length_encoding(this, $mask);
24112 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24113 $tmp$$Register, mask_len, mbt, vlen_enc);
24114 %}
24115 ins_pipe( pipe_slow );
24116 %}
24117
24118 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24119 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24120 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24121 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24122 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24123 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24124 ins_encode %{
24125 int opcode = this->ideal_Opcode();
24126 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24127 int mask_len = Matcher::vector_length(this, $mask);
24128 int vlen_enc = vector_length_encoding(this, $mask);
24129 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24130 $tmp$$Register, mask_len, mbt, vlen_enc);
24131 %}
24132 ins_pipe( pipe_slow );
24133 %}
24134
24135 // --------------------------------- Compress/Expand Operations ---------------------------
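// CompressV packs the lanes selected by the mask into the low-order lanes of the destination;
// ExpandV is the inverse, distributing the low-order source lanes into the selected positions.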
24136 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24137 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24138 match(Set dst (CompressV src mask));
24139 match(Set dst (ExpandV src mask));
24140 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24141 format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24142 ins_encode %{
24143 int opcode = this->ideal_Opcode();
24144 int vlen_enc = vector_length_encoding(this);
24145 BasicType bt = Matcher::vector_element_basic_type(this);
24146 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24147 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24148 %}
24149 ins_pipe( pipe_slow );
24150 %}
24151
24152 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24153 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24154 match(Set dst (CompressV src mask));
24155 match(Set dst (ExpandV src mask));
24156 format %{ "vector_compress_expand $dst, $src, $mask" %}
24157 ins_encode %{
24158 int opcode = this->ideal_Opcode();
24159 int vector_len = vector_length_encoding(this);
24160 BasicType bt = Matcher::vector_element_basic_type(this);
24161 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24162 %}
24163 ins_pipe( pipe_slow );
24164 %}
24165
24166 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24167 match(Set dst (CompressM mask));
24168 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24169 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24170 ins_encode %{
24171 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24172 int mask_len = Matcher::vector_length(this);
24173 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24174 %}
24175 ins_pipe( pipe_slow );
24176 %}
24177
24178 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24179
24180 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24181 predicate(!VM_Version::supports_gfni());
24182 match(Set dst (ReverseV src));
24183 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24184 format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24185 ins_encode %{
24186 int vec_enc = vector_length_encoding(this);
24187 BasicType bt = Matcher::vector_element_basic_type(this);
24188 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24189 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24190 %}
24191 ins_pipe( pipe_slow );
24192 %}
24193
24194 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24195 predicate(VM_Version::supports_gfni());
24196 match(Set dst (ReverseV src));
24197 effect(TEMP dst, TEMP xtmp);
24198 format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24199 ins_encode %{
24200 int vec_enc = vector_length_encoding(this);
24201 BasicType bt = Matcher::vector_element_basic_type(this);
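    // 0x8040201008040201 is the GF(2) affine matrix that makes vgf2p8affineqb reverse the bit
    // order within each byte.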
24202 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24203 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24204 $xtmp$$XMMRegister);
24205 %}
24206 ins_pipe( pipe_slow );
24207 %}
24208
24209 instruct vreverse_byte_reg(vec dst, vec src) %{
24210 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24211 match(Set dst (ReverseBytesV src));
24212 effect(TEMP dst);
24213 format %{ "vector_reverse_byte $dst, $src" %}
24214 ins_encode %{
24215 int vec_enc = vector_length_encoding(this);
24216 BasicType bt = Matcher::vector_element_basic_type(this);
24217 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24218 %}
24219 ins_pipe( pipe_slow );
24220 %}
24221
24222 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24223 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24224 match(Set dst (ReverseBytesV src));
24225 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24226 format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24227 ins_encode %{
24228 int vec_enc = vector_length_encoding(this);
24229 BasicType bt = Matcher::vector_element_basic_type(this);
24230 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24231 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24232 %}
24233 ins_pipe( pipe_slow );
24234 %}
24235
24236 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24237
24238 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24239 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24240 Matcher::vector_length_in_bytes(n->in(1))));
24241 match(Set dst (CountLeadingZerosV src));
24242 format %{ "vector_count_leading_zeros $dst, $src" %}
24243 ins_encode %{
24244 int vlen_enc = vector_length_encoding(this, $src);
24245 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24246 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24247 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24248 %}
24249 ins_pipe( pipe_slow );
24250 %}
24251
24252 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24253 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24254 Matcher::vector_length_in_bytes(n->in(1))));
24255 match(Set dst (CountLeadingZerosV src mask));
24256 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24257 ins_encode %{
24258 int vlen_enc = vector_length_encoding(this, $src);
24259 BasicType bt = Matcher::vector_element_basic_type(this, $src);
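    // Merge-masking: lanes not selected by $mask must keep their original $src value, hence
    // $src is copied into $dst before the masked count.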
24260 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24261 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24262 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24263 %}
24264 ins_pipe( pipe_slow );
24265 %}
24266
24267 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24268 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24269 VM_Version::supports_avx512cd() &&
24270 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24271 match(Set dst (CountLeadingZerosV src));
24272 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24273 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24274 ins_encode %{
24275 int vlen_enc = vector_length_encoding(this, $src);
24276 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24277 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24278 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24279 %}
24280 ins_pipe( pipe_slow );
24281 %}
24282
24283 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24284 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24285 match(Set dst (CountLeadingZerosV src));
24286 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24287 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24288 ins_encode %{
24289 int vlen_enc = vector_length_encoding(this, $src);
24290 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24291 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24292 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24293 $rtmp$$Register, true, vlen_enc);
24294 %}
24295 ins_pipe( pipe_slow );
24296 %}
24297
24298 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24299 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24300 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24301 match(Set dst (CountLeadingZerosV src));
24302 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24303 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24304 ins_encode %{
24305 int vlen_enc = vector_length_encoding(this, $src);
24306 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24307 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24308 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24309 %}
24310 ins_pipe( pipe_slow );
24311 %}
24312
24313 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24314 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24315 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24316 match(Set dst (CountLeadingZerosV src));
24317 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24318 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24319 ins_encode %{
24320 int vlen_enc = vector_length_encoding(this, $src);
24321 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24322 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24323 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24324 %}
24325 ins_pipe( pipe_slow );
24326 %}
24327
24328 // ---------------------------------- Vector Masked Operations ------------------------------------
24329
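// The patterns below implement merge-masked (predicated) arithmetic: $dst doubles as the first
// input, and lanes whose mask bit is clear keep the value already held in $dst.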
24330 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24331 match(Set dst (AddVB (Binary dst src2) mask));
24332 match(Set dst (AddVS (Binary dst src2) mask));
24333 match(Set dst (AddVI (Binary dst src2) mask));
24334 match(Set dst (AddVL (Binary dst src2) mask));
24335 match(Set dst (AddVF (Binary dst src2) mask));
24336 match(Set dst (AddVD (Binary dst src2) mask));
24337 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24338 ins_encode %{
24339 int vlen_enc = vector_length_encoding(this);
24340 BasicType bt = Matcher::vector_element_basic_type(this);
24341 int opc = this->ideal_Opcode();
24342 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24343 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24344 %}
24345 ins_pipe( pipe_slow );
24346 %}
24347
24348 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24349 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24350 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24351 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24352 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24353 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24354 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24355 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24356 ins_encode %{
24357 int vlen_enc = vector_length_encoding(this);
24358 BasicType bt = Matcher::vector_element_basic_type(this);
24359 int opc = this->ideal_Opcode();
24360 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24361 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24362 %}
24363 ins_pipe( pipe_slow );
24364 %}
24365
24366 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24367 match(Set dst (XorV (Binary dst src2) mask));
24368 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24369 ins_encode %{
24370 int vlen_enc = vector_length_encoding(this);
24371 BasicType bt = Matcher::vector_element_basic_type(this);
24372 int opc = this->ideal_Opcode();
24373 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24374 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24375 %}
24376 ins_pipe( pipe_slow );
24377 %}
24378
24379 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24380 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24381 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24382 ins_encode %{
24383 int vlen_enc = vector_length_encoding(this);
24384 BasicType bt = Matcher::vector_element_basic_type(this);
24385 int opc = this->ideal_Opcode();
24386 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24387 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24388 %}
24389 ins_pipe( pipe_slow );
24390 %}
24391
24392 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24393 match(Set dst (OrV (Binary dst src2) mask));
24394 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24395 ins_encode %{
24396 int vlen_enc = vector_length_encoding(this);
24397 BasicType bt = Matcher::vector_element_basic_type(this);
24398 int opc = this->ideal_Opcode();
24399 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24400 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24401 %}
24402 ins_pipe( pipe_slow );
24403 %}
24404
24405 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24406 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24407 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24408 ins_encode %{
24409 int vlen_enc = vector_length_encoding(this);
24410 BasicType bt = Matcher::vector_element_basic_type(this);
24411 int opc = this->ideal_Opcode();
24412 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24413 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24414 %}
24415 ins_pipe( pipe_slow );
24416 %}
24417
24418 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24419 match(Set dst (AndV (Binary dst src2) mask));
24420 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24421 ins_encode %{
24422 int vlen_enc = vector_length_encoding(this);
24423 BasicType bt = Matcher::vector_element_basic_type(this);
24424 int opc = this->ideal_Opcode();
24425 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24426 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24427 %}
24428 ins_pipe( pipe_slow );
24429 %}
24430
24431 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24432 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24433 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24434 ins_encode %{
24435 int vlen_enc = vector_length_encoding(this);
24436 BasicType bt = Matcher::vector_element_basic_type(this);
24437 int opc = this->ideal_Opcode();
24438 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24439 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24440 %}
24441 ins_pipe( pipe_slow );
24442 %}
24443
24444 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24445 match(Set dst (SubVB (Binary dst src2) mask));
24446 match(Set dst (SubVS (Binary dst src2) mask));
24447 match(Set dst (SubVI (Binary dst src2) mask));
24448 match(Set dst (SubVL (Binary dst src2) mask));
24449 match(Set dst (SubVF (Binary dst src2) mask));
24450 match(Set dst (SubVD (Binary dst src2) mask));
24451 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24452 ins_encode %{
24453 int vlen_enc = vector_length_encoding(this);
24454 BasicType bt = Matcher::vector_element_basic_type(this);
24455 int opc = this->ideal_Opcode();
24456 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24457 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24458 %}
24459 ins_pipe( pipe_slow );
24460 %}
24461
24462 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24463 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24464 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24465 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24466 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24467 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24468 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24469 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24470 ins_encode %{
24471 int vlen_enc = vector_length_encoding(this);
24472 BasicType bt = Matcher::vector_element_basic_type(this);
24473 int opc = this->ideal_Opcode();
24474 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24475 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24476 %}
24477 ins_pipe( pipe_slow );
24478 %}
24479
24480 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24481 match(Set dst (MulVS (Binary dst src2) mask));
24482 match(Set dst (MulVI (Binary dst src2) mask));
24483 match(Set dst (MulVL (Binary dst src2) mask));
24484 match(Set dst (MulVF (Binary dst src2) mask));
24485 match(Set dst (MulVD (Binary dst src2) mask));
24486 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24487 ins_encode %{
24488 int vlen_enc = vector_length_encoding(this);
24489 BasicType bt = Matcher::vector_element_basic_type(this);
24490 int opc = this->ideal_Opcode();
24491 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24492 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24493 %}
24494 ins_pipe( pipe_slow );
24495 %}
24496
24497 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24498 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24499 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24500 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24501 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24502 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24503 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24504 ins_encode %{
24505 int vlen_enc = vector_length_encoding(this);
24506 BasicType bt = Matcher::vector_element_basic_type(this);
24507 int opc = this->ideal_Opcode();
24508 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24509 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24510 %}
24511 ins_pipe( pipe_slow );
24512 %}
24513
24514 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24515 match(Set dst (SqrtVF dst mask));
24516 match(Set dst (SqrtVD dst mask));
24517 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24518 ins_encode %{
24519 int vlen_enc = vector_length_encoding(this);
24520 BasicType bt = Matcher::vector_element_basic_type(this);
24521 int opc = this->ideal_Opcode();
24522 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24523 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24524 %}
24525 ins_pipe( pipe_slow );
24526 %}
24527
24528 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24529 match(Set dst (DivVF (Binary dst src2) mask));
24530 match(Set dst (DivVD (Binary dst src2) mask));
24531 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24532 ins_encode %{
24533 int vlen_enc = vector_length_encoding(this);
24534 BasicType bt = Matcher::vector_element_basic_type(this);
24535 int opc = this->ideal_Opcode();
24536 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24537 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24538 %}
24539 ins_pipe( pipe_slow );
24540 %}
24541
24542 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24543 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24544 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24545 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24546 ins_encode %{
24547 int vlen_enc = vector_length_encoding(this);
24548 BasicType bt = Matcher::vector_element_basic_type(this);
24549 int opc = this->ideal_Opcode();
24550 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24551 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24552 %}
24553 ins_pipe( pipe_slow );
24554 %}
24555
24556
24557 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24558 match(Set dst (RotateLeftV (Binary dst shift) mask));
24559 match(Set dst (RotateRightV (Binary dst shift) mask));
24560 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24561 ins_encode %{
24562 int vlen_enc = vector_length_encoding(this);
24563 BasicType bt = Matcher::vector_element_basic_type(this);
24564 int opc = this->ideal_Opcode();
24565 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24566 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24567 %}
24568 ins_pipe( pipe_slow );
24569 %}
24570
24571 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24572 match(Set dst (RotateLeftV (Binary dst src2) mask));
24573 match(Set dst (RotateRightV (Binary dst src2) mask));
24574 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24575 ins_encode %{
24576 int vlen_enc = vector_length_encoding(this);
24577 BasicType bt = Matcher::vector_element_basic_type(this);
24578 int opc = this->ideal_Opcode();
24579 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24580 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24581 %}
24582 ins_pipe( pipe_slow );
24583 %}
24584
24585 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24586 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24587 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24588 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24589 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24590 ins_encode %{
24591 int vlen_enc = vector_length_encoding(this);
24592 BasicType bt = Matcher::vector_element_basic_type(this);
24593 int opc = this->ideal_Opcode();
24594 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24595 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24596 %}
24597 ins_pipe( pipe_slow );
24598 %}
24599
24600 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24601 predicate(!n->as_ShiftV()->is_var_shift());
24602 match(Set dst (LShiftVS (Binary dst src2) mask));
24603 match(Set dst (LShiftVI (Binary dst src2) mask));
24604 match(Set dst (LShiftVL (Binary dst src2) mask));
24605 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24606 ins_encode %{
24607 int vlen_enc = vector_length_encoding(this);
24608 BasicType bt = Matcher::vector_element_basic_type(this);
24609 int opc = this->ideal_Opcode();
24610 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24611 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24612 %}
24613 ins_pipe( pipe_slow );
24614 %}
24615
24616 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24617 predicate(n->as_ShiftV()->is_var_shift());
24618 match(Set dst (LShiftVS (Binary dst src2) mask));
24619 match(Set dst (LShiftVI (Binary dst src2) mask));
24620 match(Set dst (LShiftVL (Binary dst src2) mask));
24621 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24622 ins_encode %{
24623 int vlen_enc = vector_length_encoding(this);
24624 BasicType bt = Matcher::vector_element_basic_type(this);
24625 int opc = this->ideal_Opcode();
24626 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24627 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24628 %}
24629 ins_pipe( pipe_slow );
24630 %}
24631
24632 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24633 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24634 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24635 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24636 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24637 ins_encode %{
24638 int vlen_enc = vector_length_encoding(this);
24639 BasicType bt = Matcher::vector_element_basic_type(this);
24640 int opc = this->ideal_Opcode();
24641 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24642 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24643 %}
24644 ins_pipe( pipe_slow );
24645 %}
24646
24647 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24648 predicate(!n->as_ShiftV()->is_var_shift());
24649 match(Set dst (RShiftVS (Binary dst src2) mask));
24650 match(Set dst (RShiftVI (Binary dst src2) mask));
24651 match(Set dst (RShiftVL (Binary dst src2) mask));
24652 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24653 ins_encode %{
24654 int vlen_enc = vector_length_encoding(this);
24655 BasicType bt = Matcher::vector_element_basic_type(this);
24656 int opc = this->ideal_Opcode();
24657 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24658 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24659 %}
24660 ins_pipe( pipe_slow );
24661 %}
24662
24663 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24664 predicate(n->as_ShiftV()->is_var_shift());
24665 match(Set dst (RShiftVS (Binary dst src2) mask));
24666 match(Set dst (RShiftVI (Binary dst src2) mask));
24667 match(Set dst (RShiftVL (Binary dst src2) mask));
24668 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24669 ins_encode %{
24670 int vlen_enc = vector_length_encoding(this);
24671 BasicType bt = Matcher::vector_element_basic_type(this);
24672 int opc = this->ideal_Opcode();
24673 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24674 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24675 %}
24676 ins_pipe( pipe_slow );
24677 %}
24678
24679 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24680 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24681 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24682 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24683 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24684 ins_encode %{
24685 int vlen_enc = vector_length_encoding(this);
24686 BasicType bt = Matcher::vector_element_basic_type(this);
24687 int opc = this->ideal_Opcode();
24688 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24689 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24690 %}
24691 ins_pipe( pipe_slow );
24692 %}
24693
24694 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24695 predicate(!n->as_ShiftV()->is_var_shift());
24696 match(Set dst (URShiftVS (Binary dst src2) mask));
24697 match(Set dst (URShiftVI (Binary dst src2) mask));
24698 match(Set dst (URShiftVL (Binary dst src2) mask));
24699 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24700 ins_encode %{
24701 int vlen_enc = vector_length_encoding(this);
24702 BasicType bt = Matcher::vector_element_basic_type(this);
24703 int opc = this->ideal_Opcode();
24704 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24705 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24706 %}
24707 ins_pipe( pipe_slow );
24708 %}
24709
24710 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24711 predicate(n->as_ShiftV()->is_var_shift());
24712 match(Set dst (URShiftVS (Binary dst src2) mask));
24713 match(Set dst (URShiftVI (Binary dst src2) mask));
24714 match(Set dst (URShiftVL (Binary dst src2) mask));
24715 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24716 ins_encode %{
24717 int vlen_enc = vector_length_encoding(this);
24718 BasicType bt = Matcher::vector_element_basic_type(this);
24719 int opc = this->ideal_Opcode();
24720 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24721 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24722 %}
24723 ins_pipe( pipe_slow );
24724 %}
24725
24726 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24727 match(Set dst (MaxV (Binary dst src2) mask));
24728 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24729 ins_encode %{
24730 int vlen_enc = vector_length_encoding(this);
24731 BasicType bt = Matcher::vector_element_basic_type(this);
24732 int opc = this->ideal_Opcode();
24733 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24734 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24735 %}
24736 ins_pipe( pipe_slow );
24737 %}
24738
24739 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24740 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24741 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24742 ins_encode %{
24743 int vlen_enc = vector_length_encoding(this);
24744 BasicType bt = Matcher::vector_element_basic_type(this);
24745 int opc = this->ideal_Opcode();
24746 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24747 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24748 %}
24749 ins_pipe( pipe_slow );
24750 %}
24751
24752 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24753 match(Set dst (MinV (Binary dst src2) mask));
24754 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24755 ins_encode %{
24756 int vlen_enc = vector_length_encoding(this);
24757 BasicType bt = Matcher::vector_element_basic_type(this);
24758 int opc = this->ideal_Opcode();
24759 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24760 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24761 %}
24762 ins_pipe( pipe_slow );
24763 %}
24764
24765 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24766 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24767 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24768 ins_encode %{
24769 int vlen_enc = vector_length_encoding(this);
24770 BasicType bt = Matcher::vector_element_basic_type(this);
24771 int opc = this->ideal_Opcode();
24772 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24773 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24774 %}
24775 ins_pipe( pipe_slow );
24776 %}
24777
24778 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24779 match(Set dst (VectorRearrange (Binary dst src2) mask));
24780 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24781 ins_encode %{
24782 int vlen_enc = vector_length_encoding(this);
24783 BasicType bt = Matcher::vector_element_basic_type(this);
24784 int opc = this->ideal_Opcode();
24785 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24786 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24787 %}
24788 ins_pipe( pipe_slow );
24789 %}
24790
24791 instruct vabs_masked(vec dst, kReg mask) %{
24792 match(Set dst (AbsVB dst mask));
24793 match(Set dst (AbsVS dst mask));
24794 match(Set dst (AbsVI dst mask));
24795 match(Set dst (AbsVL dst mask));
24796 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24797 ins_encode %{
24798 int vlen_enc = vector_length_encoding(this);
24799 BasicType bt = Matcher::vector_element_basic_type(this);
24800 int opc = this->ideal_Opcode();
24801 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24802 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24803 %}
24804 ins_pipe( pipe_slow );
24805 %}
24806
24807 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24808 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24809 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24810 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24811 ins_encode %{
24812     assert(UseFMA, "Needs FMA instruction support.");
24813 int vlen_enc = vector_length_encoding(this);
24814 BasicType bt = Matcher::vector_element_basic_type(this);
24815 int opc = this->ideal_Opcode();
24816 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24817 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24818 %}
24819 ins_pipe( pipe_slow );
24820 %}
24821
24822 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24823 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24824 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24825 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24826 ins_encode %{
24827     assert(UseFMA, "Needs FMA instruction support.");
24828 int vlen_enc = vector_length_encoding(this);
24829 BasicType bt = Matcher::vector_element_basic_type(this);
24830 int opc = this->ideal_Opcode();
24831 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24832 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24833 %}
24834 ins_pipe( pipe_slow );
24835 %}
24836
24837 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24838 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24839 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24840 ins_encode %{
24841 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24842 int vlen_enc = vector_length_encoding(this, $src1);
24843 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24844
24845     // Dispatch the comparison on the element type of src1.
24846 switch (src1_elem_bt) {
24847 case T_BYTE: {
24848 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24849 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24850 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24851 break;
24852 }
24853 case T_SHORT: {
24854 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24855 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24856 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24857 break;
24858 }
24859 case T_INT: {
24860 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24861 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24862 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24863 break;
24864 }
24865 case T_LONG: {
24866 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24867 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24868 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24869 break;
24870 }
24871 case T_FLOAT: {
24872 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24873 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24874 break;
24875 }
24876 case T_DOUBLE: {
24877 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24878 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24879 break;
24880 }
24881 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24882 }
24883 %}
24884 ins_pipe( pipe_slow );
24885 %}
24886
24887 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24888 predicate(Matcher::vector_length(n) <= 32);
24889 match(Set dst (MaskAll src));
24890 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24891 ins_encode %{
24892 int mask_len = Matcher::vector_length(this);
24893 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24894 %}
24895 ins_pipe( pipe_slow );
24896 %}
24897
24898 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24899 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24900 match(Set dst (XorVMask src (MaskAll cnt)));
24901 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24902 format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24903 ins_encode %{
24904 uint masklen = Matcher::vector_length(this);
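    // $rtmp and $ktmp are used to build an explicit (1 << masklen) - 1 constant so that only the
    // low masklen bits are complemented when no k-instruction of matching width is available.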
24905 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24906 %}
24907 ins_pipe( pipe_slow );
24908 %}
24909
24910 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24911 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24912 (Matcher::vector_length(n) == 16) ||
24913 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24914 match(Set dst (XorVMask src (MaskAll cnt)));
24915 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24916 ins_encode %{
24917 uint masklen = Matcher::vector_length(this);
24918 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24919 %}
24920 ins_pipe( pipe_slow );
24921 %}
24922
24923 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24924 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24925 match(Set dst (VectorLongToMask src));
24926 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24927 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24928 ins_encode %{
24929 int mask_len = Matcher::vector_length(this);
24930 int vec_enc = vector_length_encoding(mask_len);
24931 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24932 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24933 %}
24934 ins_pipe( pipe_slow );
24935 %}
24936
24937
24938 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24939 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24940 match(Set dst (VectorLongToMask src));
24941 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24942 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2 and $xtmp1 as TEMP" %}
24943 ins_encode %{
24944 int mask_len = Matcher::vector_length(this);
24945 assert(mask_len <= 32, "invalid mask length");
24946 int vec_enc = vector_length_encoding(mask_len);
24947 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24948 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24949 %}
24950 ins_pipe( pipe_slow );
24951 %}
24952
24953 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24954 predicate(n->bottom_type()->isa_vectmask());
24955 match(Set dst (VectorLongToMask src));
24956 format %{ "long_to_mask_evex $dst, $src\t!" %}
24957 ins_encode %{
24958 __ kmov($dst$$KRegister, $src$$Register);
24959 %}
24960 ins_pipe( pipe_slow );
24961 %}
24962
24963 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24964 match(Set dst (AndVMask src1 src2));
24965 match(Set dst (OrVMask src1 src2));
24966 match(Set dst (XorVMask src1 src2));
24967 effect(TEMP kscratch);
24968 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24969 ins_encode %{
24970 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24971 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24972 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24973 uint masklen = Matcher::vector_length(this);
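    // Byte-width k-register logic requires AVX512DQ; without it, narrow masks are widened to the
    // 16-bit (word) forms available in AVX512F.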
24974 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24975 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24976 %}
24977 ins_pipe( pipe_slow );
24978 %}
24979
24980 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24981 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24982 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24983 ins_encode %{
24984 int vlen_enc = vector_length_encoding(this);
24985 BasicType bt = Matcher::vector_element_basic_type(this);
24986 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24987 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24988 %}
24989 ins_pipe( pipe_slow );
24990 %}
24991
24992 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24993 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24994 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24995 ins_encode %{
24996 int vlen_enc = vector_length_encoding(this);
24997 BasicType bt = Matcher::vector_element_basic_type(this);
24998 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24999 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25000 %}
25001 ins_pipe( pipe_slow );
25002 %}
25003
25004 instruct castMM(kReg dst)
25005 %{
25006 match(Set dst (CastVV dst));
25007
25008 size(0);
25009 format %{ "# castVV of $dst" %}
25010 ins_encode(/* empty encoding */);
25011 ins_cost(0);
25012 ins_pipe(empty);
25013 %}
25014
25015 instruct castVV(vec dst)
25016 %{
25017 match(Set dst (CastVV dst));
25018
25019 size(0);
25020 format %{ "# castVV of $dst" %}
25021 ins_encode(/* empty encoding */);
25022 ins_cost(0);
25023 ins_pipe(empty);
25024 %}
25025
25026 instruct castVVLeg(legVec dst)
25027 %{
25028 match(Set dst (CastVV dst));
25029
25030 size(0);
25031 format %{ "# castVV of $dst" %}
25032 ins_encode(/* empty encoding */);
25033 ins_cost(0);
25034 ins_pipe(empty);
25035 %}
25036
25037 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25038 %{
25039 match(Set dst (IsInfiniteF src));
25040 effect(TEMP ktmp, KILL cr);
25041 format %{ "float_class_check $dst, $src" %}
25042 ins_encode %{
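    // vfpclass immediate 0x18 selects the +Infinity (0x08) and -Infinity (0x10) classes.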
25043 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25044 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25045 %}
25046 ins_pipe(pipe_slow);
25047 %}
25048
25049 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25050 %{
25051 match(Set dst (IsInfiniteD src));
25052 effect(TEMP ktmp, KILL cr);
25053 format %{ "double_class_check $dst, $src" %}
25054 ins_encode %{
25055 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25056 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25057 %}
25058 ins_pipe(pipe_slow);
25059 %}
25060
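// --------------------------------- Saturating Vector Operations ---------------------------------
// Saturating add/sub clamp to the element type's min/max instead of wrapping; subword types map
// to direct hardware forms, while int/long use the multi-instruction sequences further below.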
25061 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25062 %{
25063 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25064 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25065 match(Set dst (SaturatingAddV src1 src2));
25066 match(Set dst (SaturatingSubV src1 src2));
25067 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25068 ins_encode %{
25069 int vlen_enc = vector_length_encoding(this);
25070 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25071 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25072 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25073 %}
25074 ins_pipe(pipe_slow);
25075 %}
25076
25077 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25078 %{
25079 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25080 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25081 match(Set dst (SaturatingAddV src1 src2));
25082 match(Set dst (SaturatingSubV src1 src2));
25083 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25084 ins_encode %{
25085 int vlen_enc = vector_length_encoding(this);
25086 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25087 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25088 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25089 %}
25090 ins_pipe(pipe_slow);
25091 %}
25092
25093 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25094 %{
25095 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25096 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25097 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25098 match(Set dst (SaturatingAddV src1 src2));
25099 match(Set dst (SaturatingSubV src1 src2));
25100 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25101 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25102 ins_encode %{
25103 int vlen_enc = vector_length_encoding(this);
25104 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25105 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25106 $src1$$XMMRegister, $src2$$XMMRegister,
25107 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25108 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25109 %}
25110 ins_pipe(pipe_slow);
25111 %}
25112
25113 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25114 %{
25115 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25116 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25117 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25118 match(Set dst (SaturatingAddV src1 src2));
25119 match(Set dst (SaturatingSubV src1 src2));
25120 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25121 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25122 ins_encode %{
25123 int vlen_enc = vector_length_encoding(this);
25124 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25125 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25126 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25127 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25128 %}
25129 ins_pipe(pipe_slow);
25130 %}
25131
25132 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25133 %{
25134 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25135 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25136 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25137 match(Set dst (SaturatingAddV src1 src2));
25138 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25139 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25140 ins_encode %{
25141 int vlen_enc = vector_length_encoding(this);
25142 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25143 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25144 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25145 %}
25146 ins_pipe(pipe_slow);
25147 %}
25148
25149 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25150 %{
25151 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25152 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25153 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25154 match(Set dst (SaturatingAddV src1 src2));
25155 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25156 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25157 ins_encode %{
25158 int vlen_enc = vector_length_encoding(this);
25159 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25160 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25161 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25162 %}
25163 ins_pipe(pipe_slow);
25164 %}
25165
25166 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25167 %{
25168 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25169 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25170 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25171 match(Set dst (SaturatingSubV src1 src2));
25172 effect(TEMP ktmp);
25173 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25174 ins_encode %{
25175 int vlen_enc = vector_length_encoding(this);
25176 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25177 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25178 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25179 %}
25180 ins_pipe(pipe_slow);
25181 %}
25182
25183 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25184 %{
25185 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25186 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25187 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25188 match(Set dst (SaturatingSubV src1 src2));
25189 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25190 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25191 ins_encode %{
25192 int vlen_enc = vector_length_encoding(this);
25193 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25194 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25195 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25196 %}
25197 ins_pipe(pipe_slow);
25198 %}
25199
25200 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25201 %{
25202 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25203 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25204 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25205 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25206 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25207 ins_encode %{
25208 int vlen_enc = vector_length_encoding(this);
25209 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25210 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25211 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25212 %}
25213 ins_pipe(pipe_slow);
25214 %}
25215
25216 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25217 %{
25218 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25219 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25220 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25221 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25222 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25223 ins_encode %{
25224 int vlen_enc = vector_length_encoding(this);
25225 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25226 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25227 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25228 %}
25229 ins_pipe(pipe_slow);
25230 %}
25231
25232 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25233 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25234 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25235 match(Set dst (SaturatingAddV (Binary dst src) mask));
25236 match(Set dst (SaturatingSubV (Binary dst src) mask));
25237 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25238 ins_encode %{
25239 int vlen_enc = vector_length_encoding(this);
25240 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25241 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25242 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25243 %}
25244 ins_pipe( pipe_slow );
25245 %}
25246
25247 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25248 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25249 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25250 match(Set dst (SaturatingAddV (Binary dst src) mask));
25251 match(Set dst (SaturatingSubV (Binary dst src) mask));
25252 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25253 ins_encode %{
25254 int vlen_enc = vector_length_encoding(this);
25255 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25256 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25257 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25258 %}
25259 ins_pipe( pipe_slow );
25260 %}
25261
25262 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25263 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25264 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25265 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25266 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25267 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25268 ins_encode %{
25269 int vlen_enc = vector_length_encoding(this);
25270 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25271 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25272 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25273 %}
25274 ins_pipe( pipe_slow );
25275 %}
25276
25277 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25278 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25279 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25280 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25281 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25282 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25283 ins_encode %{
25284 int vlen_enc = vector_length_encoding(this);
25285 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25286 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25287 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25288 %}
25289 ins_pipe( pipe_slow );
25290 %}
25291
25292 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25293 %{
25294 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25295 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25296 ins_encode %{
25297 int vlen_enc = vector_length_encoding(this);
25298 BasicType bt = Matcher::vector_element_basic_type(this);
25299 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25300 %}
25301 ins_pipe(pipe_slow);
25302 %}
25303
25304 instruct reinterpretS2HF(regF dst, rRegI src)
25305 %{
25306 match(Set dst (ReinterpretS2HF src));
25307 format %{ "evmovw $dst, $src" %}
25308 ins_encode %{
25309 __ evmovw($dst$$XMMRegister, $src$$Register);
25310 %}
25311 ins_pipe(pipe_slow);
25312 %}
25313
25314 instruct reinterpretHF2S(rRegI dst, regF src)
25315 %{
25316 match(Set dst (ReinterpretHF2S src));
25317 format %{ "evmovw $dst, $src" %}
25318 ins_encode %{
25319 __ evmovw($dst$$Register, $src$$XMMRegister);
25320 %}
25321 ins_pipe(pipe_slow);
25322 %}
25323
25324 instruct convF2HFAndS2HF(regF dst, regF src)
25325 %{
25326 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25327 format %{ "convF2HFAndS2HF $dst, $src" %}
25328 ins_encode %{
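// imm8 0x04: rounding is controlled by MXCSR.RC (round-to-nearest-even by default)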
25329 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25330 %}
25331 ins_pipe(pipe_slow);
25332 %}
25333
25334 instruct convHF2SAndHF2F(regF dst, regF src)
25335 %{
25336 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25337 format %{ "convHF2SAndHF2F $dst, $src" %}
25338 ins_encode %{
25339 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25340 %}
25341 ins_pipe(pipe_slow);
25342 %}
25343
25344 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25345 %{
25346 match(Set dst (SqrtHF src));
25347 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25348 ins_encode %{
25349 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25350 %}
25351 ins_pipe(pipe_slow);
25352 %}
25353
25354 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25355 %{
25356 match(Set dst (AddHF src1 src2));
25357 match(Set dst (DivHF src1 src2));
25358 match(Set dst (MulHF src1 src2));
25359 match(Set dst (SubHF src1 src2));
25360 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25361 ins_encode %{
25362 int opcode = this->ideal_Opcode();
25363 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25364 %}
25365 ins_pipe(pipe_slow);
25366 %}
25367
25368 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25369 %{
25370 predicate(VM_Version::supports_avx10_2());
25371 match(Set dst (MaxHF src1 src2));
25372 match(Set dst (MinHF src1 src2));
25373
25374 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25375 ins_encode %{
25376 int opcode = this->ideal_Opcode();
25377 __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25378 %}
25379 ins_pipe( pipe_slow );
25380 %}
25381
25382 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25383 %{
25384 predicate(!VM_Version::supports_avx10_2());
25385 match(Set dst (MaxHF src1 src2));
25386 match(Set dst (MinHF src1 src2));
25387 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25388
25389 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25390 ins_encode %{
25391 int opcode = this->ideal_Opcode();
25392 __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25393 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25394 %}
25395 ins_pipe( pipe_slow );
25396 %}
25397
25398 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25399 %{
25400 match(Set dst (FmaHF src2 (Binary dst src1)));
25401 effect(DEF dst);
25402 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25403 ins_encode %{
25404 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25405 %}
25406 ins_pipe( pipe_slow );
25407 %}
25408
25409
25410 instruct vector_sqrt_HF_reg(vec dst, vec src)
25411 %{
25412 match(Set dst (SqrtVHF src));
25413 format %{ "vector_sqrt_fp16 $dst, $src" %}
25414 ins_encode %{
25415 int vlen_enc = vector_length_encoding(this);
25416 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25417 %}
25418 ins_pipe(pipe_slow);
25419 %}
25420
25421 instruct vector_sqrt_HF_mem(vec dst, memory src)
25422 %{
25423 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25424 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25425 ins_encode %{
25426 int vlen_enc = vector_length_encoding(this);
25427 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25428 %}
25429 ins_pipe(pipe_slow);
25430 %}
25431
25432 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25433 %{
25434 match(Set dst (AddVHF src1 src2));
25435 match(Set dst (DivVHF src1 src2));
25436 match(Set dst (MulVHF src1 src2));
25437 match(Set dst (SubVHF src1 src2));
25438 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25439 ins_encode %{
25440 int vlen_enc = vector_length_encoding(this);
25441 int opcode = this->ideal_Opcode();
25442 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25443 %}
25444 ins_pipe(pipe_slow);
25445 %}
25446
25447
25448 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25449 %{
25450 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25451 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25452 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25453 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25454 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25455 ins_encode %{
25456 int vlen_enc = vector_length_encoding(this);
25457 int opcode = this->ideal_Opcode();
25458 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25459 %}
25460 ins_pipe(pipe_slow);
25461 %}
25462
25463 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25464 %{
25465 match(Set dst (FmaVHF src2 (Binary dst src1)));
25466 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25467 ins_encode %{
25468 int vlen_enc = vector_length_encoding(this);
25469 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25470 %}
25471 ins_pipe( pipe_slow );
25472 %}
25473
25474 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25475 %{
25476 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25477 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25478 ins_encode %{
25479 int vlen_enc = vector_length_encoding(this);
25480 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25481 %}
25482 ins_pipe( pipe_slow );
25483 %}
25484
25485 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25486 %{
25487 predicate(VM_Version::supports_avx10_2());
25488 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25489 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25490 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25491 ins_encode %{
25492 int vlen_enc = vector_length_encoding(this);
25493 int opcode = this->ideal_Opcode();
25494 __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25495 k0, vlen_enc);
25496 %}
25497 ins_pipe( pipe_slow );
25498 %}
25499
25500 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25501 %{
25502 predicate(VM_Version::supports_avx10_2());
25503 match(Set dst (MinVHF src1 src2));
25504 match(Set dst (MaxVHF src1 src2));
25505 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25506 ins_encode %{
25507 int vlen_enc = vector_length_encoding(this);
25508 int opcode = this->ideal_Opcode();
25509 __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25510 k0, vlen_enc);
25511 %}
25512 ins_pipe( pipe_slow );
25513 %}
25514
25515 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25516 %{
25517 predicate(!VM_Version::supports_avx10_2());
25518 match(Set dst (MinVHF src1 src2));
25519 match(Set dst (MaxVHF src1 src2));
25520 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25521 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25522 ins_encode %{
25523 int vlen_enc = vector_length_encoding(this);
25524 int opcode = this->ideal_Opcode();
25525 __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25526 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25527 %}
25528 ins_pipe( pipe_slow );
25529 %}
25530
25531 //----------PEEPHOLE RULES-----------------------------------------------------
25532 // These must follow all instruction definitions as they use the names
25533 // defined in the instructions definitions.
25534 //
25535 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25537 //
25538 // peepmatch ( root_instr_name [preceding_instruction]* );
25539 //
25540 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the procedure
// // should reside in the architecture-dependent peephole file and has the
// // signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, functions that, when invoked, return a new
// // node as defined in peepreplace, and the rule numbers of the nodes appearing
// // in the corresponding peepmatch. The procedure returns true if the
// // transformation succeeded, otherwise false.
25549 //
25550 // peepconstraint %{
25551 // (instruction_number.operand_name relational_op instruction_number.operand_name
25552 // [, ...] );
// // instruction numbers are zero-based, in left-to-right order as they appear in peepmatch
25554 //
25555 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25556 // // provide an instruction_number.operand_name for each operand that appears
25557 // // in the replacement instruction's match rule
25558 //
25559 // ---------VM FLAGS---------------------------------------------------------
25560 //
25561 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25562 //
25563 // Each peephole rule is given an identifying number starting with zero and
25564 // increasing by one in the order seen by the parser. An individual peephole
25565 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25566 // on the command-line.
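// For example, -XX:OptoPeepholeAt=2 enables only the peephole rule numbered 2.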
25567 //
25568 // ---------CURRENT LIMITATIONS----------------------------------------------
25569 //
25570 // Only transformations inside a basic block (do we need more for peephole)
25571 //
25572 // ---------EXAMPLE----------------------------------------------------------
25573 //
25574 // // pertinent parts of existing instructions in architecture description
25575 // instruct movI(rRegI dst, rRegI src)
25576 // %{
25577 // match(Set dst (CopyI src));
25578 // %}
25579 //
25580 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25581 // %{
25582 // match(Set dst (AddI dst src));
25583 // effect(KILL cr);
25584 // %}
25585 //
25586 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25587 // %{
25588 // match(Set dst (AddI dst src));
25589 // %}
25590 //
25591 // 1. Simple replacement
25592 // - Only match adjacent instructions in same basic block
25593 // - Only equality constraints
25594 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25595 // - Only one replacement instruction
25596 //
25597 // // Change (inc mov) to lea
25598 // peephole %{
25599 // // lea should only be emitted when beneficial
25600 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25601 // // increment preceded by register-register move
25602 // peepmatch ( incI_rReg movI );
25603 // // require that the destination register of the increment
25604 // // match the destination register of the move
25605 // peepconstraint ( 0.dst == 1.dst );
25606 // // construct a replacement instruction that sets
25607 // // the destination to ( move's source register + one )
25608 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25609 // %}
25610 //
25611 // 2. Procedural replacement
// - More flexible matching of relevant nodes
25613 // - More flexible constraints
25614 // - More flexible transformations
// - May utilise the architecture-dependent API more effectively
25616 // - Currently only one replacement instruction due to adlc parsing capabilities
25617 //
25618 // // Change (inc mov) to lea
25619 // peephole %{
25620 // // lea should only be emitted when beneficial
25621 // peeppredicate( VM_Version::supports_fast_2op_lea() );
// // the rule numbers of the nodes matched here are passed into the procedure below
25623 // peepmatch ( incI_rReg movI );
// // the procedure responsible for the transformation
25625 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates this
// // node is passed into the procedure above
25628 // peepreplace ( leaI_rReg_immI() );
25629 // %}
25630
// These instructions are not matched by the matcher but are used by the peephole rules below
25632 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25633 %{
25634 predicate(false);
25635 match(Set dst (AddI src1 src2));
25636 format %{ "leal $dst, [$src1 + $src2]" %}
25637 ins_encode %{
25638 Register dst = $dst$$Register;
25639 Register src1 = $src1$$Register;
25640 Register src2 = $src2$$Register;
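// rbp and r13 cannot be encoded as a base register without a displacement byte,
// so prefer the other operand as the base when src1 is one of them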
25641 if (src1 != rbp && src1 != r13) {
25642 __ leal(dst, Address(src1, src2, Address::times_1));
25643 } else {
25644 assert(src2 != rbp && src2 != r13, "");
25645 __ leal(dst, Address(src2, src1, Address::times_1));
25646 }
25647 %}
25648 ins_pipe(ialu_reg_reg);
25649 %}
25650
25651 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25652 %{
25653 predicate(false);
25654 match(Set dst (AddI src1 src2));
25655 format %{ "leal $dst, [$src1 + $src2]" %}
25656 ins_encode %{
25657 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25658 %}
25659 ins_pipe(ialu_reg_reg);
25660 %}
25661
25662 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25663 %{
25664 predicate(false);
25665 match(Set dst (LShiftI src shift));
25666 format %{ "leal $dst, [$src << $shift]" %}
25667 ins_encode %{
25668 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25669 Register src = $src$$Register;
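// a shift by 1 can use the shorter base+index form [src + src]; the index-only
// form (no base register) always requires a 32-bit displacement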
25670 if (scale == Address::times_2 && src != rbp && src != r13) {
25671 __ leal($dst$$Register, Address(src, src, Address::times_1));
25672 } else {
25673 __ leal($dst$$Register, Address(noreg, src, scale));
25674 }
25675 %}
25676 ins_pipe(ialu_reg_reg);
25677 %}
25678
25679 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25680 %{
25681 predicate(false);
25682 match(Set dst (AddL src1 src2));
25683 format %{ "leaq $dst, [$src1 + $src2]" %}
25684 ins_encode %{
25685 Register dst = $dst$$Register;
25686 Register src1 = $src1$$Register;
25687 Register src2 = $src2$$Register;
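// same base-register restriction as in leaI_rReg_rReg_peep above: avoid rbp/r13 as the base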
25688 if (src1 != rbp && src1 != r13) {
25689 __ leaq(dst, Address(src1, src2, Address::times_1));
25690 } else {
25691 assert(src2 != rbp && src2 != r13, "");
25692 __ leaq(dst, Address(src2, src1, Address::times_1));
25693 }
25694 %}
25695 ins_pipe(ialu_reg_reg);
25696 %}
25697
25698 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25699 %{
25700 predicate(false);
25701 match(Set dst (AddL src1 src2));
25702 format %{ "leaq $dst, [$src1 + $src2]" %}
25703 ins_encode %{
25704 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25705 %}
25706 ins_pipe(ialu_reg_reg);
25707 %}
25708
25709 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25710 %{
25711 predicate(false);
25712 match(Set dst (LShiftL src shift));
25713 format %{ "leaq $dst, [$src << $shift]" %}
25714 ins_encode %{
25715 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25716 Register src = $src$$Register;
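// same encoding consideration as in leaI_rReg_immI2_peep above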
25717 if (scale == Address::times_2 && src != rbp && src != r13) {
25718 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25719 } else {
25720 __ leaq($dst$$Register, Address(noreg, src, scale));
25721 }
25722 %}
25723 ins_pipe(ialu_reg_reg);
25724 %}
25725
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally only
// beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) or on Intel Cascade Lake.
25732
25733 peephole
25734 %{
25735 peeppredicate(VM_Version::supports_fast_2op_lea());
25736 peepmatch (addI_rReg);
25737 peepprocedure (lea_coalesce_reg);
25738 peepreplace (leaI_rReg_rReg_peep());
25739 %}
25740
25741 peephole
25742 %{
25743 peeppredicate(VM_Version::supports_fast_2op_lea());
25744 peepmatch (addI_rReg_imm);
25745 peepprocedure (lea_coalesce_imm);
25746 peepreplace (leaI_rReg_immI_peep());
25747 %}
25748
25749 peephole
25750 %{
25751 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25752 VM_Version::is_intel_cascade_lake());
25753 peepmatch (incI_rReg);
25754 peepprocedure (lea_coalesce_imm);
25755 peepreplace (leaI_rReg_immI_peep());
25756 %}
25757
25758 peephole
25759 %{
25760 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25761 VM_Version::is_intel_cascade_lake());
25762 peepmatch (decI_rReg);
25763 peepprocedure (lea_coalesce_imm);
25764 peepreplace (leaI_rReg_immI_peep());
25765 %}
25766
25767 peephole
25768 %{
25769 peeppredicate(VM_Version::supports_fast_2op_lea());
25770 peepmatch (salI_rReg_immI2);
25771 peepprocedure (lea_coalesce_imm);
25772 peepreplace (leaI_rReg_immI2_peep());
25773 %}
25774
25775 peephole
25776 %{
25777 peeppredicate(VM_Version::supports_fast_2op_lea());
25778 peepmatch (addL_rReg);
25779 peepprocedure (lea_coalesce_reg);
25780 peepreplace (leaL_rReg_rReg_peep());
25781 %}
25782
25783 peephole
25784 %{
25785 peeppredicate(VM_Version::supports_fast_2op_lea());
25786 peepmatch (addL_rReg_imm);
25787 peepprocedure (lea_coalesce_imm);
25788 peepreplace (leaL_rReg_immL32_peep());
25789 %}
25790
25791 peephole
25792 %{
25793 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25794 VM_Version::is_intel_cascade_lake());
25795 peepmatch (incL_rReg);
25796 peepprocedure (lea_coalesce_imm);
25797 peepreplace (leaL_rReg_immL32_peep());
25798 %}
25799
25800 peephole
25801 %{
25802 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25803 VM_Version::is_intel_cascade_lake());
25804 peepmatch (decL_rReg);
25805 peepprocedure (lea_coalesce_imm);
25806 peepreplace (leaL_rReg_immL32_peep());
25807 %}
25808
25809 peephole
25810 %{
25811 peeppredicate(VM_Version::supports_fast_2op_lea());
25812 peepmatch (salL_rReg_immI2);
25813 peepprocedure (lea_coalesce_imm);
25814 peepreplace (leaL_rReg_immI2_peep());
25815 %}
25816
25817 peephole
25818 %{
25819 peepmatch (leaPCompressedOopOffset);
25820 peepprocedure (lea_remove_redundant);
25821 %}
25822
25823 peephole
25824 %{
25825 peepmatch (leaP8Narrow);
25826 peepprocedure (lea_remove_redundant);
25827 %}
25828
25829 peephole
25830 %{
25831 peepmatch (leaP32Narrow);
25832 peepprocedure (lea_remove_redundant);
25833 %}
25834
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
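// For example, an AND that has just set ZF/SF on a register, followed by a TEST of the same
// register before a JCC: the TEST adds no new flag information and can safely be removed.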
25837
25838 //int variant
25839 peephole
25840 %{
25841 peepmatch (testI_reg);
25842 peepprocedure (test_may_remove);
25843 %}
25844
25845 //long variant
25846 peephole
25847 %{
25848 peepmatch (testL_reg);
25849 peepprocedure (test_may_remove);
25850 %}
25851
25852
25853 //----------SMARTSPILL RULES---------------------------------------------------
25854 // These must follow all instruction definitions as they use the names
25855 // defined in the instructions definitions.