1 //
2 // Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 Common Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // XMM registers. 512-bit registers or 8 words each, labeled (a)-p.
63 // Word a in each register holds a Float, words ab hold a Double.
64 // The whole registers are used in SSE4.2 version intrinsics,
65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
66 // UseXMMForArrayCopy and UseSuperword flags).
67 // For pre EVEX enabled architectures:
68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
69 // For EVEX enabled architectures:
70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
71 //
72 // Linux ABI: No register preserved across function calls
73 // XMM0-XMM7 might hold parameters
74 // Windows ABI: XMM6-XMM15 preserved across function calls
75 // XMM0-XMM3 might hold parameters
76
77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
93
94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
110
111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
127
128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
144
145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
161
162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
178
179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
183 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
195
196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
212
213 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
214 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
215 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
216 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
217 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
218 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
219 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
220 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
221 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
222 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
223 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
224 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
225 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
226 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
227 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
228 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
229
230 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
231 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
232 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
233 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
234 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
235 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
236 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
237 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
238 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
239 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
240 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
241 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
242 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
243 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
244 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
245 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
246
247 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
248 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
249 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
250 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
251 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
252 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
253 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
254 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
255 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
256 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
257 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
258 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
259 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
260 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
261 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
262 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
263
264 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
265 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
266 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
267 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
268 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
269 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
270 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
271 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
272 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
273 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
274 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
275 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
276 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
277 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
278 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
279 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
280
281 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
282 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
283 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
284 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
285 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
286 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
287 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
288 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
289 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
290 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
291 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
292 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
293 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
294 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
295 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
296 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
297
298 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
299 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
300 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
301 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
302 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
303 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
304 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
305 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
306 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
307 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
308 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
309 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
310 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
311 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
312 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
313 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
314
315 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
316 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
317 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
318 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
319 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
320 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
321 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
322 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
323 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
324 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
325 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
326 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
327 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
328 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
329 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
330 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
331
332 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
333 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
334 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
335 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
336 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
337 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
338 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
339 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
340 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
341 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
342 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
343 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
344 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
345 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
346 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
347 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
348
349 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
350 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
351 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
352 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
353 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
354 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
355 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
356 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
357 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
358 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
359 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
360 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
361 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
362 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
363 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
364 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
365
366 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
367 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
368 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
369 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
370 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
371 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
372 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
373 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
374 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
375 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
376 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
377 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
378 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
379 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
380 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
381 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
382
383 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
384 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
385 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
386 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
387 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
388 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
389 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
390 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
391 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
392 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
393 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
394 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
395 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
396 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
397 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
398 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
399
400 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
401 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
402 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
403 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
404 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
405 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
406 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
407 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
408 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
409 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
410 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
411 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
412 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
413 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
414 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
415 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
416
417 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
418 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
419 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
420 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
421 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
422 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
423 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
424 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
425 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
426 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
427 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
428 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
429 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
430 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
431 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
432 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
433
434 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
435 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
436 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
437 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
438 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
439 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
440 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
441 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
442 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
443 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
444 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
445 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
446 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
447 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
448 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
449 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
450
451 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
452 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
453 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
454 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
455 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
456 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
457 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
458 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
459 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
460 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
461 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
462 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
463 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
464 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
465 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
466 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
467
468 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
469 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
470 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
471 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
472 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
473 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
474 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
475 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
476 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
477 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
478 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
479 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
480 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
481 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
482 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
483 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
484
485 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
486 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
487 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
488 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
489 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
490 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
491 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
492 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
493 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
494 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
495 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
496 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
497 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
498 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
499 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
500 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
501
502 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
503 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
504 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
505 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
506 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
507 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
508 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
509 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
510 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
511 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
512 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
513 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
514 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
515 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
516 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
517 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
518
519 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
520 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
521 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
522 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
523 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
524 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
525 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
526 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
527 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
528 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
529 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
530 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
531 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
532 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
533 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
534 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
535
536 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
537 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
538 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
539 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
540 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
541 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
542 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
543 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
544 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
545 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
546 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
547 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
548 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
549 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
550 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
551 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
552
553 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
554 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
555 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
556 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
557 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
558 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
559 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
560 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
561 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
562 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
563 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
564 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
565 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
566 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
567 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
568 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
569
570 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
571 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
572 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
573 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
574 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
575 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
576 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
577 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
578 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
579 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
580 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
581 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
582 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
583 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
584 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
585 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
586
587 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
588 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
589 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
590 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
591 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
592 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
593 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
594 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
595 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
596 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
597 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
598 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
599 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
600 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
601 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
602 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
603
604 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
605 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
606 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
607 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
608 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
609 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
610 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
611 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
612 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
613 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
614 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
615 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
616 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
617 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
618 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
619 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
620
621 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
622
623 // AVX3 Mask Registers.
624 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
625 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
626
627 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
628 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
629
630 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
631 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
632
633 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
634 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
635
636 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
637 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
638
639 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
640 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
641
642 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
643 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
644
645
646 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
647 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
648 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
649 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
650 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
651 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
652 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
653 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
654 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
655 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
656 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
657 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
658 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
659 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
660 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
661 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
662 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
663 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
664 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
665 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
666 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
667 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
668 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
669 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
670 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
671 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
672 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
673 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
674 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
675 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
676 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
677 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
678
679 alloc_class chunk2(K7, K7_H,
680 K6, K6_H,
681 K5, K5_H,
682 K4, K4_H,
683 K3, K3_H,
684 K2, K2_H,
685 K1, K1_H);
686
687 reg_class vectmask_reg(K1, K1_H,
688 K2, K2_H,
689 K3, K3_H,
690 K4, K4_H,
691 K5, K5_H,
692 K6, K6_H,
693 K7, K7_H);
694
695 reg_class vectmask_reg_K1(K1, K1_H);
696 reg_class vectmask_reg_K2(K2, K2_H);
697 reg_class vectmask_reg_K3(K3, K3_H);
698 reg_class vectmask_reg_K4(K4, K4_H);
699 reg_class vectmask_reg_K5(K5, K5_H);
700 reg_class vectmask_reg_K6(K6, K6_H);
701 reg_class vectmask_reg_K7(K7, K7_H);
702
703 // flags allocation class should be last.
704 alloc_class chunk3(RFLAGS);
705
706
707 // Singleton class for condition codes
708 reg_class int_flags(RFLAGS);
709
710 // Class for pre evex float registers
711 reg_class float_reg_legacy(XMM0,
712 XMM1,
713 XMM2,
714 XMM3,
715 XMM4,
716 XMM5,
717 XMM6,
718 XMM7,
719 XMM8,
720 XMM9,
721 XMM10,
722 XMM11,
723 XMM12,
724 XMM13,
725 XMM14,
726 XMM15);
727
728 // Class for evex float registers
729 reg_class float_reg_evex(XMM0,
730 XMM1,
731 XMM2,
732 XMM3,
733 XMM4,
734 XMM5,
735 XMM6,
736 XMM7,
737 XMM8,
738 XMM9,
739 XMM10,
740 XMM11,
741 XMM12,
742 XMM13,
743 XMM14,
744 XMM15,
745 XMM16,
746 XMM17,
747 XMM18,
748 XMM19,
749 XMM20,
750 XMM21,
751 XMM22,
752 XMM23,
753 XMM24,
754 XMM25,
755 XMM26,
756 XMM27,
757 XMM28,
758 XMM29,
759 XMM30,
760 XMM31);
761
762 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
763 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
764
765 // Class for pre evex double registers
766 reg_class double_reg_legacy(XMM0, XMM0b,
767 XMM1, XMM1b,
768 XMM2, XMM2b,
769 XMM3, XMM3b,
770 XMM4, XMM4b,
771 XMM5, XMM5b,
772 XMM6, XMM6b,
773 XMM7, XMM7b,
774 XMM8, XMM8b,
775 XMM9, XMM9b,
776 XMM10, XMM10b,
777 XMM11, XMM11b,
778 XMM12, XMM12b,
779 XMM13, XMM13b,
780 XMM14, XMM14b,
781 XMM15, XMM15b);
782
783 // Class for evex double registers
784 reg_class double_reg_evex(XMM0, XMM0b,
785 XMM1, XMM1b,
786 XMM2, XMM2b,
787 XMM3, XMM3b,
788 XMM4, XMM4b,
789 XMM5, XMM5b,
790 XMM6, XMM6b,
791 XMM7, XMM7b,
792 XMM8, XMM8b,
793 XMM9, XMM9b,
794 XMM10, XMM10b,
795 XMM11, XMM11b,
796 XMM12, XMM12b,
797 XMM13, XMM13b,
798 XMM14, XMM14b,
799 XMM15, XMM15b,
800 XMM16, XMM16b,
801 XMM17, XMM17b,
802 XMM18, XMM18b,
803 XMM19, XMM19b,
804 XMM20, XMM20b,
805 XMM21, XMM21b,
806 XMM22, XMM22b,
807 XMM23, XMM23b,
808 XMM24, XMM24b,
809 XMM25, XMM25b,
810 XMM26, XMM26b,
811 XMM27, XMM27b,
812 XMM28, XMM28b,
813 XMM29, XMM29b,
814 XMM30, XMM30b,
815 XMM31, XMM31b);
816
817 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
818 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
819
820 // Class for pre evex 32bit vector registers
821 reg_class vectors_reg_legacy(XMM0,
822 XMM1,
823 XMM2,
824 XMM3,
825 XMM4,
826 XMM5,
827 XMM6,
828 XMM7,
829 XMM8,
830 XMM9,
831 XMM10,
832 XMM11,
833 XMM12,
834 XMM13,
835 XMM14,
836 XMM15);
837
838 // Class for evex 32bit vector registers
839 reg_class vectors_reg_evex(XMM0,
840 XMM1,
841 XMM2,
842 XMM3,
843 XMM4,
844 XMM5,
845 XMM6,
846 XMM7,
847 XMM8,
848 XMM9,
849 XMM10,
850 XMM11,
851 XMM12,
852 XMM13,
853 XMM14,
854 XMM15,
855 XMM16,
856 XMM17,
857 XMM18,
858 XMM19,
859 XMM20,
860 XMM21,
861 XMM22,
862 XMM23,
863 XMM24,
864 XMM25,
865 XMM26,
866 XMM27,
867 XMM28,
868 XMM29,
869 XMM30,
870 XMM31);
871
872 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
873 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
874
875 // Class for all 64bit vector registers
876 reg_class vectord_reg_legacy(XMM0, XMM0b,
877 XMM1, XMM1b,
878 XMM2, XMM2b,
879 XMM3, XMM3b,
880 XMM4, XMM4b,
881 XMM5, XMM5b,
882 XMM6, XMM6b,
883 XMM7, XMM7b,
884 XMM8, XMM8b,
885 XMM9, XMM9b,
886 XMM10, XMM10b,
887 XMM11, XMM11b,
888 XMM12, XMM12b,
889 XMM13, XMM13b,
890 XMM14, XMM14b,
891 XMM15, XMM15b);
892
893 // Class for all 64bit vector registers
894 reg_class vectord_reg_evex(XMM0, XMM0b,
895 XMM1, XMM1b,
896 XMM2, XMM2b,
897 XMM3, XMM3b,
898 XMM4, XMM4b,
899 XMM5, XMM5b,
900 XMM6, XMM6b,
901 XMM7, XMM7b,
902 XMM8, XMM8b,
903 XMM9, XMM9b,
904 XMM10, XMM10b,
905 XMM11, XMM11b,
906 XMM12, XMM12b,
907 XMM13, XMM13b,
908 XMM14, XMM14b,
909 XMM15, XMM15b,
910 XMM16, XMM16b,
911 XMM17, XMM17b,
912 XMM18, XMM18b,
913 XMM19, XMM19b,
914 XMM20, XMM20b,
915 XMM21, XMM21b,
916 XMM22, XMM22b,
917 XMM23, XMM23b,
918 XMM24, XMM24b,
919 XMM25, XMM25b,
920 XMM26, XMM26b,
921 XMM27, XMM27b,
922 XMM28, XMM28b,
923 XMM29, XMM29b,
924 XMM30, XMM30b,
925 XMM31, XMM31b);
926
927 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
928 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
929
930 // Class for all 128bit vector registers
931 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
932 XMM1, XMM1b, XMM1c, XMM1d,
933 XMM2, XMM2b, XMM2c, XMM2d,
934 XMM3, XMM3b, XMM3c, XMM3d,
935 XMM4, XMM4b, XMM4c, XMM4d,
936 XMM5, XMM5b, XMM5c, XMM5d,
937 XMM6, XMM6b, XMM6c, XMM6d,
938 XMM7, XMM7b, XMM7c, XMM7d,
939 XMM8, XMM8b, XMM8c, XMM8d,
940 XMM9, XMM9b, XMM9c, XMM9d,
941 XMM10, XMM10b, XMM10c, XMM10d,
942 XMM11, XMM11b, XMM11c, XMM11d,
943 XMM12, XMM12b, XMM12c, XMM12d,
944 XMM13, XMM13b, XMM13c, XMM13d,
945 XMM14, XMM14b, XMM14c, XMM14d,
946 XMM15, XMM15b, XMM15c, XMM15d);
947
948 // Class for all 128bit vector registers
949 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
950 XMM1, XMM1b, XMM1c, XMM1d,
951 XMM2, XMM2b, XMM2c, XMM2d,
952 XMM3, XMM3b, XMM3c, XMM3d,
953 XMM4, XMM4b, XMM4c, XMM4d,
954 XMM5, XMM5b, XMM5c, XMM5d,
955 XMM6, XMM6b, XMM6c, XMM6d,
956 XMM7, XMM7b, XMM7c, XMM7d,
957 XMM8, XMM8b, XMM8c, XMM8d,
958 XMM9, XMM9b, XMM9c, XMM9d,
959 XMM10, XMM10b, XMM10c, XMM10d,
960 XMM11, XMM11b, XMM11c, XMM11d,
961 XMM12, XMM12b, XMM12c, XMM12d,
962 XMM13, XMM13b, XMM13c, XMM13d,
963 XMM14, XMM14b, XMM14c, XMM14d,
964 XMM15, XMM15b, XMM15c, XMM15d,
965 XMM16, XMM16b, XMM16c, XMM16d,
966 XMM17, XMM17b, XMM17c, XMM17d,
967 XMM18, XMM18b, XMM18c, XMM18d,
968 XMM19, XMM19b, XMM19c, XMM19d,
969 XMM20, XMM20b, XMM20c, XMM20d,
970 XMM21, XMM21b, XMM21c, XMM21d,
971 XMM22, XMM22b, XMM22c, XMM22d,
972 XMM23, XMM23b, XMM23c, XMM23d,
973 XMM24, XMM24b, XMM24c, XMM24d,
974 XMM25, XMM25b, XMM25c, XMM25d,
975 XMM26, XMM26b, XMM26c, XMM26d,
976 XMM27, XMM27b, XMM27c, XMM27d,
977 XMM28, XMM28b, XMM28c, XMM28d,
978 XMM29, XMM29b, XMM29c, XMM29d,
979 XMM30, XMM30b, XMM30c, XMM30d,
980 XMM31, XMM31b, XMM31c, XMM31d);
981
982 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
983 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
984
985 // Class for all 256bit vector registers
986 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
987 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
988 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
989 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
990 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
991 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
992 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
993 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
994 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
995 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
996 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
997 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
998 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
999 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1000 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1001 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1002
1003 // Class for all 256bit vector registers
1004 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1005 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1006 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1007 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1008 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1009 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1010 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1011 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1012 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1013 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1014 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1015 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1016 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1017 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1018 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1019 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1020 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1021 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1022 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1023 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1024 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1025 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1026 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1027 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1028 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1029 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1030 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1031 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1032 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1033 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1034 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1035 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1036
1037 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1038 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1039
1040 // Class for all 512bit vector registers
1041 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1042 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1043 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1044 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1045 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1046 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1047 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1048 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1049 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1057 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1073
1074 // Class for restricted 512bit vector registers
1075 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1076 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1077 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1078 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1079 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1080 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1081 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1082 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1083 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1084 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1085 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1086 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1087 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1088 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1089 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1090 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1091
1092 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1093 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1094
1095 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1096 %}
1097
1098
1099 //----------SOURCE BLOCK-------------------------------------------------------
1100 // This is a block of C++ code which provides values, functions, and
1101 // definitions necessary in the rest of the architecture description
1102
1103 source_hpp %{
1104 // Header information of the source block.
1105 // Method declarations/definitions which are used outside
1106 // the ad-scope can conveniently be defined here.
1107 //
1108 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} blocks freely as needed.
1110
1111 #include "runtime/vm_version.hpp"
1112
1113 class NativeJump;
1114
1115 class CallStubImpl {
1116
1117 //--------------------------------------------------------------
1118 //---< Used for optimization in Compile::shorten_branches >---
1119 //--------------------------------------------------------------
1120
1121 public:
1122 // Size of call trampoline stub.
1123 static uint size_call_trampoline() {
1124 return 0; // no call trampolines on this platform
1125 }
1126
// Number of relocations needed by a call trampoline stub.
1128 static uint reloc_call_trampoline() {
1129 return 0; // no call trampolines on this platform
1130 }
1131 };
1132
1133 class HandlerImpl {
1134
1135 public:
1136
1137 static int emit_exception_handler(C2_MacroAssembler *masm);
1138 static int emit_deopt_handler(C2_MacroAssembler* masm);
1139
1140 static uint size_exception_handler() {
1141 // NativeCall instruction size is the same as NativeJump.
// The exception handler starts out as a jump and can be patched to
// a call by deoptimization. (4932387)
1144 // Note that this value is also credited (in output.cpp) to
1145 // the size of the code section.
1146 return NativeJump::instruction_size;
1147 }
1148
1149 static uint size_deopt_handler() {
// Three 5-byte instructions plus one move for the unreachable address.
1151 return 15+3;
1152 }
1153 };
1154
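// Map a vector size in bytes to the assembler's AVX vector length encoding.
// For example, a 32 byte (256 bit) vector maps to Assembler::AVX_256bit,
// while sizes of 16 bytes or less use the 128 bit encoding.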
1155 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
1156 switch(bytes) {
1157 case 4: // fall-through
1158 case 8: // fall-through
1159 case 16: return Assembler::AVX_128bit;
1160 case 32: return Assembler::AVX_256bit;
1161 case 64: return Assembler::AVX_512bit;
1162
1163 default: {
1164 ShouldNotReachHere();
1165 return Assembler::AVX_NoVec;
1166 }
1167 }
1168 }
1169
1170 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
1171 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
1172 }
1173
1174 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
1175 uint def_idx = use->operand_index(opnd);
1176 Node* def = use->in(def_idx);
1177 return vector_length_encoding(def);
1178 }
1179
1180 static inline bool is_vector_popcount_predicate(BasicType bt) {
1181 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
1182 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
1183 }
1184
1185 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
1186 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
1187 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
1188 }
1189
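// Platform-dependent node flags. Flag_intel_jcc_erratum marks branches that may need
// extra padding (see MachNode::compute_padding below); the remaining flags record
// which condition-code (EFLAGS) bits an instruction sets or clears.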
1190 class Node::PD {
1191 public:
1192 enum NodeFlags {
1193 Flag_intel_jcc_erratum = Node::_last_flag << 1,
1194 Flag_sets_carry_flag = Node::_last_flag << 2,
1195 Flag_sets_parity_flag = Node::_last_flag << 3,
1196 Flag_sets_zero_flag = Node::_last_flag << 4,
1197 Flag_sets_overflow_flag = Node::_last_flag << 5,
1198 Flag_sets_sign_flag = Node::_last_flag << 6,
1199 Flag_clears_carry_flag = Node::_last_flag << 7,
1200 Flag_clears_parity_flag = Node::_last_flag << 8,
1201 Flag_clears_zero_flag = Node::_last_flag << 9,
1202 Flag_clears_overflow_flag = Node::_last_flag << 10,
1203 Flag_clears_sign_flag = Node::_last_flag << 11,
1204 _last_flag = Flag_clears_sign_flag
1205 };
1206 };
1207
1208 %} // end source_hpp
1209
1210 source %{
1211
1212 #include "opto/addnode.hpp"
1213 #include "c2_intelJccErratum_x86.hpp"
1214
1215 void PhaseOutput::pd_perform_mach_node_analysis() {
1216 if (VM_Version::has_intel_jcc_erratum()) {
1217 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
1218 _buf_sizes._code += extra_padding;
1219 }
1220 }
1221
1222 int MachNode::pd_alignment_required() const {
1223 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
1224 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
1225 return IntelJccErratum::largest_jcc_size() + 1;
1226 } else {
1227 return 1;
1228 }
1229 }
1230
1231 int MachNode::compute_padding(int current_offset) const {
1232 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
1233 Compile* C = Compile::current();
1234 PhaseOutput* output = C->output();
1235 Block* block = output->block();
1236 int index = output->index();
1237 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
1238 } else {
1239 return 0;
1240 }
1241 }
1242
1243 // Emit exception handler code.
1244 // Stuff framesize into a register and call a VM stub routine.
1245 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
1246
1247 // Note that the code buffer's insts_mark is always relative to insts.
1248 // That's why we must use the macroassembler to generate a handler.
1249 address base = __ start_a_stub(size_exception_handler());
1250 if (base == nullptr) {
1251 ciEnv::current()->record_failure("CodeCache is full");
1252 return 0; // CodeBuffer::expand failed
1253 }
1254 int offset = __ offset();
1255 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1256 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1257 __ end_a_stub();
1258 return offset;
1259 }
1260
1261 // Emit deopt handler code.
1262 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
1263
1264 // Note that the code buffer's insts_mark is always relative to insts.
1265 // That's why we must use the macroassembler to generate a handler.
1266 address base = __ start_a_stub(size_deopt_handler());
1267 if (base == nullptr) {
1268 ciEnv::current()->record_failure("CodeCache is full");
1269 return 0; // CodeBuffer::expand failed
1270 }
1271 int offset = __ offset();
1272
1273 address the_pc = (address) __ pc();
1274 Label next;
// Push "the_pc" on the stack without destroying any registers,
// as they may all be live.
1277
1278 // push address of "next"
1279 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1280 __ bind(next);
1281 // adjust it so it matches "the_pc"
1282 __ subptr(Address(rsp, 0), __ offset() - offset);
1283
1284 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1285 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
1286 __ end_a_stub();
1287 return offset;
1288 }
1289
1290 static Assembler::Width widthForType(BasicType bt) {
1291 if (bt == T_BYTE) {
1292 return Assembler::B;
1293 } else if (bt == T_SHORT) {
1294 return Assembler::W;
1295 } else if (bt == T_INT) {
1296 return Assembler::D;
1297 } else {
1298 assert(bt == T_LONG, "not a long: %s", type2name(bt));
1299 return Assembler::Q;
1300 }
1301 }
1302
1303 //=============================================================================
1304
1305 // Float masks come from different places depending on platform.
1306 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
1307 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
1308 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1309 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1310 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
1311 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
1312 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
1313 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
1314 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
1315 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
1316 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
1317 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
1318 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
1319 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
1320 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
1321 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
1322 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
1323 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
1324 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
1325
1326 //=============================================================================
1327 bool Matcher::match_rule_supported(int opcode) {
1328 if (!has_match_rule(opcode)) {
1329 return false; // no match rule present
1330 }
1331 switch (opcode) {
1332 case Op_AbsVL:
1333 case Op_StoreVectorScatter:
1334 if (UseAVX < 3) {
1335 return false;
1336 }
1337 break;
1338 case Op_PopCountI:
1339 case Op_PopCountL:
1340 if (!UsePopCountInstruction) {
1341 return false;
1342 }
1343 break;
1344 case Op_PopCountVI:
1345 if (UseAVX < 2) {
1346 return false;
1347 }
1348 break;
1349 case Op_CompressV:
1350 case Op_ExpandV:
1351 case Op_PopCountVL:
1352 if (UseAVX < 2) {
1353 return false;
1354 }
1355 break;
1356 case Op_MulVI:
1357 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
1358 return false;
1359 }
1360 break;
1361 case Op_MulVL:
1362 if (UseSSE < 4) { // only with SSE4_1 or AVX
1363 return false;
1364 }
1365 break;
1366 case Op_MulReductionVL:
1367 if (VM_Version::supports_avx512dq() == false) {
1368 return false;
1369 }
1370 break;
1371 case Op_AbsVB:
1372 case Op_AbsVS:
1373 case Op_AbsVI:
1374 case Op_AddReductionVI:
1375 case Op_AndReductionV:
1376 case Op_OrReductionV:
1377 case Op_XorReductionV:
1378 if (UseSSE < 3) { // requires at least SSSE3
1379 return false;
1380 }
1381 break;
1382 case Op_MaxHF:
1383 case Op_MinHF:
1384 if (!VM_Version::supports_avx512vlbw()) {
1385 return false;
1386 } // fallthrough
1387 case Op_AddHF:
1388 case Op_DivHF:
1389 case Op_FmaHF:
1390 case Op_MulHF:
1391 case Op_ReinterpretS2HF:
1392 case Op_ReinterpretHF2S:
1393 case Op_SubHF:
1394 case Op_SqrtHF:
1395 if (!VM_Version::supports_avx512_fp16()) {
1396 return false;
1397 }
1398 break;
1399 case Op_VectorLoadShuffle:
1400 case Op_VectorRearrange:
1401 case Op_MulReductionVI:
1402 if (UseSSE < 4) { // requires at least SSE4
1403 return false;
1404 }
1405 break;
1406 case Op_IsInfiniteF:
1407 case Op_IsInfiniteD:
1408 if (!VM_Version::supports_avx512dq()) {
1409 return false;
1410 }
1411 break;
1412 case Op_SqrtVD:
1413 case Op_SqrtVF:
1414 case Op_VectorMaskCmp:
1415 case Op_VectorCastB2X:
1416 case Op_VectorCastS2X:
1417 case Op_VectorCastI2X:
1418 case Op_VectorCastL2X:
1419 case Op_VectorCastF2X:
1420 case Op_VectorCastD2X:
1421 case Op_VectorUCastB2X:
1422 case Op_VectorUCastS2X:
1423 case Op_VectorUCastI2X:
1424 case Op_VectorMaskCast:
1425 if (UseAVX < 1) { // enabled for AVX only
1426 return false;
1427 }
1428 break;
1429 case Op_PopulateIndex:
1430 if (UseAVX < 2) {
1431 return false;
1432 }
1433 break;
1434 case Op_RoundVF:
1435 if (UseAVX < 2) { // enabled for AVX2 only
1436 return false;
1437 }
1438 break;
1439 case Op_RoundVD:
1440 if (UseAVX < 3) {
1441 return false; // enabled for AVX3 only
1442 }
1443 break;
1444 case Op_CompareAndSwapL:
1445 case Op_CompareAndSwapP:
1446 break;
1447 case Op_StrIndexOf:
1448 if (!UseSSE42Intrinsics) {
1449 return false;
1450 }
1451 break;
1452 case Op_StrIndexOfChar:
1453 if (!UseSSE42Intrinsics) {
1454 return false;
1455 }
1456 break;
1457 case Op_OnSpinWait:
1458 if (VM_Version::supports_on_spin_wait() == false) {
1459 return false;
1460 }
1461 break;
1462 case Op_MulVB:
1463 case Op_LShiftVB:
1464 case Op_RShiftVB:
1465 case Op_URShiftVB:
1466 case Op_VectorInsert:
1467 case Op_VectorLoadMask:
1468 case Op_VectorStoreMask:
1469 case Op_VectorBlend:
1470 if (UseSSE < 4) {
1471 return false;
1472 }
1473 break;
1474 case Op_MaxD:
1475 case Op_MaxF:
1476 case Op_MinD:
1477 case Op_MinF:
1478 if (UseAVX < 1) { // enabled for AVX only
1479 return false;
1480 }
1481 break;
1482 case Op_CacheWB:
1483 case Op_CacheWBPreSync:
1484 case Op_CacheWBPostSync:
1485 if (!VM_Version::supports_data_cache_line_flush()) {
1486 return false;
1487 }
1488 break;
1489 case Op_ExtractB:
1490 case Op_ExtractL:
1491 case Op_ExtractI:
1492 case Op_RoundDoubleMode:
1493 if (UseSSE < 4) {
1494 return false;
1495 }
1496 break;
1497 case Op_RoundDoubleModeV:
1498 if (VM_Version::supports_avx() == false) {
1499 return false; // 128bit vroundpd is not available
1500 }
1501 break;
1502 case Op_LoadVectorGather:
1503 case Op_LoadVectorGatherMasked:
1504 if (UseAVX < 2) {
1505 return false;
1506 }
1507 break;
1508 case Op_FmaF:
1509 case Op_FmaD:
1510 case Op_FmaVD:
1511 case Op_FmaVF:
1512 if (!UseFMA) {
1513 return false;
1514 }
1515 break;
1516 case Op_MacroLogicV:
1517 if (UseAVX < 3 || !UseVectorMacroLogic) {
1518 return false;
1519 }
1520 break;
1521
1522 case Op_VectorCmpMasked:
1523 case Op_VectorMaskGen:
1524 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
1525 return false;
1526 }
1527 break;
1528 case Op_VectorMaskFirstTrue:
1529 case Op_VectorMaskLastTrue:
1530 case Op_VectorMaskTrueCount:
1531 case Op_VectorMaskToLong:
1532 if (UseAVX < 1) {
1533 return false;
1534 }
1535 break;
1536 case Op_RoundF:
1537 case Op_RoundD:
1538 break;
1539 case Op_CopySignD:
1540 case Op_CopySignF:
1541 if (UseAVX < 3) {
1542 return false;
1543 }
1544 if (!VM_Version::supports_avx512vl()) {
1545 return false;
1546 }
1547 break;
1548 case Op_CompressBits:
1549 case Op_ExpandBits:
1550 if (!VM_Version::supports_bmi2()) {
1551 return false;
1552 }
1553 break;
1554 case Op_CompressM:
1555 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
1556 return false;
1557 }
1558 break;
1559 case Op_ConvF2HF:
1560 case Op_ConvHF2F:
1561 if (!VM_Version::supports_float16()) {
1562 return false;
1563 }
1564 break;
1565 case Op_VectorCastF2HF:
1566 case Op_VectorCastHF2F:
1567 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
1568 return false;
1569 }
1570 break;
1571 }
1572 return true; // Match rules are supported by default.
1573 }
1574
1575 //------------------------------------------------------------------------
1576
1577 static inline bool is_pop_count_instr_target(BasicType bt) {
1578 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
1579 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
1580 }
1581
1582 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
1583 return match_rule_supported_vector(opcode, vlen, bt);
1584 }
1585
// Identify extra cases in which we might want to provide match rules for vector nodes
// and other intrinsics guarded by vector length (vlen) and element type (bt).
1588 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
1589 if (!match_rule_supported(opcode)) {
1590 return false;
1591 }
1592 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
1593 // * SSE2 supports 128bit vectors for all types;
1594 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
1595 // * AVX2 supports 256bit vectors for all types;
1596 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
1597 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
1598 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
1599 // And MaxVectorSize is taken into account as well.
1600 if (!vector_size_supported(bt, vlen)) {
1601 return false;
1602 }
1603 // Special cases which require vector length follow:
1604 // * implementation limitations
1605 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
1606 // * 128bit vroundpd instruction is present only in AVX1
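// For example, Op_AbsVF on a 16-element (512 bit) vector is rejected below
// when AVX512DQ is not available.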
1607 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
1608 switch (opcode) {
1609 case Op_MaxVHF:
1610 case Op_MinVHF:
1611 if (!VM_Version::supports_avx512bw()) {
1612 return false;
1613 }
1614 case Op_AddVHF:
1615 case Op_DivVHF:
1616 case Op_FmaVHF:
1617 case Op_MulVHF:
1618 case Op_SubVHF:
1619 case Op_SqrtVHF:
1620 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1621 return false;
1622 }
1623 if (!VM_Version::supports_avx512_fp16()) {
1624 return false;
1625 }
1626 break;
1627 case Op_AbsVF:
1628 case Op_NegVF:
1629 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
1630 return false; // 512bit vandps and vxorps are not available
1631 }
1632 break;
1633 case Op_AbsVD:
1634 case Op_NegVD:
1635 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
1636 return false; // 512bit vpmullq, vandpd and vxorpd are not available
1637 }
1638 break;
1639 case Op_RotateRightV:
1640 case Op_RotateLeftV:
1641 if (bt != T_INT && bt != T_LONG) {
1642 return false;
1643 } // fallthrough
1644 case Op_MacroLogicV:
1645 if (!VM_Version::supports_evex() ||
1646 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
1647 return false;
1648 }
1649 break;
1650 case Op_ClearArray:
1651 case Op_VectorMaskGen:
1652 case Op_VectorCmpMasked:
1653 if (!VM_Version::supports_avx512bw()) {
1654 return false;
1655 }
1656 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
1657 return false;
1658 }
1659 break;
1660 case Op_LoadVectorMasked:
1661 case Op_StoreVectorMasked:
1662 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
1663 return false;
1664 }
1665 break;
1666 case Op_UMinV:
1667 case Op_UMaxV:
1668 if (UseAVX == 0) {
1669 return false;
1670 }
1671 break;
1672 case Op_MaxV:
1673 case Op_MinV:
1674 if (UseSSE < 4 && is_integral_type(bt)) {
1675 return false;
1676 }
1677 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
1678 // Float/Double intrinsics are enabled for AVX family currently.
1679 if (UseAVX == 0) {
1680 return false;
1681 }
1682 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
1683 return false;
1684 }
1685 }
1686 break;
1687 case Op_CallLeafVector:
1688 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
1689 return false;
1690 }
1691 break;
1692 case Op_AddReductionVI:
1693 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
1694 return false;
1695 }
1696 // fallthrough
1697 case Op_AndReductionV:
1698 case Op_OrReductionV:
1699 case Op_XorReductionV:
1700 if (is_subword_type(bt) && (UseSSE < 4)) {
1701 return false;
1702 }
1703 break;
1704 case Op_MinReductionV:
1705 case Op_MaxReductionV:
1706 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
1707 return false;
1708 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
1709 return false;
1710 }
1711 // Float/Double intrinsics enabled for AVX family.
1712 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
1713 return false;
1714 }
1715 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
1716 return false;
1717 }
1718 break;
1719 case Op_VectorTest:
1720 if (UseSSE < 4) {
1721 return false; // Implementation limitation
1722 } else if (size_in_bits < 32) {
1723 return false; // Implementation limitation
1724 }
1725 break;
1726 case Op_VectorLoadShuffle:
1727 case Op_VectorRearrange:
if (vlen == 2) {
1729 return false; // Implementation limitation due to how shuffle is loaded
1730 } else if (size_in_bits == 256 && UseAVX < 2) {
1731 return false; // Implementation limitation
1732 }
1733 break;
1734 case Op_VectorLoadMask:
1735 case Op_VectorMaskCast:
1736 if (size_in_bits == 256 && UseAVX < 2) {
1737 return false; // Implementation limitation
1738 }
1739 // fallthrough
1740 case Op_VectorStoreMask:
1741 if (vlen == 2) {
1742 return false; // Implementation limitation
1743 }
1744 break;
1745 case Op_PopulateIndex:
1746 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
1747 return false;
1748 }
1749 break;
1750 case Op_VectorCastB2X:
1751 case Op_VectorCastS2X:
1752 case Op_VectorCastI2X:
1753 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
1754 return false;
1755 }
1756 break;
1757 case Op_VectorCastL2X:
1758 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
1759 return false;
1760 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
1761 return false;
1762 }
1763 break;
1764 case Op_VectorCastF2X: {
// As per JLS section 5.1.3, narrowing conversions to sub-word types
// happen after an intermediate conversion to integer, and the special
// handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
1768 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
1769 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
1770 return false;
1771 }
1772 }
1773 // fallthrough
1774 case Op_VectorCastD2X:
1775 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
1776 return false;
1777 }
1778 break;
1779 case Op_VectorCastF2HF:
1780 case Op_VectorCastHF2F:
1781 if (!VM_Version::supports_f16c() &&
1782 ((!VM_Version::supports_evex() ||
1783 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
1784 return false;
1785 }
1786 break;
1787 case Op_RoundVD:
1788 if (!VM_Version::supports_avx512dq()) {
1789 return false;
1790 }
1791 break;
1792 case Op_MulReductionVI:
1793 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
1794 return false;
1795 }
1796 break;
1797 case Op_LoadVectorGatherMasked:
1798 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1799 return false;
1800 }
1801 if (is_subword_type(bt) &&
1802 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
1803 (size_in_bits < 64) ||
1804 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
1805 return false;
1806 }
1807 break;
1808 case Op_StoreVectorScatterMasked:
1809 case Op_StoreVectorScatter:
1810 if (is_subword_type(bt)) {
1811 return false;
1812 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1813 return false;
1814 }
1815 // fallthrough
1816 case Op_LoadVectorGather:
1817 if (!is_subword_type(bt) && size_in_bits == 64) {
1818 return false;
1819 }
1820 if (is_subword_type(bt) && size_in_bits < 64) {
1821 return false;
1822 }
1823 break;
1824 case Op_SaturatingAddV:
1825 case Op_SaturatingSubV:
1826 if (UseAVX < 1) {
1827 return false; // Implementation limitation
1828 }
1829 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
1830 return false;
1831 }
1832 break;
1833 case Op_SelectFromTwoVector:
1834 if (size_in_bits < 128) {
1835 return false;
1836 }
1837 if ((size_in_bits < 512 && !VM_Version::supports_avx512vl())) {
1838 return false;
1839 }
1840 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
1841 return false;
1842 }
1843 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
1844 return false;
1845 }
1846 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
1847 return false;
1848 }
1849 break;
1850 case Op_MaskAll:
1851 if (!VM_Version::supports_evex()) {
1852 return false;
1853 }
1854 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
1855 return false;
1856 }
1857 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1858 return false;
1859 }
1860 break;
1861 case Op_VectorMaskCmp:
1862 if (vlen < 2 || size_in_bits < 32) {
1863 return false;
1864 }
1865 break;
1866 case Op_CompressM:
1867 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
1868 return false;
1869 }
1870 break;
1871 case Op_CompressV:
1872 case Op_ExpandV:
1873 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
1874 return false;
1875 }
if (size_in_bits < 128) {
1877 return false;
1878 }
1879 case Op_VectorLongToMask:
1880 if (UseAVX < 1) {
1881 return false;
1882 }
1883 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
1884 return false;
1885 }
1886 break;
1887 case Op_SignumVD:
1888 case Op_SignumVF:
1889 if (UseAVX < 1) {
1890 return false;
1891 }
1892 break;
1893 case Op_PopCountVI:
1894 case Op_PopCountVL: {
1895 if (!is_pop_count_instr_target(bt) &&
1896 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
1897 return false;
1898 }
1899 }
1900 break;
1901 case Op_ReverseV:
1902 case Op_ReverseBytesV:
1903 if (UseAVX < 2) {
1904 return false;
1905 }
1906 break;
1907 case Op_CountTrailingZerosV:
1908 case Op_CountLeadingZerosV:
1909 if (UseAVX < 2) {
1910 return false;
1911 }
1912 break;
1913 }
return true; // Match rules are supported by default.
1915 }
1916
1917 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
// The ADLC-based match_rule_supported routine checks for the existence of a pattern based
// on the IR opcode. Most unary/binary/ternary masked operations share the IR nodes
// of their non-masked counterparts, with the mask edge being the differentiator.
// This routine does a strict check for the existence of masked operation patterns
// by returning false for all opcodes apart from the ones whose masked
// instruction patterns are defined in this file.
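// For example, a masked Op_MulVL is only accepted below when AVX512DQ is available,
// even though the non-masked Op_MulVL is supported more broadly.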
1924 if (!match_rule_supported_vector(opcode, vlen, bt)) {
1925 return false;
1926 }
1927
1928 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
1929 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
1930 return false;
1931 }
1932 switch(opcode) {
1933 // Unary masked operations
1934 case Op_AbsVB:
1935 case Op_AbsVS:
if (!VM_Version::supports_avx512bw()) {
1937 return false; // Implementation limitation
1938 }
1939 case Op_AbsVI:
1940 case Op_AbsVL:
1941 return true;
1942
1943 // Ternary masked operations
1944 case Op_FmaVF:
1945 case Op_FmaVD:
1946 return true;
1947
1948 case Op_MacroLogicV:
if (bt != T_INT && bt != T_LONG) {
1950 return false;
1951 }
1952 return true;
1953
1954 // Binary masked operations
1955 case Op_AddVB:
1956 case Op_AddVS:
1957 case Op_SubVB:
1958 case Op_SubVS:
1959 case Op_MulVS:
1960 case Op_LShiftVS:
1961 case Op_RShiftVS:
1962 case Op_URShiftVS:
1963 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
1964 if (!VM_Version::supports_avx512bw()) {
1965 return false; // Implementation limitation
1966 }
1967 return true;
1968
1969 case Op_MulVL:
1970 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
1971 if (!VM_Version::supports_avx512dq()) {
1972 return false; // Implementation limitation
1973 }
1974 return true;
1975
1976 case Op_AndV:
1977 case Op_OrV:
1978 case Op_XorV:
1979 case Op_RotateRightV:
1980 case Op_RotateLeftV:
1981 if (bt != T_INT && bt != T_LONG) {
1982 return false; // Implementation limitation
1983 }
1984 return true;
1985
1986 case Op_VectorLoadMask:
1987 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
1988 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
1989 return false;
1990 }
1991 return true;
1992
1993 case Op_AddVI:
1994 case Op_AddVL:
1995 case Op_AddVF:
1996 case Op_AddVD:
1997 case Op_SubVI:
1998 case Op_SubVL:
1999 case Op_SubVF:
2000 case Op_SubVD:
2001 case Op_MulVI:
2002 case Op_MulVF:
2003 case Op_MulVD:
2004 case Op_DivVF:
2005 case Op_DivVD:
2006 case Op_SqrtVF:
2007 case Op_SqrtVD:
2008 case Op_LShiftVI:
2009 case Op_LShiftVL:
2010 case Op_RShiftVI:
2011 case Op_RShiftVL:
2012 case Op_URShiftVI:
2013 case Op_URShiftVL:
2014 case Op_LoadVectorMasked:
2015 case Op_StoreVectorMasked:
2016 case Op_LoadVectorGatherMasked:
2017 case Op_StoreVectorScatterMasked:
2018 return true;
2019
2020 case Op_UMinV:
2021 case Op_UMaxV:
2022 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
2023 return false;
2024 } // fallthrough
2025 case Op_MaxV:
2026 case Op_MinV:
2027 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
2028 return false; // Implementation limitation
2029 }
2030 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
2031 return false; // Implementation limitation
2032 }
2033 return true;
2034 case Op_SaturatingAddV:
2035 case Op_SaturatingSubV:
2036 if (!is_subword_type(bt)) {
2037 return false;
2038 }
2039 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
2040 return false; // Implementation limitation
2041 }
2042 return true;
2043
2044 case Op_VectorMaskCmp:
2045 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
2046 return false; // Implementation limitation
2047 }
2048 return true;
2049
2050 case Op_VectorRearrange:
2051 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
2052 return false; // Implementation limitation
2053 }
2054 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
2055 return false; // Implementation limitation
2056 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
2057 return false; // Implementation limitation
2058 }
2059 return true;
2060
2061 // Binary Logical operations
2062 case Op_AndVMask:
2063 case Op_OrVMask:
2064 case Op_XorVMask:
2065 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
2066 return false; // Implementation limitation
2067 }
2068 return true;
2069
2070 case Op_PopCountVI:
2071 case Op_PopCountVL:
2072 if (!is_pop_count_instr_target(bt)) {
2073 return false;
2074 }
2075 return true;
2076
2077 case Op_MaskAll:
2078 return true;
2079
2080 case Op_CountLeadingZerosV:
2081 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
2082 return true;
2083 }
2084 default:
2085 return false;
2086 }
2087 }
2088
2089 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
2090 return false;
2091 }
2092
2093 // Return true if Vector::rearrange needs preparation of the shuffle argument
2094 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
2095 switch (elem_bt) {
2096 case T_BYTE: return false;
2097 case T_SHORT: return !VM_Version::supports_avx512bw();
2098 case T_INT: return !VM_Version::supports_avx();
2099 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
2100 default:
2101 ShouldNotReachHere();
2102 return false;
2103 }
2104 }
2105
2106 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
2107 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
2108 bool legacy = (generic_opnd->opcode() == LEGVEC);
2109 if (!VM_Version::supports_avx512vlbwdq() && // KNL
2110 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
2111 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
2112 return new legVecZOper();
2113 }
2114 if (legacy) {
2115 switch (ideal_reg) {
2116 case Op_VecS: return new legVecSOper();
2117 case Op_VecD: return new legVecDOper();
2118 case Op_VecX: return new legVecXOper();
2119 case Op_VecY: return new legVecYOper();
2120 case Op_VecZ: return new legVecZOper();
2121 }
2122 } else {
2123 switch (ideal_reg) {
2124 case Op_VecS: return new vecSOper();
2125 case Op_VecD: return new vecDOper();
2126 case Op_VecX: return new vecXOper();
2127 case Op_VecY: return new vecYOper();
2128 case Op_VecZ: return new vecZOper();
2129 }
2130 }
2131 ShouldNotReachHere();
2132 return nullptr;
2133 }
2134
2135 bool Matcher::is_reg2reg_move(MachNode* m) {
2136 switch (m->rule()) {
2137 case MoveVec2Leg_rule:
2138 case MoveLeg2Vec_rule:
2139 case MoveF2VL_rule:
2140 case MoveF2LEG_rule:
2141 case MoveVL2F_rule:
2142 case MoveLEG2F_rule:
2143 case MoveD2VL_rule:
2144 case MoveD2LEG_rule:
2145 case MoveVL2D_rule:
2146 case MoveLEG2D_rule:
2147 return true;
2148 default:
2149 return false;
2150 }
2151 }
2152
2153 bool Matcher::is_generic_vector(MachOper* opnd) {
2154 switch (opnd->opcode()) {
2155 case VEC:
2156 case LEGVEC:
2157 return true;
2158 default:
2159 return false;
2160 }
2161 }
2162
2163 //------------------------------------------------------------------------
2164
2165 const RegMask* Matcher::predicate_reg_mask(void) {
2166 return &_VECTMASK_REG_mask;
2167 }
2168
2169 // Max vector size in bytes. 0 if not supported.
2170 int Matcher::vector_width_in_bytes(BasicType bt) {
2171 assert(is_java_primitive(bt), "only primitive type vectors");
2172 // SSE2 supports 128bit vectors for all types.
2173 // AVX2 supports 256bit vectors for all types.
// EVEX (AVX512) supports 512bit vectors for all types.
2175 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
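// e.g. UseAVX == 2 gives (1 << 2) * 8 = 32 bytes, UseAVX == 3 gives 64 bytes.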
2176 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
2177 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
2178 size = (UseAVX > 2) ? 64 : 32;
2179 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
2180 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
2181 // Use flag to limit vector size.
2182 size = MIN2(size,(int)MaxVectorSize);
2183 // Minimum 2 values in vector (or 4 for bytes).
2184 switch (bt) {
2185 case T_DOUBLE:
2186 case T_LONG:
2187 if (size < 16) return 0;
2188 break;
2189 case T_FLOAT:
2190 case T_INT:
2191 if (size < 8) return 0;
2192 break;
2193 case T_BOOLEAN:
2194 if (size < 4) return 0;
2195 break;
2196 case T_CHAR:
2197 if (size < 4) return 0;
2198 break;
2199 case T_BYTE:
2200 if (size < 4) return 0;
2201 break;
2202 case T_SHORT:
2203 if (size < 4) return 0;
2204 break;
2205 default:
2206 ShouldNotReachHere();
2207 }
2208 return size;
2209 }
2210
2211 // Limits on vector size (number of elements) loaded into vector.
2212 int Matcher::max_vector_size(const BasicType bt) {
2213 return vector_width_in_bytes(bt)/type2aelembytes(bt);
2214 }
2215 int Matcher::min_vector_size(const BasicType bt) {
2216 int max_size = max_vector_size(bt);
2217 // Min size which can be loaded into vector is 4 bytes.
2218 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
// Support for calling SVML routines on single-element (Double64) double vectors.
2220 if (bt == T_DOUBLE) {
2221 size = 1;
2222 }
2223 return MIN2(size,max_size);
2224 }
2225
2226 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
2227 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
2228 // by default on Cascade Lake
2229 if (VM_Version::is_default_intel_cascade_lake()) {
2230 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
2231 }
2232 return Matcher::max_vector_size(bt);
2233 }
2234
2235 int Matcher::scalable_vector_reg_size(const BasicType bt) {
2236 return -1;
2237 }
2238
2239 // Vector ideal reg corresponding to specified size in bytes
2240 uint Matcher::vector_ideal_reg(int size) {
2241 assert(MaxVectorSize >= size, "");
2242 switch(size) {
2243 case 4: return Op_VecS;
2244 case 8: return Op_VecD;
2245 case 16: return Op_VecX;
2246 case 32: return Op_VecY;
2247 case 64: return Op_VecZ;
2248 }
2249 ShouldNotReachHere();
2250 return 0;
2251 }
2252
2253 // Check for shift by small constant as well
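// For example, the (LShiftX idx con) feeding an address of the form base + (idx << 2)
// can be subsumed into the x86 scaled-index addressing mode instead of being
// computed into a separate register.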
2254 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
2255 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
2256 shift->in(2)->get_int() <= 3 &&
2257 // Are there other uses besides address expressions?
2258 !matcher->is_visited(shift)) {
2259 address_visited.set(shift->_idx); // Flag as address_visited
2260 mstack.push(shift->in(2), Matcher::Visit);
2261 Node *conv = shift->in(1);
// Allow the Matcher to match the rule which bypasses the
// ConvI2L operation for an array index on LP64
// if the index value is positive.
2265 if (conv->Opcode() == Op_ConvI2L &&
2266 conv->as_Type()->type()->is_long()->_lo >= 0 &&
2267 // Are there other uses besides address expressions?
2268 !matcher->is_visited(conv)) {
2269 address_visited.set(conv->_idx); // Flag as address_visited
2270 mstack.push(conv->in(1), Matcher::Pre_Visit);
2271 } else {
2272 mstack.push(conv, Matcher::Pre_Visit);
2273 }
2274 return true;
2275 }
2276 return false;
2277 }
2278
// This function identifies sub-graphs in which a 'load' node is
// an input to two different nodes, such that the sub-graph can be
// matched with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
2285 //
2286 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
2287 // This is a temporary solution until we make DAGs expressible in ADL.
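// For example, is_bmi_pattern() below uses
//   bmii.match(Op_AndI, -1, Op_SubI, 1, 0)
// to recognize (AndI (SubI 0 LoadI) LoadI), i.e. the blsi candidate shown above.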
2288 template<typename ConType>
2289 class FusedPatternMatcher {
2290 Node* _op1_node;
2291 Node* _mop_node;
2292 int _con_op;
2293
2294 static int match_next(Node* n, int next_op, int next_op_idx) {
2295 if (n->in(1) == nullptr || n->in(2) == nullptr) {
2296 return -1;
2297 }
2298
2299 if (next_op_idx == -1) { // n is commutative, try rotations
2300 if (n->in(1)->Opcode() == next_op) {
2301 return 1;
2302 } else if (n->in(2)->Opcode() == next_op) {
2303 return 2;
2304 }
2305 } else {
2306 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
2307 if (n->in(next_op_idx)->Opcode() == next_op) {
2308 return next_op_idx;
2309 }
2310 }
2311 return -1;
2312 }
2313
2314 public:
2315 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
2316 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
2317
2318 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
2319 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
2320 typename ConType::NativeType con_value) {
2321 if (_op1_node->Opcode() != op1) {
2322 return false;
2323 }
2324 if (_mop_node->outcnt() > 2) {
2325 return false;
2326 }
2327 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
2328 if (op1_op2_idx == -1) {
2329 return false;
2330 }
2331 // Memory operation must be the other edge
2332 int op1_mop_idx = (op1_op2_idx & 1) + 1;
2333
2334 // Check that the mop node is really what we want
2335 if (_op1_node->in(op1_mop_idx) == _mop_node) {
2336 Node* op2_node = _op1_node->in(op1_op2_idx);
2337 if (op2_node->outcnt() > 1) {
2338 return false;
2339 }
2340 assert(op2_node->Opcode() == op2, "Should be");
2341 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
2342 if (op2_con_idx == -1) {
2343 return false;
2344 }
2345 // Memory operation must be the other edge
2346 int op2_mop_idx = (op2_con_idx & 1) + 1;
2347 // Check that the memory operation is the same node
2348 if (op2_node->in(op2_mop_idx) == _mop_node) {
2349 // Now check the constant
2350 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
2351 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
2352 return true;
2353 }
2354 }
2355 }
2356 return false;
2357 }
2358 };
2359
2360 static bool is_bmi_pattern(Node* n, Node* m) {
2361 assert(UseBMI1Instructions, "sanity");
2362 if (n != nullptr && m != nullptr) {
2363 if (m->Opcode() == Op_LoadI) {
2364 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
2365 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
2366 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
2367 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
2368 } else if (m->Opcode() == Op_LoadL) {
2369 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
2370 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
2371 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
2372 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
2373 }
2374 }
2375 return false;
2376 }
2377
2378 // Should the matcher clone input 'm' of node 'n'?
2379 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2380 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
2381 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
2382 mstack.push(m, Visit);
2383 return true;
2384 }
2385 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
2386 mstack.push(m, Visit); // m = ShiftCntV
2387 return true;
2388 }
2389 if (is_encode_and_store_pattern(n, m)) {
2390 mstack.push(m, Visit);
2391 return true;
2392 }
2393 return false;
2394 }
2395
2396 // Should the Matcher clone shifts on addressing modes, expecting them
2397 // to be subsumed into complex addressing expressions or compute them
2398 // into registers?
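// Returning true lets shift nodes be cloned per use so that the matcher can fold them
// into a single x86 addressing mode of the form base + (index << scale) + disp.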
2399 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
2400 Node *off = m->in(AddPNode::Offset);
2401 if (off->is_Con()) {
2402 address_visited.test_set(m->_idx); // Flag as address_visited
2403 Node *adr = m->in(AddPNode::Address);
2404
2405 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
2406 // AtomicAdd is not an addressing expression.
2407 // Cheap to find it by looking for screwy base.
2408 if (adr->is_AddP() &&
2409 !adr->in(AddPNode::Base)->is_top() &&
2410 !adr->in(AddPNode::Offset)->is_Con() &&
2411 off->get_long() == (int) (off->get_long()) && // immL32
2412 // Are there other uses besides address expressions?
2413 !is_visited(adr)) {
2414 address_visited.set(adr->_idx); // Flag as address_visited
2415 Node *shift = adr->in(AddPNode::Offset);
2416 if (!clone_shift(shift, this, mstack, address_visited)) {
2417 mstack.push(shift, Pre_Visit);
2418 }
2419 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
2420 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
2421 } else {
2422 mstack.push(adr, Pre_Visit);
2423 }
2424
2425 // Clone X+offset as it also folds into most addressing expressions
2426 mstack.push(off, Visit);
2427 mstack.push(m->in(AddPNode::Base), Pre_Visit);
2428 return true;
2429 } else if (clone_shift(off, this, mstack, address_visited)) {
2430 address_visited.test_set(m->_idx); // Flag as address_visited
2431 mstack.push(m->in(AddPNode::Address), Pre_Visit);
2432 mstack.push(m->in(AddPNode::Base), Pre_Visit);
2433 return true;
2434 }
2435 return false;
2436 }
2437
2438 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
2439 switch (bt) {
2440 case BoolTest::eq:
2441 return Assembler::eq;
2442 case BoolTest::ne:
2443 return Assembler::neq;
2444 case BoolTest::le:
2445 case BoolTest::ule:
2446 return Assembler::le;
2447 case BoolTest::ge:
2448 case BoolTest::uge:
2449 return Assembler::nlt;
2450 case BoolTest::lt:
2451 case BoolTest::ult:
2452 return Assembler::lt;
2453 case BoolTest::gt:
2454 case BoolTest::ugt:
2455 return Assembler::nle;
2456 default : ShouldNotReachHere(); return Assembler::_false;
2457 }
2458 }
2459
2460 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
2461 switch (bt) {
2462 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
2463 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
2464 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
2465 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
2466 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
2467 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
2468 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
2469 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
2470 }
2471 }
2472
2473 // Helper methods for MachSpillCopyNode::implementation().
2474 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2475 int src_hi, int dst_hi, uint ireg, outputStream* st) {
2476 assert(ireg == Op_VecS || // 32bit vector
2477 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
2478 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
2479 "no non-adjacent vector moves" );
2480 if (masm) {
2481 switch (ireg) {
2482 case Op_VecS: // copy whole register
2483 case Op_VecD:
2484 case Op_VecX:
2485 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2486 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
2487 } else {
2488 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
2489 }
2490 break;
2491 case Op_VecY:
2492 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2493 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
2494 } else {
2495 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
2496 }
2497 break;
2498 case Op_VecZ:
2499 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
2500 break;
2501 default:
2502 ShouldNotReachHere();
2503 }
2504 #ifndef PRODUCT
2505 } else {
2506 switch (ireg) {
2507 case Op_VecS:
2508 case Op_VecD:
2509 case Op_VecX:
2510 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
2511 break;
2512 case Op_VecY:
2513 case Op_VecZ:
2514 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
2515 break;
2516 default:
2517 ShouldNotReachHere();
2518 }
2519 #endif
2520 }
2521 }
2522
2523 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2524 int stack_offset, int reg, uint ireg, outputStream* st) {
2525 if (masm) {
2526 if (is_load) {
2527 switch (ireg) {
2528 case Op_VecS:
2529 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2530 break;
2531 case Op_VecD:
2532 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2533 break;
2534 case Op_VecX:
2535 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2536 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2537 } else {
2538 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2539 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
2540 }
2541 break;
2542 case Op_VecY:
2543 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2544 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2545 } else {
2546 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2547 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
2548 }
2549 break;
2550 case Op_VecZ:
2551 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
2552 break;
2553 default:
2554 ShouldNotReachHere();
2555 }
2556 } else { // store
2557 switch (ireg) {
2558 case Op_VecS:
2559 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2560 break;
2561 case Op_VecD:
2562 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2563 break;
2564 case Op_VecX:
2565 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2566 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2567 }
2568 else {
2569 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
2570 }
2571 break;
2572 case Op_VecY:
2573 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2574 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2575 }
2576 else {
2577 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
2578 }
2579 break;
2580 case Op_VecZ:
2581 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2582 break;
2583 default:
2584 ShouldNotReachHere();
2585 }
2586 }
2587 #ifndef PRODUCT
2588 } else {
2589 if (is_load) {
2590 switch (ireg) {
2591 case Op_VecS:
2592 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2593 break;
2594 case Op_VecD:
2595 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2596 break;
2597 case Op_VecX:
2598 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2599 break;
2600 case Op_VecY:
2601 case Op_VecZ:
2602 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2603 break;
2604 default:
2605 ShouldNotReachHere();
2606 }
2607 } else { // store
2608 switch (ireg) {
2609 case Op_VecS:
2610 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2611 break;
2612 case Op_VecD:
2613 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2614 break;
2615 case Op_VecX:
2616 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2617 break;
2618 case Op_VecY:
2619 case Op_VecZ:
2620 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2621 break;
2622 default:
2623 ShouldNotReachHere();
2624 }
2625 }
2626 #endif
2627 }
2628 }
2629
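// Build the in-memory byte image of a replicated vector constant: the value 'con'
// of element type 'bt' is copied into each of the 'len' lanes of the returned byte array.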
2630 template <class T>
2631 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
2632 int size = type2aelembytes(bt) * len;
2633 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
2634 for (int i = 0; i < len; i++) {
2635 int offset = i * type2aelembytes(bt);
2636 switch (bt) {
2637 case T_BYTE: val->at(i) = con; break;
2638 case T_SHORT: {
2639 jshort c = con;
2640 memcpy(val->adr_at(offset), &c, sizeof(jshort));
2641 break;
2642 }
2643 case T_INT: {
2644 jint c = con;
2645 memcpy(val->adr_at(offset), &c, sizeof(jint));
2646 break;
2647 }
2648 case T_LONG: {
2649 jlong c = con;
2650 memcpy(val->adr_at(offset), &c, sizeof(jlong));
2651 break;
2652 }
2653 case T_FLOAT: {
2654 jfloat c = con;
2655 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
2656 break;
2657 }
2658 case T_DOUBLE: {
2659 jdouble c = con;
2660 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
2661 break;
2662 }
2663 default: assert(false, "%s", type2name(bt));
2664 }
2665 }
2666 return val;
2667 }
2668
2669 static inline jlong high_bit_set(BasicType bt) {
2670 switch (bt) {
2671 case T_BYTE: return 0x8080808080808080;
2672 case T_SHORT: return 0x8000800080008000;
2673 case T_INT: return 0x8000000080000000;
2674 case T_LONG: return 0x8000000000000000;
2675 default:
2676 ShouldNotReachHere();
2677 return 0;
2678 }
2679 }
2680
2681 #ifndef PRODUCT
2682 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2683 st->print("nop \t# %d bytes pad for loops and calls", _count);
2684 }
2685 #endif
2686
2687 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
2688 __ nop(_count);
2689 }
2690
2691 uint MachNopNode::size(PhaseRegAlloc*) const {
2692 return _count;
2693 }
2694
2695 #ifndef PRODUCT
2696 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
2697 st->print("# breakpoint");
2698 }
2699 #endif
2700
2701 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
2702 __ int3();
2703 }
2704
2705 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
2706 return MachNode::size(ra_);
2707 }
2708
2709 %}
2710
2711 encode %{
2712
2713 enc_class call_epilog %{
2714 if (VerifyStackAtCalls) {
2715 // Check that stack depth is unchanged: find majik cookie on stack
2716 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2717 Label L;
2718 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2719 __ jccb(Assembler::equal, L);
2720 // Die if stack mismatch
2721 __ int3();
2722 __ bind(L);
2723 }
2724 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
2725 // The last return value is not set by the callee but used to pass the null marker to compiled code.
2726     // Search for the corresponding projection, get the register and emit code that initializes it.
2727 uint con = (tf()->range_cc()->cnt() - 1);
2728 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
2729 ProjNode* proj = fast_out(i)->as_Proj();
2730 if (proj->_con == con) {
2731 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
2732 OptoReg::Name optoReg = ra_->get_reg_first(proj);
2733 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
2734 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
2735 __ testq(rax, rax);
2736 __ setb(Assembler::notZero, toReg);
2737 __ movzbl(toReg, toReg);
2738 if (reg->is_stack()) {
2739 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
2740 __ movq(Address(rsp, st_off), toReg);
2741 }
2742 break;
2743 }
2744 }
2745 if (return_value_is_used()) {
2746 // An inline type is returned as fields in multiple registers.
2747       // rax contains either an oop (if the inline type is returned buffered) or a pointer
2748       // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax if the
2749       // lowest bit is set so that C2 can use the oop after null checking.
2750 // rax &= (rax & 1) - 1
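      // Worked example of the masking trick below (it follows from the comment above):
      //   buffered oop:               lowest bit 0  ->  (rax & 1) - 1 == -1  ->  rax is kept as-is
      //   tagged InlineKlass pointer: lowest bit 1  ->  (rax & 1) - 1 ==  0  ->  rax is cleared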
2751 __ movptr(rscratch1, rax);
2752 __ andptr(rscratch1, 0x1);
2753 __ subptr(rscratch1, 0x1);
2754 __ andptr(rax, rscratch1);
2755 }
2756 }
2757 %}
2758
2759 %}
2760
2761 // Operands for bound floating-point register arguments
2762 operand rxmm0() %{
2763 constraint(ALLOC_IN_RC(xmm0_reg));
2764 match(VecX);
2765   format %{ %}
2766 interface(REG_INTER);
2767 %}
2768
2769 //----------OPERANDS-----------------------------------------------------------
2770 // Operand definitions must precede instruction definitions for correct parsing
2771 // in the ADLC because operands constitute user defined types which are used in
2772 // instruction definitions.
2773
2774 // Vectors
2775
2776 // Dummy generic vector class. Should be used for all vector operands.
2777 // Replaced with vec[SDXYZ] during post-selection pass.
2778 operand vec() %{
2779 constraint(ALLOC_IN_RC(dynamic));
2780 match(VecX);
2781 match(VecY);
2782 match(VecZ);
2783 match(VecS);
2784 match(VecD);
2785
2786 format %{ %}
2787 interface(REG_INTER);
2788 %}
2789
2790 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
2791 // Replaced with legVec[SDXYZ] during post-selection cleanup.
2792 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
2793 // runtime code generation via reg_class_dynamic.
2794 operand legVec() %{
2795 constraint(ALLOC_IN_RC(dynamic));
2796 match(VecX);
2797 match(VecY);
2798 match(VecZ);
2799 match(VecS);
2800 match(VecD);
2801
2802 format %{ %}
2803 interface(REG_INTER);
2804 %}
2805
2806 // Replaces vec during post-selection cleanup. See above.
2807 operand vecS() %{
2808 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
2809 match(VecS);
2810
2811 format %{ %}
2812 interface(REG_INTER);
2813 %}
2814
2815 // Replaces legVec during post-selection cleanup. See above.
2816 operand legVecS() %{
2817 constraint(ALLOC_IN_RC(vectors_reg_legacy));
2818 match(VecS);
2819
2820 format %{ %}
2821 interface(REG_INTER);
2822 %}
2823
2824 // Replaces vec during post-selection cleanup. See above.
2825 operand vecD() %{
2826 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
2827 match(VecD);
2828
2829 format %{ %}
2830 interface(REG_INTER);
2831 %}
2832
2833 // Replaces legVec during post-selection cleanup. See above.
2834 operand legVecD() %{
2835 constraint(ALLOC_IN_RC(vectord_reg_legacy));
2836 match(VecD);
2837
2838 format %{ %}
2839 interface(REG_INTER);
2840 %}
2841
2842 // Replaces vec during post-selection cleanup. See above.
2843 operand vecX() %{
2844 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
2845 match(VecX);
2846
2847 format %{ %}
2848 interface(REG_INTER);
2849 %}
2850
2851 // Replaces legVec during post-selection cleanup. See above.
2852 operand legVecX() %{
2853 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
2854 match(VecX);
2855
2856 format %{ %}
2857 interface(REG_INTER);
2858 %}
2859
2860 // Replaces vec during post-selection cleanup. See above.
2861 operand vecY() %{
2862 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
2863 match(VecY);
2864
2865 format %{ %}
2866 interface(REG_INTER);
2867 %}
2868
2869 // Replaces legVec during post-selection cleanup. See above.
2870 operand legVecY() %{
2871 constraint(ALLOC_IN_RC(vectory_reg_legacy));
2872 match(VecY);
2873
2874 format %{ %}
2875 interface(REG_INTER);
2876 %}
2877
2878 // Replaces vec during post-selection cleanup. See above.
2879 operand vecZ() %{
2880 constraint(ALLOC_IN_RC(vectorz_reg));
2881 match(VecZ);
2882
2883 format %{ %}
2884 interface(REG_INTER);
2885 %}
2886
2887 // Replaces legVec during post-selection cleanup. See above.
2888 operand legVecZ() %{
2889 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
2890 match(VecZ);
2891
2892 format %{ %}
2893 interface(REG_INTER);
2894 %}
2895
2896 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2897
2898 // ============================================================================
2899
2900 instruct ShouldNotReachHere() %{
2901 match(Halt);
2902 format %{ "stop\t# ShouldNotReachHere" %}
2903 ins_encode %{
2904 if (is_reachable()) {
2905 const char* str = __ code_string(_halt_reason);
2906 __ stop(str);
2907 }
2908 %}
2909 ins_pipe(pipe_slow);
2910 %}
2911
2912 // ============================================================================
2913
2914 instruct addF_reg(regF dst, regF src) %{
2915 predicate(UseAVX == 0);
2916 match(Set dst (AddF dst src));
2917
2918 format %{ "addss $dst, $src" %}
2919 ins_cost(150);
2920 ins_encode %{
2921 __ addss($dst$$XMMRegister, $src$$XMMRegister);
2922 %}
2923 ins_pipe(pipe_slow);
2924 %}
2925
2926 instruct addF_mem(regF dst, memory src) %{
2927 predicate(UseAVX == 0);
2928 match(Set dst (AddF dst (LoadF src)));
2929
2930 format %{ "addss $dst, $src" %}
2931 ins_cost(150);
2932 ins_encode %{
2933 __ addss($dst$$XMMRegister, $src$$Address);
2934 %}
2935 ins_pipe(pipe_slow);
2936 %}
2937
2938 instruct addF_imm(regF dst, immF con) %{
2939 predicate(UseAVX == 0);
2940 match(Set dst (AddF dst con));
2941 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2942 ins_cost(150);
2943 ins_encode %{
2944 __ addss($dst$$XMMRegister, $constantaddress($con));
2945 %}
2946 ins_pipe(pipe_slow);
2947 %}
2948
2949 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
2950 predicate(UseAVX > 0);
2951 match(Set dst (AddF src1 src2));
2952
2953 format %{ "vaddss $dst, $src1, $src2" %}
2954 ins_cost(150);
2955 ins_encode %{
2956 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2957 %}
2958 ins_pipe(pipe_slow);
2959 %}
2960
2961 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
2962 predicate(UseAVX > 0);
2963 match(Set dst (AddF src1 (LoadF src2)));
2964
2965 format %{ "vaddss $dst, $src1, $src2" %}
2966 ins_cost(150);
2967 ins_encode %{
2968 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2969 %}
2970 ins_pipe(pipe_slow);
2971 %}
2972
2973 instruct addF_reg_imm(regF dst, regF src, immF con) %{
2974 predicate(UseAVX > 0);
2975 match(Set dst (AddF src con));
2976
2977 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2978 ins_cost(150);
2979 ins_encode %{
2980 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2981 %}
2982 ins_pipe(pipe_slow);
2983 %}
2984
2985 instruct addD_reg(regD dst, regD src) %{
2986 predicate(UseAVX == 0);
2987 match(Set dst (AddD dst src));
2988
2989 format %{ "addsd $dst, $src" %}
2990 ins_cost(150);
2991 ins_encode %{
2992 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
2993 %}
2994 ins_pipe(pipe_slow);
2995 %}
2996
2997 instruct addD_mem(regD dst, memory src) %{
2998 predicate(UseAVX == 0);
2999 match(Set dst (AddD dst (LoadD src)));
3000
3001 format %{ "addsd $dst, $src" %}
3002 ins_cost(150);
3003 ins_encode %{
3004 __ addsd($dst$$XMMRegister, $src$$Address);
3005 %}
3006 ins_pipe(pipe_slow);
3007 %}
3008
3009 instruct addD_imm(regD dst, immD con) %{
3010 predicate(UseAVX == 0);
3011 match(Set dst (AddD dst con));
3012 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3013 ins_cost(150);
3014 ins_encode %{
3015 __ addsd($dst$$XMMRegister, $constantaddress($con));
3016 %}
3017 ins_pipe(pipe_slow);
3018 %}
3019
3020 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
3021 predicate(UseAVX > 0);
3022 match(Set dst (AddD src1 src2));
3023
3024 format %{ "vaddsd $dst, $src1, $src2" %}
3025 ins_cost(150);
3026 ins_encode %{
3027 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3028 %}
3029 ins_pipe(pipe_slow);
3030 %}
3031
3032 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
3033 predicate(UseAVX > 0);
3034 match(Set dst (AddD src1 (LoadD src2)));
3035
3036 format %{ "vaddsd $dst, $src1, $src2" %}
3037 ins_cost(150);
3038 ins_encode %{
3039 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3040 %}
3041 ins_pipe(pipe_slow);
3042 %}
3043
3044 instruct addD_reg_imm(regD dst, regD src, immD con) %{
3045 predicate(UseAVX > 0);
3046 match(Set dst (AddD src con));
3047
3048 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3049 ins_cost(150);
3050 ins_encode %{
3051 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3052 %}
3053 ins_pipe(pipe_slow);
3054 %}
3055
3056 instruct subF_reg(regF dst, regF src) %{
3057 predicate(UseAVX == 0);
3058 match(Set dst (SubF dst src));
3059
3060 format %{ "subss $dst, $src" %}
3061 ins_cost(150);
3062 ins_encode %{
3063 __ subss($dst$$XMMRegister, $src$$XMMRegister);
3064 %}
3065 ins_pipe(pipe_slow);
3066 %}
3067
3068 instruct subF_mem(regF dst, memory src) %{
3069 predicate(UseAVX == 0);
3070 match(Set dst (SubF dst (LoadF src)));
3071
3072 format %{ "subss $dst, $src" %}
3073 ins_cost(150);
3074 ins_encode %{
3075 __ subss($dst$$XMMRegister, $src$$Address);
3076 %}
3077 ins_pipe(pipe_slow);
3078 %}
3079
3080 instruct subF_imm(regF dst, immF con) %{
3081 predicate(UseAVX == 0);
3082 match(Set dst (SubF dst con));
3083 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3084 ins_cost(150);
3085 ins_encode %{
3086 __ subss($dst$$XMMRegister, $constantaddress($con));
3087 %}
3088 ins_pipe(pipe_slow);
3089 %}
3090
3091 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
3092 predicate(UseAVX > 0);
3093 match(Set dst (SubF src1 src2));
3094
3095 format %{ "vsubss $dst, $src1, $src2" %}
3096 ins_cost(150);
3097 ins_encode %{
3098 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3099 %}
3100 ins_pipe(pipe_slow);
3101 %}
3102
3103 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
3104 predicate(UseAVX > 0);
3105 match(Set dst (SubF src1 (LoadF src2)));
3106
3107 format %{ "vsubss $dst, $src1, $src2" %}
3108 ins_cost(150);
3109 ins_encode %{
3110 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3111 %}
3112 ins_pipe(pipe_slow);
3113 %}
3114
3115 instruct subF_reg_imm(regF dst, regF src, immF con) %{
3116 predicate(UseAVX > 0);
3117 match(Set dst (SubF src con));
3118
3119 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3120 ins_cost(150);
3121 ins_encode %{
3122 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3123 %}
3124 ins_pipe(pipe_slow);
3125 %}
3126
3127 instruct subD_reg(regD dst, regD src) %{
3128 predicate(UseAVX == 0);
3129 match(Set dst (SubD dst src));
3130
3131 format %{ "subsd $dst, $src" %}
3132 ins_cost(150);
3133 ins_encode %{
3134 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
3135 %}
3136 ins_pipe(pipe_slow);
3137 %}
3138
3139 instruct subD_mem(regD dst, memory src) %{
3140 predicate(UseAVX == 0);
3141 match(Set dst (SubD dst (LoadD src)));
3142
3143 format %{ "subsd $dst, $src" %}
3144 ins_cost(150);
3145 ins_encode %{
3146 __ subsd($dst$$XMMRegister, $src$$Address);
3147 %}
3148 ins_pipe(pipe_slow);
3149 %}
3150
3151 instruct subD_imm(regD dst, immD con) %{
3152 predicate(UseAVX == 0);
3153 match(Set dst (SubD dst con));
3154 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3155 ins_cost(150);
3156 ins_encode %{
3157 __ subsd($dst$$XMMRegister, $constantaddress($con));
3158 %}
3159 ins_pipe(pipe_slow);
3160 %}
3161
3162 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
3163 predicate(UseAVX > 0);
3164 match(Set dst (SubD src1 src2));
3165
3166 format %{ "vsubsd $dst, $src1, $src2" %}
3167 ins_cost(150);
3168 ins_encode %{
3169 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3170 %}
3171 ins_pipe(pipe_slow);
3172 %}
3173
3174 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
3175 predicate(UseAVX > 0);
3176 match(Set dst (SubD src1 (LoadD src2)));
3177
3178 format %{ "vsubsd $dst, $src1, $src2" %}
3179 ins_cost(150);
3180 ins_encode %{
3181 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3182 %}
3183 ins_pipe(pipe_slow);
3184 %}
3185
3186 instruct subD_reg_imm(regD dst, regD src, immD con) %{
3187 predicate(UseAVX > 0);
3188 match(Set dst (SubD src con));
3189
3190 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3191 ins_cost(150);
3192 ins_encode %{
3193 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3194 %}
3195 ins_pipe(pipe_slow);
3196 %}
3197
3198 instruct mulF_reg(regF dst, regF src) %{
3199 predicate(UseAVX == 0);
3200 match(Set dst (MulF dst src));
3201
3202 format %{ "mulss $dst, $src" %}
3203 ins_cost(150);
3204 ins_encode %{
3205 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
3206 %}
3207 ins_pipe(pipe_slow);
3208 %}
3209
3210 instruct mulF_mem(regF dst, memory src) %{
3211 predicate(UseAVX == 0);
3212 match(Set dst (MulF dst (LoadF src)));
3213
3214 format %{ "mulss $dst, $src" %}
3215 ins_cost(150);
3216 ins_encode %{
3217 __ mulss($dst$$XMMRegister, $src$$Address);
3218 %}
3219 ins_pipe(pipe_slow);
3220 %}
3221
3222 instruct mulF_imm(regF dst, immF con) %{
3223 predicate(UseAVX == 0);
3224 match(Set dst (MulF dst con));
3225 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3226 ins_cost(150);
3227 ins_encode %{
3228 __ mulss($dst$$XMMRegister, $constantaddress($con));
3229 %}
3230 ins_pipe(pipe_slow);
3231 %}
3232
3233 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
3234 predicate(UseAVX > 0);
3235 match(Set dst (MulF src1 src2));
3236
3237 format %{ "vmulss $dst, $src1, $src2" %}
3238 ins_cost(150);
3239 ins_encode %{
3240 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3241 %}
3242 ins_pipe(pipe_slow);
3243 %}
3244
3245 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
3246 predicate(UseAVX > 0);
3247 match(Set dst (MulF src1 (LoadF src2)));
3248
3249 format %{ "vmulss $dst, $src1, $src2" %}
3250 ins_cost(150);
3251 ins_encode %{
3252 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3253 %}
3254 ins_pipe(pipe_slow);
3255 %}
3256
3257 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
3258 predicate(UseAVX > 0);
3259 match(Set dst (MulF src con));
3260
3261 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3262 ins_cost(150);
3263 ins_encode %{
3264 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3265 %}
3266 ins_pipe(pipe_slow);
3267 %}
3268
3269 instruct mulD_reg(regD dst, regD src) %{
3270 predicate(UseAVX == 0);
3271 match(Set dst (MulD dst src));
3272
3273 format %{ "mulsd $dst, $src" %}
3274 ins_cost(150);
3275 ins_encode %{
3276 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
3277 %}
3278 ins_pipe(pipe_slow);
3279 %}
3280
3281 instruct mulD_mem(regD dst, memory src) %{
3282 predicate(UseAVX == 0);
3283 match(Set dst (MulD dst (LoadD src)));
3284
3285 format %{ "mulsd $dst, $src" %}
3286 ins_cost(150);
3287 ins_encode %{
3288 __ mulsd($dst$$XMMRegister, $src$$Address);
3289 %}
3290 ins_pipe(pipe_slow);
3291 %}
3292
3293 instruct mulD_imm(regD dst, immD con) %{
3294 predicate(UseAVX == 0);
3295 match(Set dst (MulD dst con));
3296 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3297 ins_cost(150);
3298 ins_encode %{
3299 __ mulsd($dst$$XMMRegister, $constantaddress($con));
3300 %}
3301 ins_pipe(pipe_slow);
3302 %}
3303
3304 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
3305 predicate(UseAVX > 0);
3306 match(Set dst (MulD src1 src2));
3307
3308 format %{ "vmulsd $dst, $src1, $src2" %}
3309 ins_cost(150);
3310 ins_encode %{
3311 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3312 %}
3313 ins_pipe(pipe_slow);
3314 %}
3315
3316 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
3317 predicate(UseAVX > 0);
3318 match(Set dst (MulD src1 (LoadD src2)));
3319
3320 format %{ "vmulsd $dst, $src1, $src2" %}
3321 ins_cost(150);
3322 ins_encode %{
3323 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3324 %}
3325 ins_pipe(pipe_slow);
3326 %}
3327
3328 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
3329 predicate(UseAVX > 0);
3330 match(Set dst (MulD src con));
3331
3332 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3333 ins_cost(150);
3334 ins_encode %{
3335 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3336 %}
3337 ins_pipe(pipe_slow);
3338 %}
3339
3340 instruct divF_reg(regF dst, regF src) %{
3341 predicate(UseAVX == 0);
3342 match(Set dst (DivF dst src));
3343
3344 format %{ "divss $dst, $src" %}
3345 ins_cost(150);
3346 ins_encode %{
3347 __ divss($dst$$XMMRegister, $src$$XMMRegister);
3348 %}
3349 ins_pipe(pipe_slow);
3350 %}
3351
3352 instruct divF_mem(regF dst, memory src) %{
3353 predicate(UseAVX == 0);
3354 match(Set dst (DivF dst (LoadF src)));
3355
3356 format %{ "divss $dst, $src" %}
3357 ins_cost(150);
3358 ins_encode %{
3359 __ divss($dst$$XMMRegister, $src$$Address);
3360 %}
3361 ins_pipe(pipe_slow);
3362 %}
3363
3364 instruct divF_imm(regF dst, immF con) %{
3365 predicate(UseAVX == 0);
3366 match(Set dst (DivF dst con));
3367 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3368 ins_cost(150);
3369 ins_encode %{
3370 __ divss($dst$$XMMRegister, $constantaddress($con));
3371 %}
3372 ins_pipe(pipe_slow);
3373 %}
3374
3375 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
3376 predicate(UseAVX > 0);
3377 match(Set dst (DivF src1 src2));
3378
3379 format %{ "vdivss $dst, $src1, $src2" %}
3380 ins_cost(150);
3381 ins_encode %{
3382 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3383 %}
3384 ins_pipe(pipe_slow);
3385 %}
3386
3387 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
3388 predicate(UseAVX > 0);
3389 match(Set dst (DivF src1 (LoadF src2)));
3390
3391 format %{ "vdivss $dst, $src1, $src2" %}
3392 ins_cost(150);
3393 ins_encode %{
3394 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3395 %}
3396 ins_pipe(pipe_slow);
3397 %}
3398
3399 instruct divF_reg_imm(regF dst, regF src, immF con) %{
3400 predicate(UseAVX > 0);
3401 match(Set dst (DivF src con));
3402
3403 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3404 ins_cost(150);
3405 ins_encode %{
3406 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3407 %}
3408 ins_pipe(pipe_slow);
3409 %}
3410
3411 instruct divD_reg(regD dst, regD src) %{
3412 predicate(UseAVX == 0);
3413 match(Set dst (DivD dst src));
3414
3415 format %{ "divsd $dst, $src" %}
3416 ins_cost(150);
3417 ins_encode %{
3418 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
3419 %}
3420 ins_pipe(pipe_slow);
3421 %}
3422
3423 instruct divD_mem(regD dst, memory src) %{
3424 predicate(UseAVX == 0);
3425 match(Set dst (DivD dst (LoadD src)));
3426
3427 format %{ "divsd $dst, $src" %}
3428 ins_cost(150);
3429 ins_encode %{
3430 __ divsd($dst$$XMMRegister, $src$$Address);
3431 %}
3432 ins_pipe(pipe_slow);
3433 %}
3434
3435 instruct divD_imm(regD dst, immD con) %{
3436 predicate(UseAVX == 0);
3437 match(Set dst (DivD dst con));
3438 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3439 ins_cost(150);
3440 ins_encode %{
3441 __ divsd($dst$$XMMRegister, $constantaddress($con));
3442 %}
3443 ins_pipe(pipe_slow);
3444 %}
3445
3446 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
3447 predicate(UseAVX > 0);
3448 match(Set dst (DivD src1 src2));
3449
3450 format %{ "vdivsd $dst, $src1, $src2" %}
3451 ins_cost(150);
3452 ins_encode %{
3453 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3454 %}
3455 ins_pipe(pipe_slow);
3456 %}
3457
3458 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
3459 predicate(UseAVX > 0);
3460 match(Set dst (DivD src1 (LoadD src2)));
3461
3462 format %{ "vdivsd $dst, $src1, $src2" %}
3463 ins_cost(150);
3464 ins_encode %{
3465 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3466 %}
3467 ins_pipe(pipe_slow);
3468 %}
3469
3470 instruct divD_reg_imm(regD dst, regD src, immD con) %{
3471 predicate(UseAVX > 0);
3472 match(Set dst (DivD src con));
3473
3474 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3475 ins_cost(150);
3476 ins_encode %{
3477 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3478 %}
3479 ins_pipe(pipe_slow);
3480 %}
3481
3482 instruct absF_reg(regF dst) %{
3483 predicate(UseAVX == 0);
3484 match(Set dst (AbsF dst));
3485 ins_cost(150);
3486 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
3487 ins_encode %{
3488 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
3489 %}
3490 ins_pipe(pipe_slow);
3491 %}
3492
3493 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
3494 predicate(UseAVX > 0);
3495 match(Set dst (AbsF src));
3496 ins_cost(150);
3497 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
3498 ins_encode %{
3499 int vlen_enc = Assembler::AVX_128bit;
3500 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
3501 ExternalAddress(float_signmask()), vlen_enc);
3502 %}
3503 ins_pipe(pipe_slow);
3504 %}
3505
3506 instruct absD_reg(regD dst) %{
3507 predicate(UseAVX == 0);
3508 match(Set dst (AbsD dst));
3509 ins_cost(150);
3510 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
3511 "# abs double by sign masking" %}
3512 ins_encode %{
3513 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
3514 %}
3515 ins_pipe(pipe_slow);
3516 %}
3517
3518 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
3519 predicate(UseAVX > 0);
3520 match(Set dst (AbsD src));
3521 ins_cost(150);
3522 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
3523 "# abs double by sign masking" %}
3524 ins_encode %{
3525 int vlen_enc = Assembler::AVX_128bit;
3526 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
3527 ExternalAddress(double_signmask()), vlen_enc);
3528 %}
3529 ins_pipe(pipe_slow);
3530 %}
3531
3532 instruct negF_reg(regF dst) %{
3533 predicate(UseAVX == 0);
3534 match(Set dst (NegF dst));
3535 ins_cost(150);
3536 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
3537 ins_encode %{
3538 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
3539 %}
3540 ins_pipe(pipe_slow);
3541 %}
3542
3543 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
3544 predicate(UseAVX > 0);
3545 match(Set dst (NegF src));
3546 ins_cost(150);
3547 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
3548 ins_encode %{
3549 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
3550 ExternalAddress(float_signflip()));
3551 %}
3552 ins_pipe(pipe_slow);
3553 %}
3554
3555 instruct negD_reg(regD dst) %{
3556 predicate(UseAVX == 0);
3557 match(Set dst (NegD dst));
3558 ins_cost(150);
3559 format %{ "xorpd $dst, [0x8000000000000000]\t"
3560 "# neg double by sign flipping" %}
3561 ins_encode %{
3562 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
3563 %}
3564 ins_pipe(pipe_slow);
3565 %}
3566
3567 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
3568 predicate(UseAVX > 0);
3569 match(Set dst (NegD src));
3570 ins_cost(150);
3571 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
3572 "# neg double by sign flipping" %}
3573 ins_encode %{
3574 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
3575 ExternalAddress(double_signflip()));
3576 %}
3577 ins_pipe(pipe_slow);
3578 %}
3579
3580 // The sqrtss instruction needs the destination register to be pre-initialized for best performance.
3581 // Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
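// (Most likely because the scalar sqrt only writes the low element of dst and leaves the upper
// bits untouched, so an uninitialized dst would add a false dependency on its previous value.)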
3582 instruct sqrtF_reg(regF dst) %{
3583 match(Set dst (SqrtF dst));
3584 format %{ "sqrtss $dst, $dst" %}
3585 ins_encode %{
3586 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
3587 %}
3588 ins_pipe(pipe_slow);
3589 %}
3590
3591 // The sqrtsd instruction needs the destination register to be pre-initialized for best performance.
3592 // Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
3593 instruct sqrtD_reg(regD dst) %{
3594 match(Set dst (SqrtD dst));
3595 format %{ "sqrtsd $dst, $dst" %}
3596 ins_encode %{
3597 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
3598 %}
3599 ins_pipe(pipe_slow);
3600 %}
3601
3602 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
3603 effect(TEMP tmp);
3604 match(Set dst (ConvF2HF src));
3605 ins_cost(125);
3606   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP" %}
3607 ins_encode %{
3608 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
3609 %}
3610 ins_pipe( pipe_slow );
3611 %}
3612
3613 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
3614 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
3615 effect(TEMP ktmp, TEMP rtmp);
3616 match(Set mem (StoreC mem (ConvF2HF src)));
3617 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
3618 ins_encode %{
3619 __ movl($rtmp$$Register, 0x1);
3620 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
3621 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
3622 %}
3623 ins_pipe( pipe_slow );
3624 %}
3625
3626 instruct vconvF2HF(vec dst, vec src) %{
3627 match(Set dst (VectorCastF2HF src));
3628 format %{ "vector_conv_F2HF $dst $src" %}
3629 ins_encode %{
3630 int vlen_enc = vector_length_encoding(this, $src);
3631 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
3632 %}
3633 ins_pipe( pipe_slow );
3634 %}
3635
3636 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
3637 predicate(n->as_StoreVector()->memory_size() >= 16);
3638 match(Set mem (StoreVector mem (VectorCastF2HF src)));
3639 format %{ "vcvtps2ph $mem,$src" %}
3640 ins_encode %{
3641 int vlen_enc = vector_length_encoding(this, $src);
3642 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
3643 %}
3644 ins_pipe( pipe_slow );
3645 %}
3646
3647 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
3648 match(Set dst (ConvHF2F src));
3649 format %{ "vcvtph2ps $dst,$src" %}
3650 ins_encode %{
3651 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
3652 %}
3653 ins_pipe( pipe_slow );
3654 %}
3655
3656 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
3657 match(Set dst (VectorCastHF2F (LoadVector mem)));
3658 format %{ "vcvtph2ps $dst,$mem" %}
3659 ins_encode %{
3660 int vlen_enc = vector_length_encoding(this);
3661 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
3662 %}
3663 ins_pipe( pipe_slow );
3664 %}
3665
3666 instruct vconvHF2F(vec dst, vec src) %{
3667 match(Set dst (VectorCastHF2F src));
3668 ins_cost(125);
3669 format %{ "vector_conv_HF2F $dst,$src" %}
3670 ins_encode %{
3671 int vlen_enc = vector_length_encoding(this);
3672 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
3673 %}
3674 ins_pipe( pipe_slow );
3675 %}
3676
3677 // ---------------------------------------- VectorReinterpret ------------------------------------
3678 instruct reinterpret_mask(kReg dst) %{
3679 predicate(n->bottom_type()->isa_vectmask() &&
3680 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
3681 match(Set dst (VectorReinterpret dst));
3682 ins_cost(125);
3683 format %{ "vector_reinterpret $dst\t!" %}
3684 ins_encode %{
3685 // empty
3686 %}
3687 ins_pipe( pipe_slow );
3688 %}
3689
3690 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
3691 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3692 n->bottom_type()->isa_vectmask() &&
3693 n->in(1)->bottom_type()->isa_vectmask() &&
3694 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
3695 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3696 match(Set dst (VectorReinterpret src));
3697 effect(TEMP xtmp);
3698 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
3699 ins_encode %{
3700 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
3701 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3702 assert(src_sz == dst_sz , "src and dst size mismatch");
3703 int vlen_enc = vector_length_encoding(src_sz);
3704 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3705 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3706 %}
3707 ins_pipe( pipe_slow );
3708 %}
3709
3710 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
3711 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3712 n->bottom_type()->isa_vectmask() &&
3713 n->in(1)->bottom_type()->isa_vectmask() &&
3714 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
3715 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
3716 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3717 match(Set dst (VectorReinterpret src));
3718 effect(TEMP xtmp);
3719 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
3720 ins_encode %{
3721 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
3722 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3723 assert(src_sz == dst_sz , "src and dst size mismatch");
3724 int vlen_enc = vector_length_encoding(src_sz);
3725 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3726 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3727 %}
3728 ins_pipe( pipe_slow );
3729 %}
3730
3731 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
3732 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3733 n->bottom_type()->isa_vectmask() &&
3734 n->in(1)->bottom_type()->isa_vectmask() &&
3735 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
3736 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
3737 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3738 match(Set dst (VectorReinterpret src));
3739 effect(TEMP xtmp);
3740 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
3741 ins_encode %{
3742 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
3743 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3744 assert(src_sz == dst_sz , "src and dst size mismatch");
3745 int vlen_enc = vector_length_encoding(src_sz);
3746 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3747 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3748 %}
3749 ins_pipe( pipe_slow );
3750 %}
3751
3752 instruct reinterpret(vec dst) %{
3753 predicate(!n->bottom_type()->isa_vectmask() &&
3754 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
3755 match(Set dst (VectorReinterpret dst));
3756 ins_cost(125);
3757 format %{ "vector_reinterpret $dst\t!" %}
3758 ins_encode %{
3759 // empty
3760 %}
3761 ins_pipe( pipe_slow );
3762 %}
3763
3764 instruct reinterpret_expand(vec dst, vec src) %{
3765 predicate(UseAVX == 0 &&
3766 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3767 match(Set dst (VectorReinterpret src));
3768 ins_cost(125);
3769 effect(TEMP dst);
3770 format %{ "vector_reinterpret_expand $dst,$src" %}
3771 ins_encode %{
3772 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
3773 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
3774
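    // Presumably vector_32_bit_mask()/vector_64_bit_mask() have only their low 4/8 bytes set,
    // so AND-ing the mask with $src keeps the original narrow value and zeroes the remaining
    // bytes, i.e. the reinterpret effectively zero-extends into the wider register.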
3775 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
3776 if (src_vlen_in_bytes == 4) {
3777 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
3778 } else {
3779 assert(src_vlen_in_bytes == 8, "");
3780 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
3781 }
3782 __ pand($dst$$XMMRegister, $src$$XMMRegister);
3783 %}
3784 ins_pipe( pipe_slow );
3785 %}
3786
3787 instruct vreinterpret_expand4(legVec dst, vec src) %{
3788 predicate(UseAVX > 0 &&
3789 !n->bottom_type()->isa_vectmask() &&
3790 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
3791 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3792 match(Set dst (VectorReinterpret src));
3793 ins_cost(125);
3794 format %{ "vector_reinterpret_expand $dst,$src" %}
3795 ins_encode %{
3796 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
3797 %}
3798 ins_pipe( pipe_slow );
3799 %}
3800
3801
3802 instruct vreinterpret_expand(legVec dst, vec src) %{
3803 predicate(UseAVX > 0 &&
3804 !n->bottom_type()->isa_vectmask() &&
3805 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
3806 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3807 match(Set dst (VectorReinterpret src));
3808 ins_cost(125);
3809 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
3810 ins_encode %{
3811 switch (Matcher::vector_length_in_bytes(this, $src)) {
3812 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
3813 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3814 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3815 default: ShouldNotReachHere();
3816 }
3817 %}
3818 ins_pipe( pipe_slow );
3819 %}
3820
3821 instruct reinterpret_shrink(vec dst, legVec src) %{
3822 predicate(!n->bottom_type()->isa_vectmask() &&
3823 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
3824 match(Set dst (VectorReinterpret src));
3825 ins_cost(125);
3826 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
3827 ins_encode %{
3828 switch (Matcher::vector_length_in_bytes(this)) {
3829 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
3830 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
3831 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3832 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3833 default: ShouldNotReachHere();
3834 }
3835 %}
3836 ins_pipe( pipe_slow );
3837 %}
3838
3839 // ----------------------------------------------------------------------------------------------------
3840
3841 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
3842 match(Set dst (RoundDoubleMode src rmode));
3843 format %{ "roundsd $dst,$src" %}
3844 ins_cost(150);
3845 ins_encode %{
3846 assert(UseSSE >= 4, "required");
3847 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
3848 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3849 }
3850 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
3851 %}
3852 ins_pipe(pipe_slow);
3853 %}
3854
3855 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
3856 match(Set dst (RoundDoubleMode con rmode));
3857 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
3858 ins_cost(150);
3859 ins_encode %{
3860 assert(UseSSE >= 4, "required");
3861 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
3862 %}
3863 ins_pipe(pipe_slow);
3864 %}
3865
3866 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
3867 predicate(Matcher::vector_length(n) < 8);
3868 match(Set dst (RoundDoubleModeV src rmode));
3869 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
3870 ins_encode %{
3871 assert(UseAVX > 0, "required");
3872 int vlen_enc = vector_length_encoding(this);
3873 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
3874 %}
3875 ins_pipe( pipe_slow );
3876 %}
3877
3878 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
3879 predicate(Matcher::vector_length(n) == 8);
3880 match(Set dst (RoundDoubleModeV src rmode));
3881 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
3882 ins_encode %{
3883 assert(UseAVX > 2, "required");
3884 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
3885 %}
3886 ins_pipe( pipe_slow );
3887 %}
3888
3889 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
3890 predicate(Matcher::vector_length(n) < 8);
3891 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3892 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
3893 ins_encode %{
3894 assert(UseAVX > 0, "required");
3895 int vlen_enc = vector_length_encoding(this);
3896 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
3897 %}
3898 ins_pipe( pipe_slow );
3899 %}
3900
3901 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
3902 predicate(Matcher::vector_length(n) == 8);
3903 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3904 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
3905 ins_encode %{
3906 assert(UseAVX > 2, "required");
3907 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
3908 %}
3909 ins_pipe( pipe_slow );
3910 %}
3911
3912 instruct onspinwait() %{
3913 match(OnSpinWait);
3914 ins_cost(200);
3915
3916 format %{
3917 $$template
3918 $$emit$$"pause\t! membar_onspinwait"
3919 %}
3920 ins_encode %{
3921 __ pause();
3922 %}
3923 ins_pipe(pipe_slow);
3924 %}
3925
3926 // a * b + c
3927 instruct fmaD_reg(regD a, regD b, regD c) %{
3928 match(Set c (FmaD c (Binary a b)));
3929 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
3930 ins_cost(150);
3931 ins_encode %{
3932 assert(UseFMA, "Needs FMA instructions support.");
3933 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3934 %}
3935 ins_pipe( pipe_slow );
3936 %}
3937
3938 // a * b + c
3939 instruct fmaF_reg(regF a, regF b, regF c) %{
3940 match(Set c (FmaF c (Binary a b)));
3941 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
3942 ins_cost(150);
3943 ins_encode %{
3944 assert(UseFMA, "Needs FMA instructions support.");
3945 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3946 %}
3947 ins_pipe( pipe_slow );
3948 %}
3949
3950 // ====================VECTOR INSTRUCTIONS=====================================
3951
3952 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
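// They must be eliminated before code emission, so their ins_encode deliberately calls
// ShouldNotReachHere() to catch any instance that survives the post-selection pass.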
3953 instruct MoveVec2Leg(legVec dst, vec src) %{
3954 match(Set dst src);
3955 format %{ "" %}
3956 ins_encode %{
3957 ShouldNotReachHere();
3958 %}
3959 ins_pipe( fpu_reg_reg );
3960 %}
3961
3962 instruct MoveLeg2Vec(vec dst, legVec src) %{
3963 match(Set dst src);
3964 format %{ "" %}
3965 ins_encode %{
3966 ShouldNotReachHere();
3967 %}
3968 ins_pipe( fpu_reg_reg );
3969 %}
3970
3971 // ============================================================================
3972
3973 // Load vectors generic operand pattern
3974 instruct loadV(vec dst, memory mem) %{
3975 match(Set dst (LoadVector mem));
3976 ins_cost(125);
3977 format %{ "load_vector $dst,$mem" %}
3978 ins_encode %{
3979 BasicType bt = Matcher::vector_element_basic_type(this);
3980 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
3981 %}
3982 ins_pipe( pipe_slow );
3983 %}
3984
3985 // Store vectors generic operand pattern.
3986 instruct storeV(memory mem, vec src) %{
3987 match(Set mem (StoreVector mem src));
3988 ins_cost(145);
3989 format %{ "store_vector $mem,$src\n\t" %}
3990 ins_encode %{
3991 switch (Matcher::vector_length_in_bytes(this, $src)) {
3992 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
3993 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
3994 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
3995 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
3996 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
3997 default: ShouldNotReachHere();
3998 }
3999 %}
4000 ins_pipe( pipe_slow );
4001 %}
4002
4003 // ---------------------------------------- Gather ------------------------------------
4004
4005 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
4006
4007 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
4008 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
4009 Matcher::vector_length_in_bytes(n) <= 32);
4010 match(Set dst (LoadVectorGather mem idx));
4011 effect(TEMP dst, TEMP tmp, TEMP mask);
4012 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
4013 ins_encode %{
4014 int vlen_enc = vector_length_encoding(this);
4015 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4016 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4017 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
4018 __ lea($tmp$$Register, $mem$$Address);
4019 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
4020 %}
4021 ins_pipe( pipe_slow );
4022 %}
4023
4024
4025 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
4026 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
4027 !is_subword_type(Matcher::vector_element_basic_type(n)));
4028 match(Set dst (LoadVectorGather mem idx));
4029 effect(TEMP dst, TEMP tmp, TEMP ktmp);
4030 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
4031 ins_encode %{
4032 int vlen_enc = vector_length_encoding(this);
4033 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4034 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
4035 __ lea($tmp$$Register, $mem$$Address);
4036 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
4037 %}
4038 ins_pipe( pipe_slow );
4039 %}
4040
4041 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
4042 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
4043 !is_subword_type(Matcher::vector_element_basic_type(n)));
4044 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
4045 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
4046 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
4047 ins_encode %{
4048 assert(UseAVX > 2, "sanity");
4049 int vlen_enc = vector_length_encoding(this);
4050 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4051 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4052     // Note: Since the gather instruction partially updates the opmask register used
4053     // for predication, the mask operand is moved to a temporary first.
4054 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
4055 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4056 __ lea($tmp$$Register, $mem$$Address);
4057 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
4058 %}
4059 ins_pipe( pipe_slow );
4060 %}
4061
4062 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
4063 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4064 match(Set dst (LoadVectorGather mem idx_base));
4065 effect(TEMP tmp, TEMP rtmp);
4066 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
4067 ins_encode %{
4068 int vlen_enc = vector_length_encoding(this);
4069 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4070 __ lea($tmp$$Register, $mem$$Address);
4071 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
4072 %}
4073 ins_pipe( pipe_slow );
4074 %}
4075
4076 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
4077 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
4078 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4079 match(Set dst (LoadVectorGather mem idx_base));
4080 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
4081 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
4082 ins_encode %{
4083 int vlen_enc = vector_length_encoding(this);
4084 int vector_len = Matcher::vector_length(this);
4085 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4086 __ lea($tmp$$Register, $mem$$Address);
4087 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
4088 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
4089 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
4090 %}
4091 ins_pipe( pipe_slow );
4092 %}
4093
4094 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
4095 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4096 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
4097 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
4098 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
4099 ins_encode %{
4100 int vlen_enc = vector_length_encoding(this);
4101 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4102 __ xorq($mask_idx$$Register, $mask_idx$$Register);
4103 __ lea($tmp$$Register, $mem$$Address);
4104 __ kmovql($rtmp2$$Register, $mask$$KRegister);
4105 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
4106 %}
4107 ins_pipe( pipe_slow );
4108 %}
4109
4110 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
4111 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
4112 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4113 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
4114 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
4115 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
4116 ins_encode %{
4117 int vlen_enc = vector_length_encoding(this);
4118 int vector_len = Matcher::vector_length(this);
4119 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4120 __ xorq($mask_idx$$Register, $mask_idx$$Register);
4121 __ lea($tmp$$Register, $mem$$Address);
4122 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
4123 __ kmovql($rtmp2$$Register, $mask$$KRegister);
4124 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
4125 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
4126 %}
4127 ins_pipe( pipe_slow );
4128 %}
4129
4130 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
4131 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4132 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
4133 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
4134 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
4135 ins_encode %{
4136 int vlen_enc = vector_length_encoding(this);
4137 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4138 __ lea($tmp$$Register, $mem$$Address);
4139 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
4140 if (elem_bt == T_SHORT) {
4141 __ movl($mask_idx$$Register, 0x55555555);
4142 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
4143 }
4144 __ xorl($mask_idx$$Register, $mask_idx$$Register);
4145 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
4146 %}
4147 ins_pipe( pipe_slow );
4148 %}
4149
4150 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
4151 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
4152 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4153 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
4154 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
4155 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
4156 ins_encode %{
4157 int vlen_enc = vector_length_encoding(this);
4158 int vector_len = Matcher::vector_length(this);
4159 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4160 __ lea($tmp$$Register, $mem$$Address);
4161 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
4162 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
4163 if (elem_bt == T_SHORT) {
4164 __ movl($mask_idx$$Register, 0x55555555);
4165 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
4166 }
4167 __ xorl($mask_idx$$Register, $mask_idx$$Register);
4168 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
4169 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
4170 %}
4171 ins_pipe( pipe_slow );
4172 %}
4173
4174 // ====================Scatter=======================================
4175
4176 // Scatter INT, LONG, FLOAT, DOUBLE
4177
4178 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
4179 predicate(UseAVX > 2);
4180 match(Set mem (StoreVectorScatter mem (Binary src idx)));
4181 effect(TEMP tmp, TEMP ktmp);
4182   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
4183 ins_encode %{
4184 int vlen_enc = vector_length_encoding(this, $src);
4185 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
4186
4187 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
4188 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4189
4190 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
4191 __ lea($tmp$$Register, $mem$$Address);
4192 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
4193 %}
4194 ins_pipe( pipe_slow );
4195 %}
4196
4197 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
4198 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
4199 effect(TEMP tmp, TEMP ktmp);
4200 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
4201 ins_encode %{
4202 int vlen_enc = vector_length_encoding(this, $src);
4203 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
4204 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
4205 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4206     // Note: Since the scatter instruction partially updates the opmask register used
4207     // for predication, the mask operand is moved to a temporary first.
4208 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
4209 __ lea($tmp$$Register, $mem$$Address);
4210 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
4211 %}
4212 ins_pipe( pipe_slow );
4213 %}
4214
4215 // ====================REPLICATE=======================================
4216
4217 // Replicate byte scalar to be vector
4218 instruct vReplB_reg(vec dst, rRegI src) %{
4219 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
4220 match(Set dst (Replicate src));
4221 format %{ "replicateB $dst,$src" %}
4222 ins_encode %{
4223 uint vlen = Matcher::vector_length(this);
4224 if (UseAVX >= 2) {
4225 int vlen_enc = vector_length_encoding(this);
4226 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
4227 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
4228 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
4229 } else {
4230 __ movdl($dst$$XMMRegister, $src$$Register);
4231 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4232 }
4233 } else {
4234 assert(UseAVX < 2, "");
4235 __ movdl($dst$$XMMRegister, $src$$Register);
4236 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
4237 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4238 if (vlen >= 16) {
4239 assert(vlen == 16, "");
4240 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4241 }
4242 }
4243 %}
4244 ins_pipe( pipe_slow );
4245 %}
4246
4247 instruct ReplB_mem(vec dst, memory mem) %{
4248 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
4249 match(Set dst (Replicate (LoadB mem)));
4250 format %{ "replicateB $dst,$mem" %}
4251 ins_encode %{
4252 int vlen_enc = vector_length_encoding(this);
4253 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
4254 %}
4255 ins_pipe( pipe_slow );
4256 %}
4257
4258 // ====================ReplicateS=======================================
4259
4260 instruct vReplS_reg(vec dst, rRegI src) %{
4261 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
4262 match(Set dst (Replicate src));
4263 format %{ "replicateS $dst,$src" %}
4264 ins_encode %{
4265 uint vlen = Matcher::vector_length(this);
4266 int vlen_enc = vector_length_encoding(this);
4267 if (UseAVX >= 2) {
4268 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
4269 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
4270 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
4271 } else {
4272 __ movdl($dst$$XMMRegister, $src$$Register);
4273 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4274 }
4275 } else {
4276 assert(UseAVX < 2, "");
4277 __ movdl($dst$$XMMRegister, $src$$Register);
4278 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4279 if (vlen >= 8) {
4280 assert(vlen == 8, "");
4281 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4282 }
4283 }
4284 %}
4285 ins_pipe( pipe_slow );
4286 %}
4287
4288 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
4289 match(Set dst (Replicate con));
4290 effect(TEMP rtmp);
4291 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
4292 ins_encode %{
4293 int vlen_enc = vector_length_encoding(this);
4294 BasicType bt = Matcher::vector_element_basic_type(this);
4295 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
4296 __ movl($rtmp$$Register, $con$$constant);
4297 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
4298 %}
4299 ins_pipe( pipe_slow );
4300 %}
4301
4302 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
4303 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
4304 match(Set dst (Replicate src));
4305 effect(TEMP rtmp);
4306 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
4307 ins_encode %{
4308 int vlen_enc = vector_length_encoding(this);
4309 __ vmovw($rtmp$$Register, $src$$XMMRegister);
4310 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
4311 %}
4312 ins_pipe( pipe_slow );
4313 %}
4314
4315 instruct ReplS_mem(vec dst, memory mem) %{
4316 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
4317 match(Set dst (Replicate (LoadS mem)));
4318 format %{ "replicateS $dst,$mem" %}
4319 ins_encode %{
4320 int vlen_enc = vector_length_encoding(this);
4321 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
4322 %}
4323 ins_pipe( pipe_slow );
4324 %}
4325
4326 // ====================ReplicateI=======================================
4327
4328 instruct ReplI_reg(vec dst, rRegI src) %{
4329 predicate(Matcher::vector_element_basic_type(n) == T_INT);
4330 match(Set dst (Replicate src));
4331 format %{ "replicateI $dst,$src" %}
4332 ins_encode %{
4333 uint vlen = Matcher::vector_length(this);
4334 int vlen_enc = vector_length_encoding(this);
4335 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
4336 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
4337 } else if (VM_Version::supports_avx2()) {
4338 __ movdl($dst$$XMMRegister, $src$$Register);
4339 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4340 } else {
4341 __ movdl($dst$$XMMRegister, $src$$Register);
4342 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4343 }
4344 %}
4345 ins_pipe( pipe_slow );
4346 %}
4347
4348 instruct ReplI_mem(vec dst, memory mem) %{
4349 predicate(Matcher::vector_element_basic_type(n) == T_INT);
4350 match(Set dst (Replicate (LoadI mem)));
4351 format %{ "replicateI $dst,$mem" %}
4352 ins_encode %{
4353 int vlen_enc = vector_length_encoding(this);
4354 if (VM_Version::supports_avx2()) {
4355 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
4356 } else if (VM_Version::supports_avx()) {
4357 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
4358 } else {
4359 __ movdl($dst$$XMMRegister, $mem$$Address);
4360 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4361 }
4362 %}
4363 ins_pipe( pipe_slow );
4364 %}
4365
4366 instruct ReplI_imm(vec dst, immI con) %{
4367 predicate(Matcher::is_non_long_integral_vector(n));
4368 match(Set dst (Replicate con));
4369 format %{ "replicateI $dst,$con" %}
4370 ins_encode %{
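    // The immediate is materialized in the constant table: it is replicated to
    // fill a 4, 8 or 16 byte entry (depending on which broadcast load forms are
    // available), and load_constant_vector() then expands that entry to the
    // full vector width.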
4371 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
4372 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
4373 type2aelembytes(Matcher::vector_element_basic_type(this))));
4374 BasicType bt = Matcher::vector_element_basic_type(this);
4375 int vlen = Matcher::vector_length_in_bytes(this);
4376 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
4377 %}
4378 ins_pipe( pipe_slow );
4379 %}
4380
4381 // Replicate scalar zero to be vector
4382 instruct ReplI_zero(vec dst, immI_0 zero) %{
4383 predicate(Matcher::is_non_long_integral_vector(n));
4384 match(Set dst (Replicate zero));
4385 format %{ "replicateI $dst,$zero" %}
4386 ins_encode %{
4387 int vlen_enc = vector_length_encoding(this);
4388 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
4389 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4390 } else {
4391 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4392 }
4393 %}
4394 ins_pipe( fpu_reg_reg );
4395 %}
4396
4397 instruct ReplI_M1(vec dst, immI_M1 con) %{
4398 predicate(Matcher::is_non_long_integral_vector(n));
4399 match(Set dst (Replicate con));
4400 format %{ "vallones $dst" %}
4401 ins_encode %{
4402 int vector_len = vector_length_encoding(this);
4403 __ vallones($dst$$XMMRegister, vector_len);
4404 %}
4405 ins_pipe( pipe_slow );
4406 %}
4407
4408 // ====================ReplicateL=======================================
4409
4410 // Replicate long (8 byte) scalar to be vector
4411 instruct ReplL_reg(vec dst, rRegL src) %{
4412 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4413 match(Set dst (Replicate src));
4414 format %{ "replicateL $dst,$src" %}
4415 ins_encode %{
4416 int vlen = Matcher::vector_length(this);
4417 int vlen_enc = vector_length_encoding(this);
4418 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
4419 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
4420 } else if (VM_Version::supports_avx2()) {
4421 __ movdq($dst$$XMMRegister, $src$$Register);
4422 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4423 } else {
4424 __ movdq($dst$$XMMRegister, $src$$Register);
4425 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4426 }
4427 %}
4428 ins_pipe( pipe_slow );
4429 %}
4430
4431 instruct ReplL_mem(vec dst, memory mem) %{
4432 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4433 match(Set dst (Replicate (LoadL mem)));
4434 format %{ "replicateL $dst,$mem" %}
4435 ins_encode %{
4436 int vlen_enc = vector_length_encoding(this);
4437 if (VM_Version::supports_avx2()) {
4438 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
4439 } else if (VM_Version::supports_sse3()) {
4440 __ movddup($dst$$XMMRegister, $mem$$Address);
4441 } else {
4442 __ movq($dst$$XMMRegister, $mem$$Address);
4443 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4444 }
4445 %}
4446 ins_pipe( pipe_slow );
4447 %}
4448
4449 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
4450 instruct ReplL_imm(vec dst, immL con) %{
4451 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4452 match(Set dst (Replicate con));
4453 format %{ "replicateL $dst,$con" %}
4454 ins_encode %{
4455 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
4456 int vlen = Matcher::vector_length_in_bytes(this);
4457 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
4458 %}
4459 ins_pipe( pipe_slow );
4460 %}
4461
4462 instruct ReplL_zero(vec dst, immL0 zero) %{
4463 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4464 match(Set dst (Replicate zero));
4465 format %{ "replicateL $dst,$zero" %}
4466 ins_encode %{
4467 int vlen_enc = vector_length_encoding(this);
4468 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
4469 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4470 } else {
4471 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4472 }
4473 %}
4474 ins_pipe( fpu_reg_reg );
4475 %}
4476
4477 instruct ReplL_M1(vec dst, immL_M1 con) %{
4478 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4479 match(Set dst (Replicate con));
4480 format %{ "vallones $dst" %}
4481 ins_encode %{
4482 int vector_len = vector_length_encoding(this);
4483 __ vallones($dst$$XMMRegister, vector_len);
4484 %}
4485 ins_pipe( pipe_slow );
4486 %}
4487
4488 // ====================ReplicateF=======================================
4489
4490 instruct vReplF_reg(vec dst, vlRegF src) %{
4491 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
4492 match(Set dst (Replicate src));
4493 format %{ "replicateF $dst,$src" %}
4494 ins_encode %{
4495 uint vlen = Matcher::vector_length(this);
4496 int vlen_enc = vector_length_encoding(this);
4497 if (vlen <= 4) {
4498 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
4499 } else if (VM_Version::supports_avx2()) {
4500 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
4501 } else {
4502 assert(vlen == 8, "sanity");
4503 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
4504 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
4505 }
4506 %}
4507 ins_pipe( pipe_slow );
4508 %}
4509
4510 instruct ReplF_reg(vec dst, vlRegF src) %{
4511 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
4512 match(Set dst (Replicate src));
4513 format %{ "replicateF $dst,$src" %}
4514 ins_encode %{
4515 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
4516 %}
4517 ins_pipe( pipe_slow );
4518 %}
4519
4520 instruct ReplF_mem(vec dst, memory mem) %{
4521 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
4522 match(Set dst (Replicate (LoadF mem)));
4523 format %{ "replicateF $dst,$mem" %}
4524 ins_encode %{
4525 int vlen_enc = vector_length_encoding(this);
4526 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
4527 %}
4528 ins_pipe( pipe_slow );
4529 %}
4530
4531 // Replicate float scalar immediate to be vector by loading from const table.
4532 instruct ReplF_imm(vec dst, immF con) %{
4533 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
4534 match(Set dst (Replicate con));
4535 format %{ "replicateF $dst,$con" %}
4536 ins_encode %{
4537 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
4538 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
4539 int vlen = Matcher::vector_length_in_bytes(this);
4540 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
4541 %}
4542 ins_pipe( pipe_slow );
4543 %}
4544
4545 instruct ReplF_zero(vec dst, immF0 zero) %{
4546 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
4547 match(Set dst (Replicate zero));
4548 format %{ "replicateF $dst,$zero" %}
4549 ins_encode %{
4550 int vlen_enc = vector_length_encoding(this);
4551 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
4552 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4553 } else {
4554 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
4555 }
4556 %}
4557 ins_pipe( fpu_reg_reg );
4558 %}
4559
4560 // ====================ReplicateD=======================================
4561
// Replicate double (8 byte) scalar to be vector
4563 instruct vReplD_reg(vec dst, vlRegD src) %{
4564 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
4565 match(Set dst (Replicate src));
4566 format %{ "replicateD $dst,$src" %}
4567 ins_encode %{
4568 uint vlen = Matcher::vector_length(this);
4569 int vlen_enc = vector_length_encoding(this);
4570 if (vlen <= 2) {
4571 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
4572 } else if (VM_Version::supports_avx2()) {
4573 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
4574 } else {
4575 assert(vlen == 4, "sanity");
4576 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
4577 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
4578 }
4579 %}
4580 ins_pipe( pipe_slow );
4581 %}
4582
4583 instruct ReplD_reg(vec dst, vlRegD src) %{
4584 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
4585 match(Set dst (Replicate src));
4586 format %{ "replicateD $dst,$src" %}
4587 ins_encode %{
4588 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
4589 %}
4590 ins_pipe( pipe_slow );
4591 %}
4592
4593 instruct ReplD_mem(vec dst, memory mem) %{
4594 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
4595 match(Set dst (Replicate (LoadD mem)));
4596 format %{ "replicateD $dst,$mem" %}
4597 ins_encode %{
4598 if (Matcher::vector_length(this) >= 4) {
4599 int vlen_enc = vector_length_encoding(this);
4600 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
4601 } else {
4602 __ movddup($dst$$XMMRegister, $mem$$Address);
4603 }
4604 %}
4605 ins_pipe( pipe_slow );
4606 %}
4607
4608 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
4609 instruct ReplD_imm(vec dst, immD con) %{
4610 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
4611 match(Set dst (Replicate con));
4612 format %{ "replicateD $dst,$con" %}
4613 ins_encode %{
4614 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
4615 int vlen = Matcher::vector_length_in_bytes(this);
4616 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
4617 %}
4618 ins_pipe( pipe_slow );
4619 %}
4620
4621 instruct ReplD_zero(vec dst, immD0 zero) %{
4622 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
4623 match(Set dst (Replicate zero));
4624 format %{ "replicateD $dst,$zero" %}
4625 ins_encode %{
4626 int vlen_enc = vector_length_encoding(this);
4627 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
4628 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4629 } else {
4630 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
4631 }
4632 %}
4633 ins_pipe( fpu_reg_reg );
4634 %}
4635
4636 // ====================VECTOR INSERT=======================================
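// The insert rules below write a scalar value into a single vector lane
// selected by a compile-time constant index.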
4637
4638 instruct insert(vec dst, rRegI val, immU8 idx) %{
4639 predicate(Matcher::vector_length_in_bytes(n) < 32);
4640 match(Set dst (VectorInsert (Binary dst val) idx));
4641 format %{ "vector_insert $dst,$val,$idx" %}
4642 ins_encode %{
4643 assert(UseSSE >= 4, "required");
4644 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
4645
4646 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4647
4648 assert(is_integral_type(elem_bt), "");
4649 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4650
4651 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
4652 %}
4653 ins_pipe( pipe_slow );
4654 %}
4655
4656 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
4657 predicate(Matcher::vector_length_in_bytes(n) == 32);
4658 match(Set dst (VectorInsert (Binary src val) idx));
4659 effect(TEMP vtmp);
4660 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4661 ins_encode %{
4662 int vlen_enc = Assembler::AVX_256bit;
4663 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4664 int elem_per_lane = 16/type2aelembytes(elem_bt);
4665 int log2epr = log2(elem_per_lane);
4666
4667 assert(is_integral_type(elem_bt), "sanity");
4668 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4669
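    // Split the element index into the position within a 128-bit lane (x_idx)
    // and the lane number (y_idx): extract that lane, insert the scalar into
    // it, and write the lane back into the destination.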
4670 uint x_idx = $idx$$constant & right_n_bits(log2epr);
4671 uint y_idx = ($idx$$constant >> log2epr) & 1;
4672 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4673 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4674 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4675 %}
4676 ins_pipe( pipe_slow );
4677 %}
4678
4679 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
4680 predicate(Matcher::vector_length_in_bytes(n) == 64);
4681 match(Set dst (VectorInsert (Binary src val) idx));
4682 effect(TEMP vtmp);
4683 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4684 ins_encode %{
4685 assert(UseAVX > 2, "sanity");
4686
4687 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4688 int elem_per_lane = 16/type2aelembytes(elem_bt);
4689 int log2epr = log2(elem_per_lane);
4690
4691 assert(is_integral_type(elem_bt), "");
4692 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4693
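    // Same lane split as in insert32, except that a 512-bit vector has four
    // 128-bit lanes.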
4694 uint x_idx = $idx$$constant & right_n_bits(log2epr);
4695 uint y_idx = ($idx$$constant >> log2epr) & 3;
4696 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4697 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4698 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4699 %}
4700 ins_pipe( pipe_slow );
4701 %}
4702
4703 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
4704 predicate(Matcher::vector_length(n) == 2);
4705 match(Set dst (VectorInsert (Binary dst val) idx));
4706 format %{ "vector_insert $dst,$val,$idx" %}
4707 ins_encode %{
4708 assert(UseSSE >= 4, "required");
4709 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
4710 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4711
4712 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
4713 %}
4714 ins_pipe( pipe_slow );
4715 %}
4716
4717 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
4718 predicate(Matcher::vector_length(n) == 4);
4719 match(Set dst (VectorInsert (Binary src val) idx));
4720 effect(TEMP vtmp);
4721 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4722 ins_encode %{
4723 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
4724 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4725
4726 uint x_idx = $idx$$constant & right_n_bits(1);
4727 uint y_idx = ($idx$$constant >> 1) & 1;
4728 int vlen_enc = Assembler::AVX_256bit;
4729 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4730 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4731 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4732 %}
4733 ins_pipe( pipe_slow );
4734 %}
4735
4736 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
4737 predicate(Matcher::vector_length(n) == 8);
4738 match(Set dst (VectorInsert (Binary src val) idx));
4739 effect(TEMP vtmp);
4740 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4741 ins_encode %{
4742 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
4743 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4744
4745 uint x_idx = $idx$$constant & right_n_bits(1);
4746 uint y_idx = ($idx$$constant >> 1) & 3;
4747 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4748 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4749 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4750 %}
4751 ins_pipe( pipe_slow );
4752 %}
4753
4754 instruct insertF(vec dst, regF val, immU8 idx) %{
4755 predicate(Matcher::vector_length(n) < 8);
4756 match(Set dst (VectorInsert (Binary dst val) idx));
4757 format %{ "vector_insert $dst,$val,$idx" %}
4758 ins_encode %{
4759 assert(UseSSE >= 4, "sanity");
4760
4761 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
4762 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4763
4764 uint x_idx = $idx$$constant & right_n_bits(2);
4765 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
4766 %}
4767 ins_pipe( pipe_slow );
4768 %}
4769
4770 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
4771 predicate(Matcher::vector_length(n) >= 8);
4772 match(Set dst (VectorInsert (Binary src val) idx));
4773 effect(TEMP vtmp);
4774 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4775 ins_encode %{
4776 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
4777 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4778
4779 int vlen = Matcher::vector_length(this);
4780 uint x_idx = $idx$$constant & right_n_bits(2);
4781 if (vlen == 8) {
4782 uint y_idx = ($idx$$constant >> 2) & 1;
4783 int vlen_enc = Assembler::AVX_256bit;
4784 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4785 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
4786 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4787 } else {
4788 assert(vlen == 16, "sanity");
4789 uint y_idx = ($idx$$constant >> 2) & 3;
4790 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4791 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
4792 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4793 }
4794 %}
4795 ins_pipe( pipe_slow );
4796 %}
4797
4798 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
4799 predicate(Matcher::vector_length(n) == 2);
4800 match(Set dst (VectorInsert (Binary dst val) idx));
4801 effect(TEMP tmp);
4802 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
4803 ins_encode %{
4804 assert(UseSSE >= 4, "sanity");
4805 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
4806 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4807
4808 __ movq($tmp$$Register, $val$$XMMRegister);
4809 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
4810 %}
4811 ins_pipe( pipe_slow );
4812 %}
4813
4814 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
4815 predicate(Matcher::vector_length(n) == 4);
4816 match(Set dst (VectorInsert (Binary src val) idx));
4817 effect(TEMP vtmp, TEMP tmp);
4818 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
4819 ins_encode %{
4820 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
4821 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4822
4823 uint x_idx = $idx$$constant & right_n_bits(1);
4824 uint y_idx = ($idx$$constant >> 1) & 1;
4825 int vlen_enc = Assembler::AVX_256bit;
4826 __ movq($tmp$$Register, $val$$XMMRegister);
4827 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4828 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
4829 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4830 %}
4831 ins_pipe( pipe_slow );
4832 %}
4833
4834 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
4835 predicate(Matcher::vector_length(n) == 8);
4836 match(Set dst (VectorInsert (Binary src val) idx));
4837 effect(TEMP tmp, TEMP vtmp);
4838 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4839 ins_encode %{
4840 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
4841 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4842
4843 uint x_idx = $idx$$constant & right_n_bits(1);
4844 uint y_idx = ($idx$$constant >> 1) & 3;
4845 __ movq($tmp$$Register, $val$$XMMRegister);
4846 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4847 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
4848 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4849 %}
4850 ins_pipe( pipe_slow );
4851 %}
4852
4853 // ====================REDUCTION ARITHMETIC=======================================
4854
4855 // =======================Int Reduction==========================================
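// Reduce all lanes of the vector src2 with the matched operation and fold the
// result into the scalar src1.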
4856
4857 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
4858 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
4859 match(Set dst (AddReductionVI src1 src2));
4860 match(Set dst (MulReductionVI src1 src2));
4861 match(Set dst (AndReductionV src1 src2));
4862 match(Set dst ( OrReductionV src1 src2));
4863 match(Set dst (XorReductionV src1 src2));
4864 match(Set dst (MinReductionV src1 src2));
4865 match(Set dst (MaxReductionV src1 src2));
4866 effect(TEMP vtmp1, TEMP vtmp2);
4867 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
4868 ins_encode %{
4869 int opcode = this->ideal_Opcode();
4870 int vlen = Matcher::vector_length(this, $src2);
4871 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4872 %}
4873 ins_pipe( pipe_slow );
4874 %}
4875
4876 // =======================Long Reduction==========================================
4877
4878 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
4879 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
4880 match(Set dst (AddReductionVL src1 src2));
4881 match(Set dst (MulReductionVL src1 src2));
4882 match(Set dst (AndReductionV src1 src2));
4883 match(Set dst ( OrReductionV src1 src2));
4884 match(Set dst (XorReductionV src1 src2));
4885 match(Set dst (MinReductionV src1 src2));
4886 match(Set dst (MaxReductionV src1 src2));
4887 effect(TEMP vtmp1, TEMP vtmp2);
4888 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
4889 ins_encode %{
4890 int opcode = this->ideal_Opcode();
4891 int vlen = Matcher::vector_length(this, $src2);
4892 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4893 %}
4894 ins_pipe( pipe_slow );
4895 %}
4896
4897 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
4898 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
4899 match(Set dst (AddReductionVL src1 src2));
4900 match(Set dst (MulReductionVL src1 src2));
4901 match(Set dst (AndReductionV src1 src2));
4902 match(Set dst ( OrReductionV src1 src2));
4903 match(Set dst (XorReductionV src1 src2));
4904 match(Set dst (MinReductionV src1 src2));
4905 match(Set dst (MaxReductionV src1 src2));
4906 effect(TEMP vtmp1, TEMP vtmp2);
4907 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
4908 ins_encode %{
4909 int opcode = this->ideal_Opcode();
4910 int vlen = Matcher::vector_length(this, $src2);
4911 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4912 %}
4913 ins_pipe( pipe_slow );
4914 %}
4915
4916 // =======================Float Reduction==========================================
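// The strictly ordered variants (e.g. for auto-vectorized loops) accumulate
// the lanes in order, starting from the value already in dst; the unordered
// variants further below may combine lanes in any order, starting from the
// identity value in src1.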
4917
4918 instruct reductionF128(regF dst, vec src, vec vtmp) %{
4919 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
4920 match(Set dst (AddReductionVF dst src));
4921 match(Set dst (MulReductionVF dst src));
4922 effect(TEMP dst, TEMP vtmp);
4923 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
4924 ins_encode %{
4925 int opcode = this->ideal_Opcode();
4926 int vlen = Matcher::vector_length(this, $src);
4927 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
4928 %}
4929 ins_pipe( pipe_slow );
4930 %}
4931
4932 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
4933 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
4934 match(Set dst (AddReductionVF dst src));
4935 match(Set dst (MulReductionVF dst src));
4936 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
4937 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
4938 ins_encode %{
4939 int opcode = this->ideal_Opcode();
4940 int vlen = Matcher::vector_length(this, $src);
4941 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4942 %}
4943 ins_pipe( pipe_slow );
4944 %}
4945
4946 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
4947 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
4948 match(Set dst (AddReductionVF dst src));
4949 match(Set dst (MulReductionVF dst src));
4950 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
4951 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
4952 ins_encode %{
4953 int opcode = this->ideal_Opcode();
4954 int vlen = Matcher::vector_length(this, $src);
4955 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4956 %}
4957 ins_pipe( pipe_slow );
4958 %}
4959
4960
4961 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
4962 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
4963 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
4964 // src1 contains reduction identity
4965 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
4966 match(Set dst (AddReductionVF src1 src2));
4967 match(Set dst (MulReductionVF src1 src2));
4968 effect(TEMP dst);
4969 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
4970 ins_encode %{
4971 int opcode = this->ideal_Opcode();
4972 int vlen = Matcher::vector_length(this, $src2);
4973 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
4974 %}
4975 ins_pipe( pipe_slow );
4976 %}
4977
4978 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
4979 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
4980 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
4981 // src1 contains reduction identity
4982 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
4983 match(Set dst (AddReductionVF src1 src2));
4984 match(Set dst (MulReductionVF src1 src2));
4985 effect(TEMP dst, TEMP vtmp);
4986 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
4987 ins_encode %{
4988 int opcode = this->ideal_Opcode();
4989 int vlen = Matcher::vector_length(this, $src2);
4990 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
4991 %}
4992 ins_pipe( pipe_slow );
4993 %}
4994
4995 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
4996 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
4997 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
4998 // src1 contains reduction identity
4999 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
5000 match(Set dst (AddReductionVF src1 src2));
5001 match(Set dst (MulReductionVF src1 src2));
5002 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5003 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5004 ins_encode %{
5005 int opcode = this->ideal_Opcode();
5006 int vlen = Matcher::vector_length(this, $src2);
5007 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5008 %}
5009 ins_pipe( pipe_slow );
5010 %}
5011
5012 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5013 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
5014 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
5015 // src1 contains reduction identity
5016 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
5017 match(Set dst (AddReductionVF src1 src2));
5018 match(Set dst (MulReductionVF src1 src2));
5019 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5020 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5021 ins_encode %{
5022 int opcode = this->ideal_Opcode();
5023 int vlen = Matcher::vector_length(this, $src2);
5024 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5025 %}
5026 ins_pipe( pipe_slow );
5027 %}
5028
5029 // =======================Double Reduction==========================================
5030
5031 instruct reduction2D(regD dst, vec src, vec vtmp) %{
5032 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
5033 match(Set dst (AddReductionVD dst src));
5034 match(Set dst (MulReductionVD dst src));
5035 effect(TEMP dst, TEMP vtmp);
5036 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
5037 ins_encode %{
5038 int opcode = this->ideal_Opcode();
5039 int vlen = Matcher::vector_length(this, $src);
5040 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
5041 %}
5042 ins_pipe( pipe_slow );
5043 %}
5044
5045 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
5046 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
5047 match(Set dst (AddReductionVD dst src));
5048 match(Set dst (MulReductionVD dst src));
5049 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5050 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
5051 ins_encode %{
5052 int opcode = this->ideal_Opcode();
5053 int vlen = Matcher::vector_length(this, $src);
5054 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5055 %}
5056 ins_pipe( pipe_slow );
5057 %}
5058
5059 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5060 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
5061 match(Set dst (AddReductionVD dst src));
5062 match(Set dst (MulReductionVD dst src));
5063 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5064 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
5065 ins_encode %{
5066 int opcode = this->ideal_Opcode();
5067 int vlen = Matcher::vector_length(this, $src);
5068 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5069 %}
5070 ins_pipe( pipe_slow );
5071 %}
5072
5073 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
5074 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
5075 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
5076 // src1 contains reduction identity
5077 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
5078 match(Set dst (AddReductionVD src1 src2));
5079 match(Set dst (MulReductionVD src1 src2));
5080 effect(TEMP dst);
5081 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
5082 ins_encode %{
5083 int opcode = this->ideal_Opcode();
5084 int vlen = Matcher::vector_length(this, $src2);
5085 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
5086 %}
5087 ins_pipe( pipe_slow );
5088 %}
5089
5090 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
5091 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
5092 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
5093 // src1 contains reduction identity
5094 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
5095 match(Set dst (AddReductionVD src1 src2));
5096 match(Set dst (MulReductionVD src1 src2));
5097 effect(TEMP dst, TEMP vtmp);
5098 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
5099 ins_encode %{
5100 int opcode = this->ideal_Opcode();
5101 int vlen = Matcher::vector_length(this, $src2);
5102 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
5103 %}
5104 ins_pipe( pipe_slow );
5105 %}
5106
5107 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5108 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
5109 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
5110 // src1 contains reduction identity
5111 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
5112 match(Set dst (AddReductionVD src1 src2));
5113 match(Set dst (MulReductionVD src1 src2));
5114 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5115 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5116 ins_encode %{
5117 int opcode = this->ideal_Opcode();
5118 int vlen = Matcher::vector_length(this, $src2);
5119 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5120 %}
5121 ins_pipe( pipe_slow );
5122 %}
5123
5124 // =======================Byte Reduction==========================================
5125
5126 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5127 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
5128 match(Set dst (AddReductionVI src1 src2));
5129 match(Set dst (AndReductionV src1 src2));
5130 match(Set dst ( OrReductionV src1 src2));
5131 match(Set dst (XorReductionV src1 src2));
5132 match(Set dst (MinReductionV src1 src2));
5133 match(Set dst (MaxReductionV src1 src2));
5134 effect(TEMP vtmp1, TEMP vtmp2);
5135 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5136 ins_encode %{
5137 int opcode = this->ideal_Opcode();
5138 int vlen = Matcher::vector_length(this, $src2);
5139 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5140 %}
5141 ins_pipe( pipe_slow );
5142 %}
5143
5144 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
5145 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
5146 match(Set dst (AddReductionVI src1 src2));
5147 match(Set dst (AndReductionV src1 src2));
5148 match(Set dst ( OrReductionV src1 src2));
5149 match(Set dst (XorReductionV src1 src2));
5150 match(Set dst (MinReductionV src1 src2));
5151 match(Set dst (MaxReductionV src1 src2));
5152 effect(TEMP vtmp1, TEMP vtmp2);
5153 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5154 ins_encode %{
5155 int opcode = this->ideal_Opcode();
5156 int vlen = Matcher::vector_length(this, $src2);
5157 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5158 %}
5159 ins_pipe( pipe_slow );
5160 %}
5161
5162 // =======================Short Reduction==========================================
5163
5164 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5165 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
5166 match(Set dst (AddReductionVI src1 src2));
5167 match(Set dst (MulReductionVI src1 src2));
5168 match(Set dst (AndReductionV src1 src2));
5169 match(Set dst ( OrReductionV src1 src2));
5170 match(Set dst (XorReductionV src1 src2));
5171 match(Set dst (MinReductionV src1 src2));
5172 match(Set dst (MaxReductionV src1 src2));
5173 effect(TEMP vtmp1, TEMP vtmp2);
5174 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5175 ins_encode %{
5176 int opcode = this->ideal_Opcode();
5177 int vlen = Matcher::vector_length(this, $src2);
5178 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5179 %}
5180 ins_pipe( pipe_slow );
5181 %}
5182
5183 // =======================Mul Reduction==========================================
5184
5185 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
5186 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
5187 Matcher::vector_length(n->in(2)) <= 32); // src2
5188 match(Set dst (MulReductionVI src1 src2));
5189 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5190 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
5191 ins_encode %{
5192 int opcode = this->ideal_Opcode();
5193 int vlen = Matcher::vector_length(this, $src2);
5194 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5195 %}
5196 ins_pipe( pipe_slow );
5197 %}
5198
5199 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5200 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
5201 Matcher::vector_length(n->in(2)) == 64); // src2
5202 match(Set dst (MulReductionVI src1 src2));
5203 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5204 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
5205 ins_encode %{
5206 int opcode = this->ideal_Opcode();
5207 int vlen = Matcher::vector_length(this, $src2);
5208 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5209 %}
5210 ins_pipe( pipe_slow );
5211 %}
5212
5213 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
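// The extra temporaries are needed because the x86 min/max instructions do not
// match Java semantics for NaN and -0.0, so the macro-assembler sequences
// handle those cases explicitly; the AVX10.2 variants further below can rely
// on instructions with the required semantics and need fewer temporaries.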
5215 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
5216 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
5217 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5218 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5219 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5220 Matcher::vector_length(n->in(2)) == 2);
5221 match(Set dst (MinReductionV src1 src2));
5222 match(Set dst (MaxReductionV src1 src2));
5223 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
5224 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
5225 ins_encode %{
5226 assert(UseAVX > 0, "sanity");
5227
5228 int opcode = this->ideal_Opcode();
5229 int vlen = Matcher::vector_length(this, $src2);
5230 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
5231 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
5232 %}
5233 ins_pipe( pipe_slow );
5234 %}
5235
5236 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
5237 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
5238 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5239 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5240 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5241 Matcher::vector_length(n->in(2)) >= 4);
5242 match(Set dst (MinReductionV src1 src2));
5243 match(Set dst (MaxReductionV src1 src2));
5244 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
5245 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
5246 ins_encode %{
5247 assert(UseAVX > 0, "sanity");
5248
5249 int opcode = this->ideal_Opcode();
5250 int vlen = Matcher::vector_length(this, $src2);
5251 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
5252 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
5253 %}
5254 ins_pipe( pipe_slow );
5255 %}
5256
5257 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
5258 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
5259 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5260 Matcher::vector_length(n->in(2)) == 2);
5261 match(Set dst (MinReductionV dst src));
5262 match(Set dst (MaxReductionV dst src));
5263 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
5264 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
5265 ins_encode %{
5266 assert(UseAVX > 0, "sanity");
5267
5268 int opcode = this->ideal_Opcode();
5269 int vlen = Matcher::vector_length(this, $src);
5270 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
5271 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
5272 %}
5273 ins_pipe( pipe_slow );
5274 %}
5275
5276
5277 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
5278 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
5279 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5280 Matcher::vector_length(n->in(2)) >= 4);
5281 match(Set dst (MinReductionV dst src));
5282 match(Set dst (MaxReductionV dst src));
5283 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
5284 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
5285 ins_encode %{
5286 assert(UseAVX > 0, "sanity");
5287
5288 int opcode = this->ideal_Opcode();
5289 int vlen = Matcher::vector_length(this, $src);
5290 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
5291 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
5292 %}
5293 ins_pipe( pipe_slow );
5294 %}
5295
5296 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
5297 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5298 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5299 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5300 Matcher::vector_length(n->in(2)) == 2);
5301 match(Set dst (MinReductionV src1 src2));
5302 match(Set dst (MaxReductionV src1 src2));
5303 effect(TEMP dst, TEMP xtmp1);
5304 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
5305 ins_encode %{
5306 int opcode = this->ideal_Opcode();
5307 int vlen = Matcher::vector_length(this, $src2);
5308 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
5309 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
5310 %}
5311 ins_pipe( pipe_slow );
5312 %}
5313
5314 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
5315 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5316 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5317 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5318 Matcher::vector_length(n->in(2)) >= 4);
5319 match(Set dst (MinReductionV src1 src2));
5320 match(Set dst (MaxReductionV src1 src2));
5321 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
5322 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
5323 ins_encode %{
5324 int opcode = this->ideal_Opcode();
5325 int vlen = Matcher::vector_length(this, $src2);
5326 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
5327 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
5328 %}
5329 ins_pipe( pipe_slow );
5330 %}
5331
5332 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
5333 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5334 Matcher::vector_length(n->in(2)) == 2);
5335 match(Set dst (MinReductionV dst src));
5336 match(Set dst (MaxReductionV dst src));
5337 effect(TEMP dst, TEMP xtmp1);
5338 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
5339 ins_encode %{
5340 int opcode = this->ideal_Opcode();
5341 int vlen = Matcher::vector_length(this, $src);
5342 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
5343 $xtmp1$$XMMRegister);
5344 %}
5345 ins_pipe( pipe_slow );
5346 %}
5347
5348 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
5349 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5350 Matcher::vector_length(n->in(2)) >= 4);
5351 match(Set dst (MinReductionV dst src));
5352 match(Set dst (MaxReductionV dst src));
5353 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
5354 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
5355 ins_encode %{
5356 int opcode = this->ideal_Opcode();
5357 int vlen = Matcher::vector_length(this, $src);
5358 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
5359 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
5360 %}
5361 ins_pipe( pipe_slow );
5362 %}
5363
//--------------------Min/Max Double Reduction --------------------
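// Same NaN and -0.0 handling as the float min/max reductions above.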
5365 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
5366 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
5367 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5368 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5369 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5370 Matcher::vector_length(n->in(2)) == 2);
5371 match(Set dst (MinReductionV src1 src2));
5372 match(Set dst (MaxReductionV src1 src2));
5373 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
5374 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
5375 ins_encode %{
5376 assert(UseAVX > 0, "sanity");
5377
5378 int opcode = this->ideal_Opcode();
5379 int vlen = Matcher::vector_length(this, $src2);
5380 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
5381 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
5382 %}
5383 ins_pipe( pipe_slow );
5384 %}
5385
5386 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
5387 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
5388 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5389 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5390 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5391 Matcher::vector_length(n->in(2)) >= 4);
5392 match(Set dst (MinReductionV src1 src2));
5393 match(Set dst (MaxReductionV src1 src2));
5394 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
5395 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
5396 ins_encode %{
5397 assert(UseAVX > 0, "sanity");
5398
5399 int opcode = this->ideal_Opcode();
5400 int vlen = Matcher::vector_length(this, $src2);
5401 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
5402 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
5403 %}
5404 ins_pipe( pipe_slow );
5405 %}
5406
5407
5408 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
5409 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
5410 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5411 Matcher::vector_length(n->in(2)) == 2);
5412 match(Set dst (MinReductionV dst src));
5413 match(Set dst (MaxReductionV dst src));
5414 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
5415 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
5416 ins_encode %{
5417 assert(UseAVX > 0, "sanity");
5418
5419 int opcode = this->ideal_Opcode();
5420 int vlen = Matcher::vector_length(this, $src);
5421 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5422 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
5423 %}
5424 ins_pipe( pipe_slow );
5425 %}
5426
5427 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
5428 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
5429 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5430 Matcher::vector_length(n->in(2)) >= 4);
5431 match(Set dst (MinReductionV dst src));
5432 match(Set dst (MaxReductionV dst src));
5433 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
5434 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
5435 ins_encode %{
5436 assert(UseAVX > 0, "sanity");
5437
5438 int opcode = this->ideal_Opcode();
5439 int vlen = Matcher::vector_length(this, $src);
5440 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5441 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
5442 %}
5443 ins_pipe( pipe_slow );
5444 %}
5445
5446 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
5447 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5448 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5449 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5450 Matcher::vector_length(n->in(2)) == 2);
5451 match(Set dst (MinReductionV src1 src2));
5452 match(Set dst (MaxReductionV src1 src2));
5453 effect(TEMP dst, TEMP xtmp1);
5454 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
5455 ins_encode %{
5456 int opcode = this->ideal_Opcode();
5457 int vlen = Matcher::vector_length(this, $src2);
5458 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
5459 xnoreg, xnoreg, $xtmp1$$XMMRegister);
5460 %}
5461 ins_pipe( pipe_slow );
5462 %}
5463
5464 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
5465 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5466 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5467 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5468 Matcher::vector_length(n->in(2)) >= 4);
5469 match(Set dst (MinReductionV src1 src2));
5470 match(Set dst (MaxReductionV src1 src2));
5471 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
5472 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
5473 ins_encode %{
5474 int opcode = this->ideal_Opcode();
5475 int vlen = Matcher::vector_length(this, $src2);
5476 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
5477 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
5478 %}
5479 ins_pipe( pipe_slow );
5480 %}
5481
5482
5483 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
5484 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5485 Matcher::vector_length(n->in(2)) == 2);
5486 match(Set dst (MinReductionV dst src));
5487 match(Set dst (MaxReductionV dst src));
5488 effect(TEMP dst, TEMP xtmp1);
5489 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
5490 ins_encode %{
5491 int opcode = this->ideal_Opcode();
5492 int vlen = Matcher::vector_length(this, $src);
5493 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5494 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
5495 %}
5496 ins_pipe( pipe_slow );
5497 %}
5498
5499 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
5500 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5501 Matcher::vector_length(n->in(2)) >= 4);
5502 match(Set dst (MinReductionV dst src));
5503 match(Set dst (MaxReductionV dst src));
5504 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
5505 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
5506 ins_encode %{
5507 int opcode = this->ideal_Opcode();
5508 int vlen = Matcher::vector_length(this, $src);
5509 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5510 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
5511 %}
5512 ins_pipe( pipe_slow );
5513 %}
5514
5515 // ====================VECTOR ARITHMETIC=======================================
5516
5517 // --------------------------------- ADD --------------------------------------
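// Each element type has three variants: a two-operand SSE form (dst = dst + src),
// a three-operand AVX register form, and an AVX form with a memory operand for
// vectors wider than 8 bytes.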
5518
5519 // Bytes vector add
5520 instruct vaddB(vec dst, vec src) %{
5521 predicate(UseAVX == 0);
5522 match(Set dst (AddVB dst src));
5523 format %{ "paddb $dst,$src\t! add packedB" %}
5524 ins_encode %{
5525 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5526 %}
5527 ins_pipe( pipe_slow );
5528 %}
5529
5530 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
5531 predicate(UseAVX > 0);
5532 match(Set dst (AddVB src1 src2));
5533 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
5534 ins_encode %{
5535 int vlen_enc = vector_length_encoding(this);
5536 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5537 %}
5538 ins_pipe( pipe_slow );
5539 %}
5540
5541 instruct vaddB_mem(vec dst, vec src, memory mem) %{
5542 predicate((UseAVX > 0) &&
5543 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5544 match(Set dst (AddVB src (LoadVector mem)));
5545 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
5546 ins_encode %{
5547 int vlen_enc = vector_length_encoding(this);
5548 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5549 %}
5550 ins_pipe( pipe_slow );
5551 %}
5552
5553 // Shorts/Chars vector add
5554 instruct vaddS(vec dst, vec src) %{
5555 predicate(UseAVX == 0);
5556 match(Set dst (AddVS dst src));
5557 format %{ "paddw $dst,$src\t! add packedS" %}
5558 ins_encode %{
5559 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5560 %}
5561 ins_pipe( pipe_slow );
5562 %}
5563
5564 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
5565 predicate(UseAVX > 0);
5566 match(Set dst (AddVS src1 src2));
5567 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
5568 ins_encode %{
5569 int vlen_enc = vector_length_encoding(this);
5570 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5571 %}
5572 ins_pipe( pipe_slow );
5573 %}
5574
5575 instruct vaddS_mem(vec dst, vec src, memory mem) %{
5576 predicate((UseAVX > 0) &&
5577 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5578 match(Set dst (AddVS src (LoadVector mem)));
5579 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
5580 ins_encode %{
5581 int vlen_enc = vector_length_encoding(this);
5582 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5583 %}
5584 ins_pipe( pipe_slow );
5585 %}
5586
5587 // Integers vector add
5588 instruct vaddI(vec dst, vec src) %{
5589 predicate(UseAVX == 0);
5590 match(Set dst (AddVI dst src));
5591 format %{ "paddd $dst,$src\t! add packedI" %}
5592 ins_encode %{
5593 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5594 %}
5595 ins_pipe( pipe_slow );
5596 %}
5597
5598 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
5599 predicate(UseAVX > 0);
5600 match(Set dst (AddVI src1 src2));
5601 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
5602 ins_encode %{
5603 int vlen_enc = vector_length_encoding(this);
5604 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5605 %}
5606 ins_pipe( pipe_slow );
5607 %}
5608
5609
5610 instruct vaddI_mem(vec dst, vec src, memory mem) %{
5611 predicate((UseAVX > 0) &&
5612 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5613 match(Set dst (AddVI src (LoadVector mem)));
5614 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
5615 ins_encode %{
5616 int vlen_enc = vector_length_encoding(this);
5617 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5618 %}
5619 ins_pipe( pipe_slow );
5620 %}
5621
5622 // Longs vector add
5623 instruct vaddL(vec dst, vec src) %{
5624 predicate(UseAVX == 0);
5625 match(Set dst (AddVL dst src));
5626 format %{ "paddq $dst,$src\t! add packedL" %}
5627 ins_encode %{
5628 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
5629 %}
5630 ins_pipe( pipe_slow );
5631 %}
5632
5633 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
5634 predicate(UseAVX > 0);
5635 match(Set dst (AddVL src1 src2));
5636 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
5637 ins_encode %{
5638 int vlen_enc = vector_length_encoding(this);
5639 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5640 %}
5641 ins_pipe( pipe_slow );
5642 %}
5643
5644 instruct vaddL_mem(vec dst, vec src, memory mem) %{
5645 predicate((UseAVX > 0) &&
5646 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5647 match(Set dst (AddVL src (LoadVector mem)));
5648 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
5649 ins_encode %{
5650 int vlen_enc = vector_length_encoding(this);
5651 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5652 %}
5653 ins_pipe( pipe_slow );
5654 %}
5655
5656 // Floats vector add
5657 instruct vaddF(vec dst, vec src) %{
5658 predicate(UseAVX == 0);
5659 match(Set dst (AddVF dst src));
5660 format %{ "addps $dst,$src\t! add packedF" %}
5661 ins_encode %{
5662 __ addps($dst$$XMMRegister, $src$$XMMRegister);
5663 %}
5664 ins_pipe( pipe_slow );
5665 %}
5666
5667 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
5668 predicate(UseAVX > 0);
5669 match(Set dst (AddVF src1 src2));
5670 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
5671 ins_encode %{
5672 int vlen_enc = vector_length_encoding(this);
5673 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5674 %}
5675 ins_pipe( pipe_slow );
5676 %}
5677
5678 instruct vaddF_mem(vec dst, vec src, memory mem) %{
5679 predicate((UseAVX > 0) &&
5680 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5681 match(Set dst (AddVF src (LoadVector mem)));
5682 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
5683 ins_encode %{
5684 int vlen_enc = vector_length_encoding(this);
5685 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5686 %}
5687 ins_pipe( pipe_slow );
5688 %}
5689
5690 // Doubles vector add
5691 instruct vaddD(vec dst, vec src) %{
5692 predicate(UseAVX == 0);
5693 match(Set dst (AddVD dst src));
5694 format %{ "addpd $dst,$src\t! add packedD" %}
5695 ins_encode %{
5696 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
5697 %}
5698 ins_pipe( pipe_slow );
5699 %}
5700
5701 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
5702 predicate(UseAVX > 0);
5703 match(Set dst (AddVD src1 src2));
5704 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
5705 ins_encode %{
5706 int vlen_enc = vector_length_encoding(this);
5707 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5708 %}
5709 ins_pipe( pipe_slow );
5710 %}
5711
5712 instruct vaddD_mem(vec dst, vec src, memory mem) %{
5713 predicate((UseAVX > 0) &&
5714 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5715 match(Set dst (AddVD src (LoadVector mem)));
5716 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
5717 ins_encode %{
5718 int vlen_enc = vector_length_encoding(this);
5719 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5720 %}
5721 ins_pipe( pipe_slow );
5722 %}
5723
5724 // --------------------------------- SUB --------------------------------------
5725
5726 // Bytes vector sub
5727 instruct vsubB(vec dst, vec src) %{
5728 predicate(UseAVX == 0);
5729 match(Set dst (SubVB dst src));
5730 format %{ "psubb $dst,$src\t! sub packedB" %}
5731 ins_encode %{
5732 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5733 %}
5734 ins_pipe( pipe_slow );
5735 %}
5736
5737 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
5738 predicate(UseAVX > 0);
5739 match(Set dst (SubVB src1 src2));
5740 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
5741 ins_encode %{
5742 int vlen_enc = vector_length_encoding(this);
5743 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5744 %}
5745 ins_pipe( pipe_slow );
5746 %}
5747
5748 instruct vsubB_mem(vec dst, vec src, memory mem) %{
5749 predicate((UseAVX > 0) &&
5750 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5751 match(Set dst (SubVB src (LoadVector mem)));
5752 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
5753 ins_encode %{
5754 int vlen_enc = vector_length_encoding(this);
5755 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5756 %}
5757 ins_pipe( pipe_slow );
5758 %}
5759
5760 // Shorts/Chars vector sub
5761 instruct vsubS(vec dst, vec src) %{
5762 predicate(UseAVX == 0);
5763 match(Set dst (SubVS dst src));
5764 format %{ "psubw $dst,$src\t! sub packedS" %}
5765 ins_encode %{
5766 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5767 %}
5768 ins_pipe( pipe_slow );
5769 %}
5770
5771
5772 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
5773 predicate(UseAVX > 0);
5774 match(Set dst (SubVS src1 src2));
5775 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
5776 ins_encode %{
5777 int vlen_enc = vector_length_encoding(this);
5778 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5779 %}
5780 ins_pipe( pipe_slow );
5781 %}
5782
5783 instruct vsubS_mem(vec dst, vec src, memory mem) %{
5784 predicate((UseAVX > 0) &&
5785 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5786 match(Set dst (SubVS src (LoadVector mem)));
5787 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
5788 ins_encode %{
5789 int vlen_enc = vector_length_encoding(this);
5790 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5791 %}
5792 ins_pipe( pipe_slow );
5793 %}
5794
5795 // Integers vector sub
5796 instruct vsubI(vec dst, vec src) %{
5797 predicate(UseAVX == 0);
5798 match(Set dst (SubVI dst src));
5799 format %{ "psubd $dst,$src\t! sub packedI" %}
5800 ins_encode %{
5801 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
5802 %}
5803 ins_pipe( pipe_slow );
5804 %}
5805
5806 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
5807 predicate(UseAVX > 0);
5808 match(Set dst (SubVI src1 src2));
5809 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
5810 ins_encode %{
5811 int vlen_enc = vector_length_encoding(this);
5812 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5813 %}
5814 ins_pipe( pipe_slow );
5815 %}
5816
5817 instruct vsubI_mem(vec dst, vec src, memory mem) %{
5818 predicate((UseAVX > 0) &&
5819 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5820 match(Set dst (SubVI src (LoadVector mem)));
5821 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
5822 ins_encode %{
5823 int vlen_enc = vector_length_encoding(this);
5824 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5825 %}
5826 ins_pipe( pipe_slow );
5827 %}
5828
5829 // Longs vector sub
5830 instruct vsubL(vec dst, vec src) %{
5831 predicate(UseAVX == 0);
5832 match(Set dst (SubVL dst src));
5833 format %{ "psubq $dst,$src\t! sub packedL" %}
5834 ins_encode %{
5835 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
5836 %}
5837 ins_pipe( pipe_slow );
5838 %}
5839
5840 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
5841 predicate(UseAVX > 0);
5842 match(Set dst (SubVL src1 src2));
5843 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
5844 ins_encode %{
5845 int vlen_enc = vector_length_encoding(this);
5846 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5847 %}
5848 ins_pipe( pipe_slow );
5849 %}
5850
5851
5852 instruct vsubL_mem(vec dst, vec src, memory mem) %{
5853 predicate((UseAVX > 0) &&
5854 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5855 match(Set dst (SubVL src (LoadVector mem)));
5856 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
5857 ins_encode %{
5858 int vlen_enc = vector_length_encoding(this);
5859 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5860 %}
5861 ins_pipe( pipe_slow );
5862 %}
5863
5864 // Floats vector sub
5865 instruct vsubF(vec dst, vec src) %{
5866 predicate(UseAVX == 0);
5867 match(Set dst (SubVF dst src));
5868 format %{ "subps $dst,$src\t! sub packedF" %}
5869 ins_encode %{
5870 __ subps($dst$$XMMRegister, $src$$XMMRegister);
5871 %}
5872 ins_pipe( pipe_slow );
5873 %}
5874
5875 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
5876 predicate(UseAVX > 0);
5877 match(Set dst (SubVF src1 src2));
5878 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
5879 ins_encode %{
5880 int vlen_enc = vector_length_encoding(this);
5881 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5882 %}
5883 ins_pipe( pipe_slow );
5884 %}
5885
5886 instruct vsubF_mem(vec dst, vec src, memory mem) %{
5887 predicate((UseAVX > 0) &&
5888 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5889 match(Set dst (SubVF src (LoadVector mem)));
5890 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
5891 ins_encode %{
5892 int vlen_enc = vector_length_encoding(this);
5893 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5894 %}
5895 ins_pipe( pipe_slow );
5896 %}
5897
5898 // Doubles vector sub
5899 instruct vsubD(vec dst, vec src) %{
5900 predicate(UseAVX == 0);
5901 match(Set dst (SubVD dst src));
5902 format %{ "subpd $dst,$src\t! sub packedD" %}
5903 ins_encode %{
5904 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
5905 %}
5906 ins_pipe( pipe_slow );
5907 %}
5908
5909 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
5910 predicate(UseAVX > 0);
5911 match(Set dst (SubVD src1 src2));
5912 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
5913 ins_encode %{
5914 int vlen_enc = vector_length_encoding(this);
5915 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5916 %}
5917 ins_pipe( pipe_slow );
5918 %}
5919
5920 instruct vsubD_mem(vec dst, vec src, memory mem) %{
5921 predicate((UseAVX > 0) &&
5922 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5923 match(Set dst (SubVD src (LoadVector mem)));
5924 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
5925 ins_encode %{
5926 int vlen_enc = vector_length_encoding(this);
5927 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5928 %}
5929 ins_pipe( pipe_slow );
5930 %}
5931
5932 // --------------------------------- MUL --------------------------------------
5933
5934 // Byte vector mul
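// SSE/AVX have no byte-wise multiply, so the rules below synthesize MulVB from
// 16-bit multiplies. Rough sketch of the approach used by the encodings that
// follow: widen the bytes to words (or split the odd/even byte lanes apart),
// do a pmullw/vpmullw, keep only the low byte of each 16-bit product, and pack
// the bytes back together.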
5935 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
5936 predicate(Matcher::vector_length_in_bytes(n) <= 8);
5937 match(Set dst (MulVB src1 src2));
5938 effect(TEMP dst, TEMP xtmp);
5939 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
5940 ins_encode %{
5941 assert(UseSSE > 3, "required");
5942 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
5943 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
5944 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
5945 __ psllw($dst$$XMMRegister, 8);
5946 __ psrlw($dst$$XMMRegister, 8);
5947 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
5948 %}
5949 ins_pipe( pipe_slow );
5950 %}
5951
5952 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
5953 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
5954 match(Set dst (MulVB src1 src2));
5955 effect(TEMP dst, TEMP xtmp);
5956 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
5957 ins_encode %{
5958 assert(UseSSE > 3, "required");
5959 // Odd-index elements
5960 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
5961 __ psrlw($dst$$XMMRegister, 8);
5962 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
5963 __ psrlw($xtmp$$XMMRegister, 8);
5964 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
5965 __ psllw($dst$$XMMRegister, 8);
5966 // Even-index elements
5967 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
5968 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
5969 __ psllw($xtmp$$XMMRegister, 8);
5970 __ psrlw($xtmp$$XMMRegister, 8);
5971 // Combine
5972 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
5973 %}
5974 ins_pipe( pipe_slow );
5975 %}
5976
5977 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
5978 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
5979 match(Set dst (MulVB src1 src2));
5980 effect(TEMP xtmp1, TEMP xtmp2);
5981 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
5982 ins_encode %{
5983 int vlen_enc = vector_length_encoding(this);
5984 // Odd-index elements
5985 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
5986 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
5987 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
5988 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
5989 // Even-index elements
5990 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5991 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
5992 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
5993 // Combine
5994 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
5995 %}
5996 ins_pipe( pipe_slow );
5997 %}
5998
5999 // Shorts/Chars vector mul
6000 instruct vmulS(vec dst, vec src) %{
6001 predicate(UseAVX == 0);
6002 match(Set dst (MulVS dst src));
6003 format %{ "pmullw $dst,$src\t! mul packedS" %}
6004 ins_encode %{
6005 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6006 %}
6007 ins_pipe( pipe_slow );
6008 %}
6009
6010 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
6011 predicate(UseAVX > 0);
6012 match(Set dst (MulVS src1 src2));
6013 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
6014 ins_encode %{
6015 int vlen_enc = vector_length_encoding(this);
6016 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6017 %}
6018 ins_pipe( pipe_slow );
6019 %}
6020
6021 instruct vmulS_mem(vec dst, vec src, memory mem) %{
6022 predicate((UseAVX > 0) &&
6023 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6024 match(Set dst (MulVS src (LoadVector mem)));
6025 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
6026 ins_encode %{
6027 int vlen_enc = vector_length_encoding(this);
6028 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6029 %}
6030 ins_pipe( pipe_slow );
6031 %}
6032
6033 // Integers vector mul
6034 instruct vmulI(vec dst, vec src) %{
6035 predicate(UseAVX == 0);
6036 match(Set dst (MulVI dst src));
6037 format %{ "pmulld $dst,$src\t! mul packedI" %}
6038 ins_encode %{
6039 assert(UseSSE > 3, "required");
6040 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6041 %}
6042 ins_pipe( pipe_slow );
6043 %}
6044
6045 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
6046 predicate(UseAVX > 0);
6047 match(Set dst (MulVI src1 src2));
6048 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
6049 ins_encode %{
6050 int vlen_enc = vector_length_encoding(this);
6051 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6052 %}
6053 ins_pipe( pipe_slow );
6054 %}
6055
6056 instruct vmulI_mem(vec dst, vec src, memory mem) %{
6057 predicate((UseAVX > 0) &&
6058 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6059 match(Set dst (MulVI src (LoadVector mem)));
6060 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
6061 ins_encode %{
6062 int vlen_enc = vector_length_encoding(this);
6063 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6064 %}
6065 ins_pipe( pipe_slow );
6066 %}
6067
6068 // Longs vector mul
6069 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
6070 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
6071 VM_Version::supports_avx512dq()) ||
6072 VM_Version::supports_avx512vldq());
6073 match(Set dst (MulVL src1 src2));
6074 ins_cost(500);
6075 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
6076 ins_encode %{
6077 assert(UseAVX > 2, "required");
6078 int vlen_enc = vector_length_encoding(this);
6079 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6080 %}
6081 ins_pipe( pipe_slow );
6082 %}
6083
6084 instruct evmulL_mem(vec dst, vec src, memory mem) %{
6085 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
6086 VM_Version::supports_avx512dq()) ||
6087 (Matcher::vector_length_in_bytes(n) > 8 &&
6088 VM_Version::supports_avx512vldq()));
6089 match(Set dst (MulVL src (LoadVector mem)));
6090 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
6091 ins_cost(500);
6092 ins_encode %{
6093 assert(UseAVX > 2, "required");
6094 int vlen_enc = vector_length_encoding(this);
6095 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6096 %}
6097 ins_pipe( pipe_slow );
6098 %}
6099
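// Without AVX-512DQ (evpmullq) a 64x64->64 bit vector multiply has to be
// composed from 32-bit multiplies. Sketch of the identity used by the rules
// below, per 64-bit lane and with all arithmetic mod 2^64:
//   a = a_hi * 2^32 + a_lo,   b = b_hi * 2^32 + b_lo
//   a * b = ((a_hi * b_lo + a_lo * b_hi) << 32) + a_lo * b_lo
// Only the low 32 bits of each cross product are needed (pmulld), while the
// lo*lo term needs the full 64-bit product (pmuludq).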
6100 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
6101 predicate(UseAVX == 0);
6102 match(Set dst (MulVL src1 src2));
6103 ins_cost(500);
6104 effect(TEMP dst, TEMP xtmp);
6105 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
6106 ins_encode %{
6107 assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only the low 32 bits of each product matter
6109 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
6110 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
6111 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
6112 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
6113 __ psllq($dst$$XMMRegister, 32);
6114 // Get the lo-lo products
6115 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
6116 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
6117 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
6118 %}
6119 ins_pipe( pipe_slow );
6120 %}
6121
6122 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
6123 predicate(UseAVX > 0 &&
6124 ((Matcher::vector_length_in_bytes(n) == 64 &&
6125 !VM_Version::supports_avx512dq()) ||
6126 (Matcher::vector_length_in_bytes(n) < 64 &&
6127 !VM_Version::supports_avx512vldq())));
6128 match(Set dst (MulVL src1 src2));
6129 effect(TEMP xtmp1, TEMP xtmp2);
6130 ins_cost(500);
6131 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
6132 ins_encode %{
6133 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only the low 32 bits of each product matter
6135 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
6136 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6137 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
6138 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6139 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
6140 // Get the lo-lo products
6141 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6142 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
6143 %}
6144 ins_pipe( pipe_slow );
6145 %}
6146
6147 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
6148 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
6149 match(Set dst (MulVL src1 src2));
6150 ins_cost(100);
6151 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
6152 ins_encode %{
6153 int vlen_enc = vector_length_encoding(this);
6154 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6155 %}
6156 ins_pipe( pipe_slow );
6157 %}
6158
6159 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
6160 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
6161 match(Set dst (MulVL src1 src2));
6162 ins_cost(100);
6163 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
6164 ins_encode %{
6165 int vlen_enc = vector_length_encoding(this);
6166 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6167 %}
6168 ins_pipe( pipe_slow );
6169 %}
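// Note: the two rules above assume (via MulVLNode::has_uint_inputs() /
// has_int_inputs()) that both MulVL inputs are known to be zero-extended or
// sign-extended 32-bit values, in which case a single vpmuludq (unsigned) or
// vpmuldq (signed) 32x32->64 bit multiply per lane is sufficient and the
// cross-product expansion above can be skipped.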
6170
6171 // Floats vector mul
6172 instruct vmulF(vec dst, vec src) %{
6173 predicate(UseAVX == 0);
6174 match(Set dst (MulVF dst src));
6175 format %{ "mulps $dst,$src\t! mul packedF" %}
6176 ins_encode %{
6177 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6178 %}
6179 ins_pipe( pipe_slow );
6180 %}
6181
6182 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
6183 predicate(UseAVX > 0);
6184 match(Set dst (MulVF src1 src2));
6185 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
6186 ins_encode %{
6187 int vlen_enc = vector_length_encoding(this);
6188 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6189 %}
6190 ins_pipe( pipe_slow );
6191 %}
6192
6193 instruct vmulF_mem(vec dst, vec src, memory mem) %{
6194 predicate((UseAVX > 0) &&
6195 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6196 match(Set dst (MulVF src (LoadVector mem)));
6197 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
6198 ins_encode %{
6199 int vlen_enc = vector_length_encoding(this);
6200 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6201 %}
6202 ins_pipe( pipe_slow );
6203 %}
6204
6205 // Doubles vector mul
6206 instruct vmulD(vec dst, vec src) %{
6207 predicate(UseAVX == 0);
6208 match(Set dst (MulVD dst src));
6209 format %{ "mulpd $dst,$src\t! mul packedD" %}
6210 ins_encode %{
6211 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
6212 %}
6213 ins_pipe( pipe_slow );
6214 %}
6215
6216 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
6217 predicate(UseAVX > 0);
6218 match(Set dst (MulVD src1 src2));
6219 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
6220 ins_encode %{
6221 int vlen_enc = vector_length_encoding(this);
6222 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6223 %}
6224 ins_pipe( pipe_slow );
6225 %}
6226
6227 instruct vmulD_mem(vec dst, vec src, memory mem) %{
6228 predicate((UseAVX > 0) &&
6229 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6230 match(Set dst (MulVD src (LoadVector mem)));
6231 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
6232 ins_encode %{
6233 int vlen_enc = vector_length_encoding(this);
6234 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6235 %}
6236 ins_pipe( pipe_slow );
6237 %}
6238
6239 // --------------------------------- DIV --------------------------------------
6240
6241 // Floats vector div
6242 instruct vdivF(vec dst, vec src) %{
6243 predicate(UseAVX == 0);
6244 match(Set dst (DivVF dst src));
6245 format %{ "divps $dst,$src\t! div packedF" %}
6246 ins_encode %{
6247 __ divps($dst$$XMMRegister, $src$$XMMRegister);
6248 %}
6249 ins_pipe( pipe_slow );
6250 %}
6251
6252 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
6253 predicate(UseAVX > 0);
6254 match(Set dst (DivVF src1 src2));
6255 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
6256 ins_encode %{
6257 int vlen_enc = vector_length_encoding(this);
6258 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6259 %}
6260 ins_pipe( pipe_slow );
6261 %}
6262
6263 instruct vdivF_mem(vec dst, vec src, memory mem) %{
6264 predicate((UseAVX > 0) &&
6265 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6266 match(Set dst (DivVF src (LoadVector mem)));
6267 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
6268 ins_encode %{
6269 int vlen_enc = vector_length_encoding(this);
6270 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6271 %}
6272 ins_pipe( pipe_slow );
6273 %}
6274
6275 // Doubles vector div
6276 instruct vdivD(vec dst, vec src) %{
6277 predicate(UseAVX == 0);
6278 match(Set dst (DivVD dst src));
6279 format %{ "divpd $dst,$src\t! div packedD" %}
6280 ins_encode %{
6281 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
6282 %}
6283 ins_pipe( pipe_slow );
6284 %}
6285
6286 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
6287 predicate(UseAVX > 0);
6288 match(Set dst (DivVD src1 src2));
6289 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
6290 ins_encode %{
6291 int vlen_enc = vector_length_encoding(this);
6292 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6293 %}
6294 ins_pipe( pipe_slow );
6295 %}
6296
6297 instruct vdivD_mem(vec dst, vec src, memory mem) %{
6298 predicate((UseAVX > 0) &&
6299 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6300 match(Set dst (DivVD src (LoadVector mem)));
6301 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
6302 ins_encode %{
6303 int vlen_enc = vector_length_encoding(this);
6304 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6305 %}
6306 ins_pipe( pipe_slow );
6307 %}
6308
6309 // ------------------------------ MinMax ---------------------------------------
6310
6311 // Byte, Short, Int vector Min/Max
6312 instruct minmax_reg_sse(vec dst, vec src) %{
6313 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
6314 UseAVX == 0);
6315 match(Set dst (MinV dst src));
6316 match(Set dst (MaxV dst src));
6317 format %{ "vector_minmax $dst,$src\t! " %}
6318 ins_encode %{
6319 assert(UseSSE >= 4, "required");
6320
6321 int opcode = this->ideal_Opcode();
6322 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6323 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
6324 %}
6325 ins_pipe( pipe_slow );
6326 %}
6327
6328 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
6329 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
6330 UseAVX > 0);
6331 match(Set dst (MinV src1 src2));
6332 match(Set dst (MaxV src1 src2));
6333 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
6334 ins_encode %{
6335 int opcode = this->ideal_Opcode();
6336 int vlen_enc = vector_length_encoding(this);
6337 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6338
6339 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6340 %}
6341 ins_pipe( pipe_slow );
6342 %}
6343
6344 // Long vector Min/Max
6345 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
6346 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
6347 UseAVX == 0);
6348 match(Set dst (MinV dst src));
6349 match(Set dst (MaxV src dst));
6350 effect(TEMP dst, TEMP tmp);
6351 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
6352 ins_encode %{
6353 assert(UseSSE >= 4, "required");
6354
6355 int opcode = this->ideal_Opcode();
6356 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6357 assert(elem_bt == T_LONG, "sanity");
6358
6359 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
6360 %}
6361 ins_pipe( pipe_slow );
6362 %}
6363
6364 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
6365 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
6366 UseAVX > 0 && !VM_Version::supports_avx512vl());
6367 match(Set dst (MinV src1 src2));
6368 match(Set dst (MaxV src1 src2));
6369 effect(TEMP dst);
6370 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
6371 ins_encode %{
6372 int vlen_enc = vector_length_encoding(this);
6373 int opcode = this->ideal_Opcode();
6374 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6375 assert(elem_bt == T_LONG, "sanity");
6376
6377 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6378 %}
6379 ins_pipe( pipe_slow );
6380 %}
6381
6382 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
6383 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
6384 Matcher::vector_element_basic_type(n) == T_LONG);
6385 match(Set dst (MinV src1 src2));
6386 match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
6388 ins_encode %{
6389 assert(UseAVX > 2, "required");
6390
6391 int vlen_enc = vector_length_encoding(this);
6392 int opcode = this->ideal_Opcode();
6393 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6394 assert(elem_bt == T_LONG, "sanity");
6395
6396 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6397 %}
6398 ins_pipe( pipe_slow );
6399 %}
6400
6401 // Float/Double vector Min/Max
6402 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
6403 predicate(VM_Version::supports_avx10_2() &&
6404 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
6405 match(Set dst (MinV a b));
6406 match(Set dst (MaxV a b));
6407 format %{ "vector_minmaxFP $dst, $a, $b" %}
6408 ins_encode %{
6409 int vlen_enc = vector_length_encoding(this);
6410 int opcode = this->ideal_Opcode();
6411 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6412 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
6413 %}
6414 ins_pipe( pipe_slow );
6415 %}
6416
6417 // Float/Double vector Min/Max
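// Note: a plain vminps/vmaxps does not implement Java Math.min/max semantics:
// Java requires that a NaN input produces NaN and that -0.0 is treated as
// strictly smaller than +0.0. The rules below therefore go through
// vminmax_fp/evminmax_fp, which use extra compare/blend steps (and the TEMP
// registers) to get those corner cases right.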
6418 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
6419 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
6420 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
6421 UseAVX > 0);
6422 match(Set dst (MinV a b));
6423 match(Set dst (MaxV a b));
6424 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
6425 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
6426 ins_encode %{
6427 assert(UseAVX > 0, "required");
6428
6429 int opcode = this->ideal_Opcode();
6430 int vlen_enc = vector_length_encoding(this);
6431 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6432
6433 __ vminmax_fp(opcode, elem_bt,
6434 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
6436 %}
6437 ins_pipe( pipe_slow );
6438 %}
6439
6440 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
6441 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
6442 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
6443 match(Set dst (MinV a b));
6444 match(Set dst (MaxV a b));
6445 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
6446 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
6447 ins_encode %{
6448 assert(UseAVX > 2, "required");
6449
6450 int opcode = this->ideal_Opcode();
6451 int vlen_enc = vector_length_encoding(this);
6452 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6453
6454 __ evminmax_fp(opcode, elem_bt,
6455 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
6457 %}
6458 ins_pipe( pipe_slow );
6459 %}
6460
6461 // ------------------------------ Unsigned vector Min/Max ----------------------
6462
6463 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
6464 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
6465 match(Set dst (UMinV a b));
6466 match(Set dst (UMaxV a b));
6467 format %{ "vector_uminmax $dst,$a,$b\t!" %}
6468 ins_encode %{
6469 int opcode = this->ideal_Opcode();
6470 int vlen_enc = vector_length_encoding(this);
6471 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6472 assert(is_integral_type(elem_bt), "");
6473 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
6474 %}
6475 ins_pipe( pipe_slow );
6476 %}
6477
6478 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
6479 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
6480 match(Set dst (UMinV a (LoadVector b)));
6481 match(Set dst (UMaxV a (LoadVector b)));
6482 format %{ "vector_uminmax $dst,$a,$b\t!" %}
6483 ins_encode %{
6484 int opcode = this->ideal_Opcode();
6485 int vlen_enc = vector_length_encoding(this);
6486 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6487 assert(is_integral_type(elem_bt), "");
6488 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
6489 %}
6490 ins_pipe( pipe_slow );
6491 %}
6492
6493 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
6494 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
6495 match(Set dst (UMinV a b));
6496 match(Set dst (UMaxV a b));
6497 effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
6499 ins_encode %{
6500 int opcode = this->ideal_Opcode();
6501 int vlen_enc = vector_length_encoding(this);
6502 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
6503 %}
6504 ins_pipe( pipe_slow );
6505 %}
6506
6507 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
6508 match(Set dst (UMinV (Binary dst src2) mask));
6509 match(Set dst (UMaxV (Binary dst src2) mask));
6510 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
6511 ins_encode %{
6512 int vlen_enc = vector_length_encoding(this);
6513 BasicType bt = Matcher::vector_element_basic_type(this);
6514 int opc = this->ideal_Opcode();
6515 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
6516 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
6517 %}
6518 ins_pipe( pipe_slow );
6519 %}
6520
6521 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
6522 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
6523 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
6524 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
6525 ins_encode %{
6526 int vlen_enc = vector_length_encoding(this);
6527 BasicType bt = Matcher::vector_element_basic_type(this);
6528 int opc = this->ideal_Opcode();
6529 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
6530 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
6531 %}
6532 ins_pipe( pipe_slow );
6533 %}
6534
6535 // --------------------------------- Signum/CopySign ---------------------------
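// Reminder of the Java semantics implemented here: Math.signum(x) returns the
// zero itself for +/-0.0, NaN for NaN, and +/-1.0 otherwise, while
// Math.copySign(magnitude, sign) returns magnitude with the sign bit of sign.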
6536
6537 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
6538 match(Set dst (SignumF dst (Binary zero one)));
6539 effect(KILL cr);
6540 format %{ "signumF $dst, $dst" %}
6541 ins_encode %{
6542 int opcode = this->ideal_Opcode();
6543 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6544 %}
6545 ins_pipe( pipe_slow );
6546 %}
6547
6548 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
6549 match(Set dst (SignumD dst (Binary zero one)));
6550 effect(KILL cr);
6551 format %{ "signumD $dst, $dst" %}
6552 ins_encode %{
6553 int opcode = this->ideal_Opcode();
6554 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6555 %}
6556 ins_pipe( pipe_slow );
6557 %}
6558
6559 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
6560 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
6561 match(Set dst (SignumVF src (Binary zero one)));
6562 match(Set dst (SignumVD src (Binary zero one)));
6563 effect(TEMP dst, TEMP xtmp1);
6564 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
6565 ins_encode %{
6566 int opcode = this->ideal_Opcode();
6567 int vec_enc = vector_length_encoding(this);
6568 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6569 $xtmp1$$XMMRegister, vec_enc);
6570 %}
6571 ins_pipe( pipe_slow );
6572 %}
6573
6574 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
6575 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
6576 match(Set dst (SignumVF src (Binary zero one)));
6577 match(Set dst (SignumVD src (Binary zero one)));
6578 effect(TEMP dst, TEMP ktmp1);
6579 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
6580 ins_encode %{
6581 int opcode = this->ideal_Opcode();
6582 int vec_enc = vector_length_encoding(this);
6583 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6584 $ktmp1$$KRegister, vec_enc);
6585 %}
6586 ins_pipe( pipe_slow );
6587 %}
6588
6589 // ---------------------------------------
6590 // For copySign use 0xE4 as writemask for vpternlog
6591 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
6592 // C (xmm2) is set to 0x7FFFFFFF
6593 // Wherever xmm2 is 0, we want to pick from B (sign)
6594 // Wherever xmm2 is 1, we want to pick from A (src)
6595 //
6596 // A B C Result
6597 // 0 0 0 0
6598 // 0 0 1 0
6599 // 0 1 0 1
6600 // 0 1 1 0
6601 // 1 0 0 0
6602 // 1 0 1 1
6603 // 1 1 0 1
6604 // 1 1 1 1
6605 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
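// Equivalently, imm8 0xE4 encodes the per-bit selector "C ? A : B": wherever
// the mask bit (C) is 1 the result keeps the magnitude bit from A, and
// wherever it is 0 the result takes the sign bit from B.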
6607 // ---------------------------------------
6608
6609 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
6610 match(Set dst (CopySignF dst src));
6611 effect(TEMP tmp1, TEMP tmp2);
6612 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
6613 ins_encode %{
6614 __ movl($tmp2$$Register, 0x7FFFFFFF);
6615 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
6616 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
6617 %}
6618 ins_pipe( pipe_slow );
6619 %}
6620
6621 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
6622 match(Set dst (CopySignD dst (Binary src zero)));
6623 ins_cost(100);
6624 effect(TEMP tmp1, TEMP tmp2);
6625 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
6626 ins_encode %{
6627 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
6628 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
6629 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
6630 %}
6631 ins_pipe( pipe_slow );
6632 %}
6633
6634 //----------------------------- CompressBits/ExpandBits ------------------------
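// CompressBits maps to BMI2 PEXT and ExpandBits to BMI2 PDEP. As a small
// worked example (8-bit for brevity): with mask = 0b0110_0101,
//   pext(src, mask) gathers the src bits at the mask's set positions
//   (bits 0, 2, 5, 6) and packs them into the low bits of the result, while
//   pdep(src, mask) scatters the low bits of src back out to those positions.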
6635
6636 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
6637 predicate(n->bottom_type()->isa_int());
6638 match(Set dst (CompressBits src mask));
6639 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
6640 ins_encode %{
6641 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
6642 %}
6643 ins_pipe( pipe_slow );
6644 %}
6645
6646 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
6647 predicate(n->bottom_type()->isa_int());
6648 match(Set dst (ExpandBits src mask));
6649 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
6650 ins_encode %{
6651 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
6652 %}
6653 ins_pipe( pipe_slow );
6654 %}
6655
6656 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
6657 predicate(n->bottom_type()->isa_int());
6658 match(Set dst (CompressBits src (LoadI mask)));
6659 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
6660 ins_encode %{
6661 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
6662 %}
6663 ins_pipe( pipe_slow );
6664 %}
6665
6666 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
6667 predicate(n->bottom_type()->isa_int());
6668 match(Set dst (ExpandBits src (LoadI mask)));
6669 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
6670 ins_encode %{
6671 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
6672 %}
6673 ins_pipe( pipe_slow );
6674 %}
6675
6676 // --------------------------------- Sqrt --------------------------------------
6677
6678 instruct vsqrtF_reg(vec dst, vec src) %{
6679 match(Set dst (SqrtVF src));
6680 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
6681 ins_encode %{
6682 assert(UseAVX > 0, "required");
6683 int vlen_enc = vector_length_encoding(this);
6684 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6685 %}
6686 ins_pipe( pipe_slow );
6687 %}
6688
6689 instruct vsqrtF_mem(vec dst, memory mem) %{
6690 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
6691 match(Set dst (SqrtVF (LoadVector mem)));
6692 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
6693 ins_encode %{
6694 assert(UseAVX > 0, "required");
6695 int vlen_enc = vector_length_encoding(this);
6696 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
6697 %}
6698 ins_pipe( pipe_slow );
6699 %}
6700
6701 // Floating point vector sqrt
6702 instruct vsqrtD_reg(vec dst, vec src) %{
6703 match(Set dst (SqrtVD src));
6704 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
6705 ins_encode %{
6706 assert(UseAVX > 0, "required");
6707 int vlen_enc = vector_length_encoding(this);
6708 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6709 %}
6710 ins_pipe( pipe_slow );
6711 %}
6712
6713 instruct vsqrtD_mem(vec dst, memory mem) %{
6714 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
6715 match(Set dst (SqrtVD (LoadVector mem)));
6716 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
6717 ins_encode %{
6718 assert(UseAVX > 0, "required");
6719 int vlen_enc = vector_length_encoding(this);
6720 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
6721 %}
6722 ins_pipe( pipe_slow );
6723 %}
6724
6725 // ------------------------------ Shift ---------------------------------------
6726
6727 // Left and right shift count vectors are the same on x86
6728 // (only lowest bits of xmm reg are used for count).
6729 instruct vshiftcnt(vec dst, rRegI cnt) %{
6730 match(Set dst (LShiftCntV cnt));
6731 match(Set dst (RShiftCntV cnt));
6732 format %{ "movdl $dst,$cnt\t! load shift count" %}
6733 ins_encode %{
6734 __ movdl($dst$$XMMRegister, $cnt$$Register);
6735 %}
6736 ins_pipe( pipe_slow );
6737 %}
6738
6739 // Byte vector shift
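// There are no byte-granular shift instructions, so the rules below widen the
// bytes to words (vextendbw), shift the words, mask each result back down to
// its low byte, and re-pack with packuswb/vpackuswb.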
6740 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
6741 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
6742 match(Set dst ( LShiftVB src shift));
6743 match(Set dst ( RShiftVB src shift));
6744 match(Set dst (URShiftVB src shift));
6745 effect(TEMP dst, USE src, USE shift, TEMP tmp);
6746 format %{"vector_byte_shift $dst,$src,$shift" %}
6747 ins_encode %{
6748 assert(UseSSE > 3, "required");
6749 int opcode = this->ideal_Opcode();
6750 bool sign = (opcode != Op_URShiftVB);
6751 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
6752 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
6753 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
6754 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
6755 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
6756 %}
6757 ins_pipe( pipe_slow );
6758 %}
6759
6760 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
6761 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
6762 UseAVX <= 1);
6763 match(Set dst ( LShiftVB src shift));
6764 match(Set dst ( RShiftVB src shift));
6765 match(Set dst (URShiftVB src shift));
6766 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
6767 format %{"vector_byte_shift $dst,$src,$shift" %}
6768 ins_encode %{
6769 assert(UseSSE > 3, "required");
6770 int opcode = this->ideal_Opcode();
6771 bool sign = (opcode != Op_URShiftVB);
6772 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
6773 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
6774 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
6775 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
6776 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
6777 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
6778 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
6779 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
6780 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
6781 %}
6782 ins_pipe( pipe_slow );
6783 %}
6784
6785 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
6786 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
6787 UseAVX > 1);
6788 match(Set dst ( LShiftVB src shift));
6789 match(Set dst ( RShiftVB src shift));
6790 match(Set dst (URShiftVB src shift));
6791 effect(TEMP dst, TEMP tmp);
6792 format %{"vector_byte_shift $dst,$src,$shift" %}
6793 ins_encode %{
6794 int opcode = this->ideal_Opcode();
6795 bool sign = (opcode != Op_URShiftVB);
6796 int vlen_enc = Assembler::AVX_256bit;
6797 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
6798 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6799 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
6800 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
6801 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
6802 %}
6803 ins_pipe( pipe_slow );
6804 %}
6805
6806 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
6807 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
6808 match(Set dst ( LShiftVB src shift));
6809 match(Set dst ( RShiftVB src shift));
6810 match(Set dst (URShiftVB src shift));
6811 effect(TEMP dst, TEMP tmp);
6812 format %{"vector_byte_shift $dst,$src,$shift" %}
6813 ins_encode %{
6814 assert(UseAVX > 1, "required");
6815 int opcode = this->ideal_Opcode();
6816 bool sign = (opcode != Op_URShiftVB);
6817 int vlen_enc = Assembler::AVX_256bit;
6818 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
6819 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6820 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6821 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6822 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6823 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
6824 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
6825 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6826 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
6827 %}
6828 ins_pipe( pipe_slow );
6829 %}
6830
6831 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
6832 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
6833 match(Set dst ( LShiftVB src shift));
6834 match(Set dst (RShiftVB src shift));
6835 match(Set dst (URShiftVB src shift));
6836 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
6837 format %{"vector_byte_shift $dst,$src,$shift" %}
6838 ins_encode %{
6839 assert(UseAVX > 2, "required");
6840 int opcode = this->ideal_Opcode();
6841 bool sign = (opcode != Op_URShiftVB);
6842 int vlen_enc = Assembler::AVX_512bit;
6843 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
6844 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
6845 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
6846 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6847 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6848 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
6849 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6850 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6851 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6852 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
6853 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
6854 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6855 %}
6856 ins_pipe( pipe_slow );
6857 %}
6858
// A Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts the short value into an int with
// sign extension before the shift. Char vectors are fine, though, since chars
// are unsigned values.
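// For example, with short s = -16 and a shift of 2, Java evaluates s >>> 2 on
// the sign-extended int 0xFFFFFFF0, giving 0x3FFFFFFC, which narrows back to
// 0xFFFC (-4); a plain 16-bit logical shift of 0xFFF0 would give 0x3FFC instead.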
6863 // Shorts/Chars vector left shift
6864 instruct vshiftS(vec dst, vec src, vec shift) %{
6865 predicate(!n->as_ShiftV()->is_var_shift());
6866 match(Set dst ( LShiftVS src shift));
6867 match(Set dst ( RShiftVS src shift));
6868 match(Set dst (URShiftVS src shift));
6869 effect(TEMP dst, USE src, USE shift);
6870 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
6871 ins_encode %{
6872 int opcode = this->ideal_Opcode();
6873 if (UseAVX > 0) {
6874 int vlen_enc = vector_length_encoding(this);
6875 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6876 } else {
6877 int vlen = Matcher::vector_length(this);
6878 if (vlen == 2) {
6879 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6880 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6881 } else if (vlen == 4) {
6882 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6883 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6884 } else {
6885 assert (vlen == 8, "sanity");
6886 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6887 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6888 }
6889 }
6890 %}
6891 ins_pipe( pipe_slow );
6892 %}
6893
6894 // Integers vector left shift
6895 instruct vshiftI(vec dst, vec src, vec shift) %{
6896 predicate(!n->as_ShiftV()->is_var_shift());
6897 match(Set dst ( LShiftVI src shift));
6898 match(Set dst ( RShiftVI src shift));
6899 match(Set dst (URShiftVI src shift));
6900 effect(TEMP dst, USE src, USE shift);
6901 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
6902 ins_encode %{
6903 int opcode = this->ideal_Opcode();
6904 if (UseAVX > 0) {
6905 int vlen_enc = vector_length_encoding(this);
6906 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6907 } else {
6908 int vlen = Matcher::vector_length(this);
6909 if (vlen == 2) {
6910 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6911 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6912 } else {
6913 assert(vlen == 4, "sanity");
6914 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6915 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6916 }
6917 }
6918 %}
6919 ins_pipe( pipe_slow );
6920 %}
6921
6922 // Integers vector left constant shift
6923 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
6924 match(Set dst (LShiftVI src (LShiftCntV shift)));
6925 match(Set dst (RShiftVI src (RShiftCntV shift)));
6926 match(Set dst (URShiftVI src (RShiftCntV shift)));
6927 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
6928 ins_encode %{
6929 int opcode = this->ideal_Opcode();
6930 if (UseAVX > 0) {
6931 int vector_len = vector_length_encoding(this);
6932 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
6933 } else {
6934 int vlen = Matcher::vector_length(this);
6935 if (vlen == 2) {
6936 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6937 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
6938 } else {
6939 assert(vlen == 4, "sanity");
6940 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6941 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
6942 }
6943 }
6944 %}
6945 ins_pipe( pipe_slow );
6946 %}
6947
6948 // Longs vector shift
6949 instruct vshiftL(vec dst, vec src, vec shift) %{
6950 predicate(!n->as_ShiftV()->is_var_shift());
6951 match(Set dst ( LShiftVL src shift));
6952 match(Set dst (URShiftVL src shift));
6953 effect(TEMP dst, USE src, USE shift);
6954 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
6955 ins_encode %{
6956 int opcode = this->ideal_Opcode();
6957 if (UseAVX > 0) {
6958 int vlen_enc = vector_length_encoding(this);
6959 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6960 } else {
6961 assert(Matcher::vector_length(this) == 2, "");
6962 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6963 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6964 }
6965 %}
6966 ins_pipe( pipe_slow );
6967 %}
6968
6969 // Longs vector constant shift
6970 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
6971 match(Set dst (LShiftVL src (LShiftCntV shift)));
6972 match(Set dst (URShiftVL src (RShiftCntV shift)));
6973 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
6974 ins_encode %{
6975 int opcode = this->ideal_Opcode();
6976 if (UseAVX > 0) {
6977 int vector_len = vector_length_encoding(this);
6978 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
6979 } else {
6980 assert(Matcher::vector_length(this) == 2, "");
6981 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6982 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
6983 }
6984 %}
6985 ins_pipe( pipe_slow );
6986 %}
6987
6988 // -------------------ArithmeticRightShift -----------------------------------
6989 // Long vector arithmetic right shift
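// Before AVX-512 (evpsraq) there is no arithmetic right shift for 64-bit
// lanes, so it is emulated below. Sketch of the identity used, with
// m = 0x8000000000000000 and >>> denoting a logical shift:
//   (x >> s)  ==  ((x >>> s) ^ (m >>> s)) - (m >>> s)
// i.e. shift logically, flip the shifted-down copy of the sign bit, and
// subtract it again to sign-extend the result.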
6990 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
6991 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
6992 match(Set dst (RShiftVL src shift));
6993 effect(TEMP dst, TEMP tmp);
6994 format %{ "vshiftq $dst,$src,$shift" %}
6995 ins_encode %{
6996 uint vlen = Matcher::vector_length(this);
6997 if (vlen == 2) {
6998 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6999 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
7000 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
7001 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
7002 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
7003 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
7004 } else {
7005 assert(vlen == 4, "sanity");
7006 assert(UseAVX > 1, "required");
7007 int vlen_enc = Assembler::AVX_256bit;
7008 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7009 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
7010 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7011 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
7012 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
7013 }
7014 %}
7015 ins_pipe( pipe_slow );
7016 %}
7017
7018 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
7019 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
7020 match(Set dst (RShiftVL src shift));
7021 format %{ "vshiftq $dst,$src,$shift" %}
7022 ins_encode %{
7023 int vlen_enc = vector_length_encoding(this);
7024 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7025 %}
7026 ins_pipe( pipe_slow );
7027 %}
7028
7029 // ------------------- Variable Shift -----------------------------
7030 // Byte variable shift
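// x86 has no per-element variable shift for byte lanes, so the instructs below widen
// the bytes to wider lanes, apply a word/dword variable shift, and pack the results
// back down to bytes. Without AVX512BW this goes through varshiftbw on 8-byte chunks
// held in 128-bit registers; with AVX512BW the evarshiftb helper is used instead.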
7031 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
7032 predicate(Matcher::vector_length(n) <= 8 &&
7033 n->as_ShiftV()->is_var_shift() &&
7034 !VM_Version::supports_avx512bw());
7035 match(Set dst ( LShiftVB src shift));
7036 match(Set dst ( RShiftVB src shift));
7037 match(Set dst (URShiftVB src shift));
7038 effect(TEMP dst, TEMP vtmp);
7039 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
7040 ins_encode %{
7041 assert(UseAVX >= 2, "required");
7042
7043 int opcode = this->ideal_Opcode();
7044 int vlen_enc = Assembler::AVX_128bit;
7045 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
7046 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
7047 %}
7048 ins_pipe( pipe_slow );
7049 %}
7050
7051 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
7052 predicate(Matcher::vector_length(n) == 16 &&
7053 n->as_ShiftV()->is_var_shift() &&
7054 !VM_Version::supports_avx512bw());
7055 match(Set dst ( LShiftVB src shift));
7056 match(Set dst ( RShiftVB src shift));
7057 match(Set dst (URShiftVB src shift));
7058 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
7059 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
7060 ins_encode %{
7061 assert(UseAVX >= 2, "required");
7062
7063 int opcode = this->ideal_Opcode();
7064 int vlen_enc = Assembler::AVX_128bit;
7065 // Shift lower half and get word result in dst
7066 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
7067
7068 // Shift upper half and get word result in vtmp1
7069 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
7070 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
7071 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
7072
7073 // Merge and down convert the two word results to byte in dst
7074 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
7075 %}
7076 ins_pipe( pipe_slow );
7077 %}
7078
7079 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
7080 predicate(Matcher::vector_length(n) == 32 &&
7081 n->as_ShiftV()->is_var_shift() &&
7082 !VM_Version::supports_avx512bw());
7083 match(Set dst ( LShiftVB src shift));
7084 match(Set dst ( RShiftVB src shift));
7085 match(Set dst (URShiftVB src shift));
7086 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
7087 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
7088 ins_encode %{
7089 assert(UseAVX >= 2, "required");
7090
7091 int opcode = this->ideal_Opcode();
7092 int vlen_enc = Assembler::AVX_128bit;
7093 // Process lower 128 bits and get result in dst
7094 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
7095 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
7096 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
7097 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
7098 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
7099
7100 // Process higher 128 bits and get result in vtmp3
7101 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
7102 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
7103 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
7104 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
7105 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
7106 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
7107 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
7108
7109 // Merge the two results in dst
7110 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
7111 %}
7112 ins_pipe( pipe_slow );
7113 %}
7114
7115 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
7116 predicate(Matcher::vector_length(n) <= 32 &&
7117 n->as_ShiftV()->is_var_shift() &&
7118 VM_Version::supports_avx512bw());
7119 match(Set dst ( LShiftVB src shift));
7120 match(Set dst ( RShiftVB src shift));
7121 match(Set dst (URShiftVB src shift));
7122 effect(TEMP dst, TEMP vtmp);
7123 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
7124 ins_encode %{
7125 assert(UseAVX > 2, "required");
7126
7127 int opcode = this->ideal_Opcode();
7128 int vlen_enc = vector_length_encoding(this);
7129 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
7130 %}
7131 ins_pipe( pipe_slow );
7132 %}
7133
7134 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
7135 predicate(Matcher::vector_length(n) == 64 &&
7136 n->as_ShiftV()->is_var_shift() &&
7137 VM_Version::supports_avx512bw());
7138 match(Set dst ( LShiftVB src shift));
7139 match(Set dst ( RShiftVB src shift));
7140 match(Set dst (URShiftVB src shift));
7141 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
7142 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
7143 ins_encode %{
7144 assert(UseAVX > 2, "required");
7145
7146 int opcode = this->ideal_Opcode();
7147 int vlen_enc = Assembler::AVX_256bit;
7148 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
7149 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
7150 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
7151 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
7152 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
7153 %}
7154 ins_pipe( pipe_slow );
7155 %}
7156
7157 // Short variable shift
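// Per-element variable shifts exist only for 32/64-bit lanes on AVX2 (16-bit lanes
// require AVX512BW), so without AVX512BW the short shifts below extend the shorts to
// ints, shift with the dword variable shift, mask the results back to 16 bits, and
// pack the halves together again.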
7158 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
7159 predicate(Matcher::vector_length(n) <= 8 &&
7160 n->as_ShiftV()->is_var_shift() &&
7161 !VM_Version::supports_avx512bw());
7162 match(Set dst ( LShiftVS src shift));
7163 match(Set dst ( RShiftVS src shift));
7164 match(Set dst (URShiftVS src shift));
7165 effect(TEMP dst, TEMP vtmp);
7166 format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp as TEMP" %}
7167 ins_encode %{
7168 assert(UseAVX >= 2, "required");
7169
7170 int opcode = this->ideal_Opcode();
7171 bool sign = (opcode != Op_URShiftVS);
7172 int vlen_enc = Assembler::AVX_256bit;
7173 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
7174 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
7175 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
7176 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7177 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
7178 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
7179 %}
7180 ins_pipe( pipe_slow );
7181 %}
7182
7183 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
7184 predicate(Matcher::vector_length(n) == 16 &&
7185 n->as_ShiftV()->is_var_shift() &&
7186 !VM_Version::supports_avx512bw());
7187 match(Set dst ( LShiftVS src shift));
7188 match(Set dst ( RShiftVS src shift));
7189 match(Set dst (URShiftVS src shift));
7190 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
7191 format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp1, $vtmp2 as TEMP" %}
7192 ins_encode %{
7193 assert(UseAVX >= 2, "required");
7194
7195 int opcode = this->ideal_Opcode();
7196 bool sign = (opcode != Op_URShiftVS);
7197 int vlen_enc = Assembler::AVX_256bit;
7198 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
7199 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
7200 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7201 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
7202 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7203
7204 // Shift upper half, with result in dst using vtmp1 as TEMP
7205 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
7206 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
7207 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7208 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
7209 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
7210 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7211
7212 // Merge lower and upper half result into dst
7213 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7214 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
7215 %}
7216 ins_pipe( pipe_slow );
7217 %}
7218
7219 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
7220 predicate(n->as_ShiftV()->is_var_shift() &&
7221 VM_Version::supports_avx512bw());
7222 match(Set dst ( LShiftVS src shift));
7223 match(Set dst ( RShiftVS src shift));
7224 match(Set dst (URShiftVS src shift));
7225 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
7226 ins_encode %{
7227 assert(UseAVX > 2, "required");
7228
7229 int opcode = this->ideal_Opcode();
7230 int vlen_enc = vector_length_encoding(this);
7231 if (!VM_Version::supports_avx512vl()) {
7232 vlen_enc = Assembler::AVX_512bit;
7233 }
7234 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7235 %}
7236 ins_pipe( pipe_slow );
7237 %}
7238
7239 // Integer variable shift
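// 32-bit lanes have direct per-element shifts on AVX2 (vpsllvd / vpsravd / vpsrlvd),
// which the varshiftd helper is expected to emit here.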
7240 instruct vshiftI_var(vec dst, vec src, vec shift) %{
7241 predicate(n->as_ShiftV()->is_var_shift());
7242 match(Set dst ( LShiftVI src shift));
7243 match(Set dst ( RShiftVI src shift));
7244 match(Set dst (URShiftVI src shift));
7245 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
7246 ins_encode %{
7247 assert(UseAVX >= 2, "required");
7248
7249 int opcode = this->ideal_Opcode();
7250 int vlen_enc = vector_length_encoding(this);
7251 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7252 %}
7253 ins_pipe( pipe_slow );
7254 %}
7255
7256 // Long variable shift
7257 instruct vshiftL_var(vec dst, vec src, vec shift) %{
7258 predicate(n->as_ShiftV()->is_var_shift());
7259 match(Set dst ( LShiftVL src shift));
7260 match(Set dst (URShiftVL src shift));
7261 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
7262 ins_encode %{
7263 assert(UseAVX >= 2, "required");
7264
7265 int opcode = this->ideal_Opcode();
7266 int vlen_enc = vector_length_encoding(this);
7267 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7268 %}
7269 ins_pipe( pipe_slow );
7270 %}
7271
7272 // Long variable arithmetic right shift
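// AVX2 has no per-element arithmetic right shift for 64-bit lanes (vpsravq is
// EVEX-only), so the AVX2 rule below emulates it via the varshiftq helper with a
// temporary vector, while the EVEX rule lets varshiftq use vpsravq directly.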
7273 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
7274 predicate(Matcher::vector_length(n) <= 4 &&
7275 n->as_ShiftV()->is_var_shift() &&
7276 UseAVX == 2);
7277 match(Set dst (RShiftVL src shift));
7278 effect(TEMP dst, TEMP vtmp);
7279 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
7280 ins_encode %{
7281 int opcode = this->ideal_Opcode();
7282 int vlen_enc = vector_length_encoding(this);
7283 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
7284 $vtmp$$XMMRegister);
7285 %}
7286 ins_pipe( pipe_slow );
7287 %}
7288
7289 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
7290 predicate(n->as_ShiftV()->is_var_shift() &&
7291 UseAVX > 2);
7292 match(Set dst (RShiftVL src shift));
7293 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
7294 ins_encode %{
7295 int opcode = this->ideal_Opcode();
7296 int vlen_enc = vector_length_encoding(this);
7297 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7298 %}
7299 ins_pipe( pipe_slow );
7300 %}
7301
7302 // --------------------------------- AND --------------------------------------
7303
7304 instruct vand(vec dst, vec src) %{
7305 predicate(UseAVX == 0);
7306 match(Set dst (AndV dst src));
7307 format %{ "pand $dst,$src\t! and vectors" %}
7308 ins_encode %{
7309 __ pand($dst$$XMMRegister, $src$$XMMRegister);
7310 %}
7311 ins_pipe( pipe_slow );
7312 %}
7313
7314 instruct vand_reg(vec dst, vec src1, vec src2) %{
7315 predicate(UseAVX > 0);
7316 match(Set dst (AndV src1 src2));
7317 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
7318 ins_encode %{
7319 int vlen_enc = vector_length_encoding(this);
7320 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
7321 %}
7322 ins_pipe( pipe_slow );
7323 %}
7324
7325 instruct vand_mem(vec dst, vec src, memory mem) %{
7326 predicate((UseAVX > 0) &&
7327 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
7328 match(Set dst (AndV src (LoadVector mem)));
7329 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
7330 ins_encode %{
7331 int vlen_enc = vector_length_encoding(this);
7332 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
7333 %}
7334 ins_pipe( pipe_slow );
7335 %}
7336
7337 // --------------------------------- OR ---------------------------------------
7338
7339 instruct vor(vec dst, vec src) %{
7340 predicate(UseAVX == 0);
7341 match(Set dst (OrV dst src));
7342 format %{ "por $dst,$src\t! or vectors" %}
7343 ins_encode %{
7344 __ por($dst$$XMMRegister, $src$$XMMRegister);
7345 %}
7346 ins_pipe( pipe_slow );
7347 %}
7348
7349 instruct vor_reg(vec dst, vec src1, vec src2) %{
7350 predicate(UseAVX > 0);
7351 match(Set dst (OrV src1 src2));
7352 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
7353 ins_encode %{
7354 int vlen_enc = vector_length_encoding(this);
7355 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
7356 %}
7357 ins_pipe( pipe_slow );
7358 %}
7359
7360 instruct vor_mem(vec dst, vec src, memory mem) %{
7361 predicate((UseAVX > 0) &&
7362 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
7363 match(Set dst (OrV src (LoadVector mem)));
7364 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
7365 ins_encode %{
7366 int vlen_enc = vector_length_encoding(this);
7367 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
7368 %}
7369 ins_pipe( pipe_slow );
7370 %}
7371
7372 // --------------------------------- XOR --------------------------------------
7373
7374 instruct vxor(vec dst, vec src) %{
7375 predicate(UseAVX == 0);
7376 match(Set dst (XorV dst src));
7377 format %{ "pxor $dst,$src\t! xor vectors" %}
7378 ins_encode %{
7379 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
7380 %}
7381 ins_pipe( pipe_slow );
7382 %}
7383
7384 instruct vxor_reg(vec dst, vec src1, vec src2) %{
7385 predicate(UseAVX > 0);
7386 match(Set dst (XorV src1 src2));
7387 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
7388 ins_encode %{
7389 int vlen_enc = vector_length_encoding(this);
7390 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
7391 %}
7392 ins_pipe( pipe_slow );
7393 %}
7394
7395 instruct vxor_mem(vec dst, vec src, memory mem) %{
7396 predicate((UseAVX > 0) &&
7397 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
7398 match(Set dst (XorV src (LoadVector mem)));
7399 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
7400 ins_encode %{
7401 int vlen_enc = vector_length_encoding(this);
7402 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
7403 %}
7404 ins_pipe( pipe_slow );
7405 %}
7406
7407 // --------------------------------- VectorCast --------------------------------------
7408
7409 instruct vcastBtoX(vec dst, vec src) %{
7410 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
7411 match(Set dst (VectorCastB2X src));
7412 format %{ "vector_cast_b2x $dst,$src\t!" %}
7413 ins_encode %{
7414 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7415 int vlen_enc = vector_length_encoding(this);
7416 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7417 %}
7418 ins_pipe( pipe_slow );
7419 %}
7420
7421 instruct vcastBtoD(legVec dst, legVec src) %{
7422 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
7423 match(Set dst (VectorCastB2X src));
7424 format %{ "vector_cast_b2x $dst,$src\t!" %}
7425 ins_encode %{
7426 int vlen_enc = vector_length_encoding(this);
7427 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7428 %}
7429 ins_pipe( pipe_slow );
7430 %}
7431
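// Narrowing casts without AVX-512 truncation instructions mask each element down to
// its low byte/word first and then use the unsigned pack instructions (vpackuswb /
// vpackusdw); the masking keeps the packs' saturation from altering the truncated
// values.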
7432 instruct castStoX(vec dst, vec src) %{
7433 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
7434 Matcher::vector_length(n->in(1)) <= 8 && // src
7435 Matcher::vector_element_basic_type(n) == T_BYTE);
7436 match(Set dst (VectorCastS2X src));
7437 format %{ "vector_cast_s2x $dst,$src" %}
7438 ins_encode %{
7439 assert(UseAVX > 0, "required");
7440
7441 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
7442 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
7443 %}
7444 ins_pipe( pipe_slow );
7445 %}
7446
7447 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
7448 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
7449 Matcher::vector_length(n->in(1)) == 16 && // src
7450 Matcher::vector_element_basic_type(n) == T_BYTE);
7451 effect(TEMP dst, TEMP vtmp);
7452 match(Set dst (VectorCastS2X src));
7453 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
7454 ins_encode %{
7455 assert(UseAVX > 0, "required");
7456
7457 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
7458 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
7459 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
7460 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
7461 %}
7462 ins_pipe( pipe_slow );
7463 %}
7464
7465 instruct vcastStoX_evex(vec dst, vec src) %{
7466 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
7467 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
7468 match(Set dst (VectorCastS2X src));
7469 format %{ "vector_cast_s2x $dst,$src\t!" %}
7470 ins_encode %{
7471 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7472 int src_vlen_enc = vector_length_encoding(this, $src);
7473 int vlen_enc = vector_length_encoding(this);
7474 switch (to_elem_bt) {
7475 case T_BYTE:
7476 if (!VM_Version::supports_avx512vl()) {
7477 vlen_enc = Assembler::AVX_512bit;
7478 }
7479 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7480 break;
7481 case T_INT:
7482 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7483 break;
7484 case T_FLOAT:
7485 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7486 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7487 break;
7488 case T_LONG:
7489 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7490 break;
7491 case T_DOUBLE: {
7492 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
7493 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
7494 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7495 break;
7496 }
7497 default:
7498 ShouldNotReachHere();
7499 }
7500 %}
7501 ins_pipe( pipe_slow );
7502 %}
7503
7504 instruct castItoX(vec dst, vec src) %{
7505 predicate(UseAVX <= 2 &&
7506 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
7507 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
7508 match(Set dst (VectorCastI2X src));
7509 format %{ "vector_cast_i2x $dst,$src" %}
7510 ins_encode %{
7511 assert(UseAVX > 0, "required");
7512
7513 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7514 int vlen_enc = vector_length_encoding(this, $src);
7515
7516 if (to_elem_bt == T_BYTE) {
7517 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
7518 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7519 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7520 } else {
7521 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
7522 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7523 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7524 }
7525 %}
7526 ins_pipe( pipe_slow );
7527 %}
7528
7529 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
7530 predicate(UseAVX <= 2 &&
7531 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
7532 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
7533 match(Set dst (VectorCastI2X src));
7534 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
7535 effect(TEMP dst, TEMP vtmp);
7536 ins_encode %{
7537 assert(UseAVX > 0, "required");
7538
7539 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7540 int vlen_enc = vector_length_encoding(this, $src);
7541
7542 if (to_elem_bt == T_BYTE) {
7543 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
7544 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
7545 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7546 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7547 } else {
7548 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
7549 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7550 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
7551 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7552 }
7553 %}
7554 ins_pipe( pipe_slow );
7555 %}
7556
7557 instruct vcastItoX_evex(vec dst, vec src) %{
7558 predicate(UseAVX > 2 ||
7559 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
7560 match(Set dst (VectorCastI2X src));
7561 format %{ "vector_cast_i2x $dst,$src\t!" %}
7562 ins_encode %{
7563 assert(UseAVX > 0, "required");
7564
7565 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
7566 int src_vlen_enc = vector_length_encoding(this, $src);
7567 int dst_vlen_enc = vector_length_encoding(this);
7568 switch (dst_elem_bt) {
7569 case T_BYTE:
7570 if (!VM_Version::supports_avx512vl()) {
7571 src_vlen_enc = Assembler::AVX_512bit;
7572 }
7573 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7574 break;
7575 case T_SHORT:
7576 if (!VM_Version::supports_avx512vl()) {
7577 src_vlen_enc = Assembler::AVX_512bit;
7578 }
7579 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7580 break;
7581 case T_FLOAT:
7582 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
7583 break;
7584 case T_LONG:
7585 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
7586 break;
7587 case T_DOUBLE:
7588 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
7589 break;
7590 default:
7591 ShouldNotReachHere();
7592 }
7593 %}
7594 ins_pipe( pipe_slow );
7595 %}
7596
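// Long -> byte/short narrowing on AVX2: gather the low 32 bits of each long into the
// lower lanes (vpshufd, or vpermilps + vpermpd for 256-bit sources), then reuse the
// int narrowing sequence of masking followed by unsigned packing.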
7597 instruct vcastLtoBS(vec dst, vec src) %{
7598 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
7599 UseAVX <= 2);
7600 match(Set dst (VectorCastL2X src));
7601 format %{ "vector_cast_l2x $dst,$src" %}
7602 ins_encode %{
7603 assert(UseAVX > 0, "required");
7604
7605 int vlen = Matcher::vector_length_in_bytes(this, $src);
7606 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7607 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
7608 : ExternalAddress(vector_int_to_short_mask());
7609 if (vlen <= 16) {
7610 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
7611 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
7612 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7613 } else {
7614 assert(vlen <= 32, "required");
7615 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
7616 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
7617 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
7618 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7619 }
7620 if (to_elem_bt == T_BYTE) {
7621 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7622 }
7623 %}
7624 ins_pipe( pipe_slow );
7625 %}
7626
7627 instruct vcastLtoX_evex(vec dst, vec src) %{
7628 predicate(UseAVX > 2 ||
7629 (Matcher::vector_element_basic_type(n) == T_INT ||
7630 Matcher::vector_element_basic_type(n) == T_FLOAT ||
7631 Matcher::vector_element_basic_type(n) == T_DOUBLE));
7632 match(Set dst (VectorCastL2X src));
7633 format %{ "vector_cast_l2x $dst,$src\t!" %}
7634 ins_encode %{
7635 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7636 int vlen = Matcher::vector_length_in_bytes(this, $src);
7637 int vlen_enc = vector_length_encoding(this, $src);
7638 switch (to_elem_bt) {
7639 case T_BYTE:
7640 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
7641 vlen_enc = Assembler::AVX_512bit;
7642 }
7643 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7644 break;
7645 case T_SHORT:
7646 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
7647 vlen_enc = Assembler::AVX_512bit;
7648 }
7649 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7650 break;
7651 case T_INT:
7652 if (vlen == 8) {
7653 if ($dst$$XMMRegister != $src$$XMMRegister) {
7654 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7655 }
7656 } else if (vlen == 16) {
7657 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
7658 } else if (vlen == 32) {
7659 if (UseAVX > 2) {
7660 if (!VM_Version::supports_avx512vl()) {
7661 vlen_enc = Assembler::AVX_512bit;
7662 }
7663 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7664 } else {
7665 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
7666 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
7667 }
7668 } else { // vlen == 64
7669 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7670 }
7671 break;
7672 case T_FLOAT:
7673 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
7674 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7675 break;
7676 case T_DOUBLE:
7677 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
7678 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7679 break;
7680
7681 default: assert(false, "%s", type2name(to_elem_bt));
7682 }
7683 %}
7684 ins_pipe( pipe_slow );
7685 %}
7686
7687 instruct vcastFtoD_reg(vec dst, vec src) %{
7688 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
7689 match(Set dst (VectorCastF2X src));
7690 format %{ "vector_cast_f2d $dst,$src\t!" %}
7691 ins_encode %{
7692 int vlen_enc = vector_length_encoding(this);
7693 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7694 %}
7695 ins_pipe( pipe_slow );
7696 %}
7697
7698
7699 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
7700 predicate(!VM_Version::supports_avx10_2() &&
7701 !VM_Version::supports_avx512vl() &&
7702 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
7703 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
7704 is_integral_type(Matcher::vector_element_basic_type(n)));
7705 match(Set dst (VectorCastF2X src));
7706 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
7707 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
7708 ins_encode %{
7709 int vlen_enc = vector_length_encoding(this, $src);
7710 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7711 // JDK-8292878 removed the need for an explicit scratch register to load addresses wider
7712 // than 32 bits for register-indirect addressing, since stub constants are part of the
7713 // code cache and ReservedCodeCacheSize is currently capped at 2G.
7714 // Targets are free to raise that limit, but a code cache larger than 2G is unrealistic
7715 // in practice; on the flip side, with the given cap we save a temporary register
7716 // allocation, which in the limiting case can prevent spilling in blocks with high
7717 // register pressure.
7718 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7719 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
7720 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7721 %}
7722 ins_pipe( pipe_slow );
7723 %}
7724
7725 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7726 predicate(!VM_Version::supports_avx10_2() &&
7727 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
7728 is_integral_type(Matcher::vector_element_basic_type(n)));
7729 match(Set dst (VectorCastF2X src));
7730 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7731 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
7732 ins_encode %{
7733 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7734 if (to_elem_bt == T_LONG) {
7735 int vlen_enc = vector_length_encoding(this);
7736 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7737 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7738 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
7739 } else {
7740 int vlen_enc = vector_length_encoding(this, $src);
7741 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7742 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7743 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7744 }
7745 %}
7746 ins_pipe( pipe_slow );
7747 %}
7748
7749 instruct castFtoX_reg_avx10(vec dst, vec src) %{
7750 predicate(VM_Version::supports_avx10_2() &&
7751 is_integral_type(Matcher::vector_element_basic_type(n)));
7752 match(Set dst (VectorCastF2X src));
7753 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
7754 ins_encode %{
7755 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7756 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
7757 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7758 %}
7759 ins_pipe( pipe_slow );
7760 %}
7761
7762 instruct castFtoX_mem_avx10(vec dst, memory src) %{
7763 predicate(VM_Version::supports_avx10_2() &&
7764 is_integral_type(Matcher::vector_element_basic_type(n)));
7765 match(Set dst (VectorCastF2X (LoadVector src)));
7766 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
7767 ins_encode %{
7768 int vlen = Matcher::vector_length(this);
7769 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7770 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
7771 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
7772 %}
7773 ins_pipe( pipe_slow );
7774 %}
7775
7776 instruct vcastDtoF_reg(vec dst, vec src) %{
7777 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
7778 match(Set dst (VectorCastD2X src));
7779 format %{ "vector_cast_d2x $dst,$src\t!" %}
7780 ins_encode %{
7781 int vlen_enc = vector_length_encoding(this, $src);
7782 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7783 %}
7784 ins_pipe( pipe_slow );
7785 %}
7786
7787 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
7788 predicate(!VM_Version::supports_avx10_2() &&
7789 !VM_Version::supports_avx512vl() &&
7790 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
7791 is_integral_type(Matcher::vector_element_basic_type(n)));
7792 match(Set dst (VectorCastD2X src));
7793 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
7794 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
7795 ins_encode %{
7796 int vlen_enc = vector_length_encoding(this, $src);
7797 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7798 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7799 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
7800 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7801 %}
7802 ins_pipe( pipe_slow );
7803 %}
7804
7805 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7806 predicate(!VM_Version::supports_avx10_2() &&
7807 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
7808 is_integral_type(Matcher::vector_element_basic_type(n)));
7809 match(Set dst (VectorCastD2X src));
7810 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7811 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
7812 ins_encode %{
7813 int vlen_enc = vector_length_encoding(this, $src);
7814 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7815 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
7816 ExternalAddress(vector_float_signflip());
7817 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7818 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
7819 %}
7820 ins_pipe( pipe_slow );
7821 %}
7822
7823 instruct castDtoX_reg_avx10(vec dst, vec src) %{
7824 predicate(VM_Version::supports_avx10_2() &&
7825 is_integral_type(Matcher::vector_element_basic_type(n)));
7826 match(Set dst (VectorCastD2X src));
7827 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
7828 ins_encode %{
7829 int vlen_enc = vector_length_encoding(this, $src);
7830 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7831 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7832 %}
7833 ins_pipe( pipe_slow );
7834 %}
7835
7836 instruct castDtoX_mem_avx10(vec dst, memory src) %{
7837 predicate(VM_Version::supports_avx10_2() &&
7838 is_integral_type(Matcher::vector_element_basic_type(n)));
7839 match(Set dst (VectorCastD2X (LoadVector src)));
7840 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
7841 ins_encode %{
7842 int vlen = Matcher::vector_length(this);
7843 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
7844 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7845 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
7846 %}
7847 ins_pipe( pipe_slow );
7848 %}
7849
7850 instruct vucast(vec dst, vec src) %{
7851 match(Set dst (VectorUCastB2X src));
7852 match(Set dst (VectorUCastS2X src));
7853 match(Set dst (VectorUCastI2X src));
7854 format %{ "vector_ucast $dst,$src\t!" %}
7855 ins_encode %{
7856 assert(UseAVX > 0, "required");
7857
7858 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
7859 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7860 int vlen_enc = vector_length_encoding(this);
7861 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
7862 %}
7863 ins_pipe( pipe_slow );
7864 %}
7865
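// RoundVF/RoundVD implement Math.round. The helpers temporarily install a custom
// MXCSR from the constant table: 0x3F80 keeps all exceptions masked and sets the RC
// field to round-toward-negative-infinity (0x3FBF, used with EnableX86ECoreOpts,
// additionally pre-sets the exception flag bits), matching Math.round's
// floor(x + 0.5) semantics.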
7866 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
7867 predicate(!VM_Version::supports_avx512vl() &&
7868 Matcher::vector_length_in_bytes(n) < 64 &&
7869 Matcher::vector_element_basic_type(n) == T_INT);
7870 match(Set dst (RoundVF src));
7871 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
7872 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
7873 ins_encode %{
7874 int vlen_enc = vector_length_encoding(this);
7875 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
7876 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
7877 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
7878 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
7879 %}
7880 ins_pipe( pipe_slow );
7881 %}
7882
7883 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7884 predicate((VM_Version::supports_avx512vl() ||
7885 Matcher::vector_length_in_bytes(n) == 64) &&
7886 Matcher::vector_element_basic_type(n) == T_INT);
7887 match(Set dst (RoundVF src));
7888 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7889 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
7890 ins_encode %{
7891 int vlen_enc = vector_length_encoding(this);
7892 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
7893 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
7894 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
7895 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
7896 %}
7897 ins_pipe( pipe_slow );
7898 %}
7899
7900 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7901 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
7902 match(Set dst (RoundVD src));
7903 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7904 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
7905 ins_encode %{
7906 int vlen_enc = vector_length_encoding(this);
7907 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
7908 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
7909 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
7910 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
7911 %}
7912 ins_pipe( pipe_slow );
7913 %}
7914
7915 // --------------------------------- VectorMaskCmp --------------------------------------
7916
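// The compare result is either a vector with all-ones/all-zeroes lanes (the legacy
// forms below, and the 512-bit forms which expand a temporary k-register through a
// masked load of vector_all_bits_set), or, when the node's bottom type is a
// vectmask, a k-register holding one bit per lane (the *_evex forms).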
7917 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
7918 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
7919 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
7920 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
7921 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7922 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7923 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
7924 ins_encode %{
7925 int vlen_enc = vector_length_encoding(this, $src1);
7926 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7927 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7928 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7929 } else {
7930 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7931 }
7932 %}
7933 ins_pipe( pipe_slow );
7934 %}
7935
7936 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
7937 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
7938 n->bottom_type()->isa_vectmask() == nullptr &&
7939 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7940 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7941 effect(TEMP ktmp);
7942 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
7943 ins_encode %{
7944 int vlen_enc = Assembler::AVX_512bit;
7945 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7946 KRegister mask = k0; // The comparison itself is not being masked.
7947 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7948 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7949 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
7950 } else {
7951 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7952 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
7953 }
7954 %}
7955 ins_pipe( pipe_slow );
7956 %}
7957
7958 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
7959 predicate(n->bottom_type()->isa_vectmask() &&
7960 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7961 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7962 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
7963 ins_encode %{
7964 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
7965 int vlen_enc = vector_length_encoding(this, $src1);
7966 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7967 KRegister mask = k0; // The comparison itself is not being masked.
7968 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7969 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7970 } else {
7971 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7972 }
7973 %}
7974 ins_pipe( pipe_slow );
7975 %}
7976
7977 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
7978 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
7979 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
7980 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
7981 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
7982 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
7983 (n->in(2)->get_int() == BoolTest::eq ||
7984 n->in(2)->get_int() == BoolTest::lt ||
7985 n->in(2)->get_int() == BoolTest::gt)); // cond
7986 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7987 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
7988 ins_encode %{
7989 int vlen_enc = vector_length_encoding(this, $src1);
7990 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7991 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
7992 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
7993 %}
7994 ins_pipe( pipe_slow );
7995 %}
7996
7997 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
7998 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
7999 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
8000 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
8001 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
8002 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
8003 (n->in(2)->get_int() == BoolTest::ne ||
8004 n->in(2)->get_int() == BoolTest::le ||
8005 n->in(2)->get_int() == BoolTest::ge)); // cond
8006 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
8007 effect(TEMP dst, TEMP xtmp);
8008 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
8009 ins_encode %{
8010 int vlen_enc = vector_length_encoding(this, $src1);
8011 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
8012 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
8013 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
8014 %}
8015 ins_pipe( pipe_slow );
8016 %}
8017
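// AVX/AVX2 provide only signed packed compares, so unsigned predicates are handled
// by flipping the sign bit of both operands (xor with a broadcast per-lane 0x80..
// constant) and then issuing the corresponding signed compare; the flip maps the
// unsigned order onto the signed order.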
8018 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
8019 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
8020 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
8021 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
8022 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
8023 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
8024 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
8025 effect(TEMP dst, TEMP xtmp);
8026 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
8027 ins_encode %{
8028 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
8029 int vlen_enc = vector_length_encoding(this, $src1);
8030 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
8031 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
8032
8033 if (vlen_enc == Assembler::AVX_128bit) {
8034 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
8035 } else {
8036 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
8037 }
8038 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
8039 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
8040 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
8041 %}
8042 ins_pipe( pipe_slow );
8043 %}
8044
8045 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
8046 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
8047 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
8048 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
8049 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
8050 effect(TEMP ktmp);
8051 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
8052 ins_encode %{
8053 assert(UseAVX > 2, "required");
8054
8055 int vlen_enc = vector_length_encoding(this, $src1);
8056 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
8057 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
8058 KRegister mask = k0; // The comparison itself is not being masked.
8059 bool merge = false;
8060 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
8061
8062 switch (src1_elem_bt) {
8063 case T_INT: {
8064 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8065 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
8066 break;
8067 }
8068 case T_LONG: {
8069 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8070 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
8071 break;
8072 }
8073 default: assert(false, "%s", type2name(src1_elem_bt));
8074 }
8075 %}
8076 ins_pipe( pipe_slow );
8077 %}
8078
8079
8080 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
8081 predicate(n->bottom_type()->isa_vectmask() &&
8082 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
8083 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
8084 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
8085 ins_encode %{
8086 assert(UseAVX > 2, "required");
8087 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
8088
8089 int vlen_enc = vector_length_encoding(this, $src1);
8090 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
8091 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
8092 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
8093
8094 // Elementwise comparison, with the result produced directly in the destination mask register
8095 switch (src1_elem_bt) {
8096 case T_BYTE: {
8097 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8098 break;
8099 }
8100 case T_SHORT: {
8101 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8102 break;
8103 }
8104 case T_INT: {
8105 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8106 break;
8107 }
8108 case T_LONG: {
8109 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8110 break;
8111 }
8112 default: assert(false, "%s", type2name(src1_elem_bt));
8113 }
8114 %}
8115 ins_pipe( pipe_slow );
8116 %}
8117
8118 // Extract
8119
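// For sources wider than 128 bits, extraction first moves the 128-bit lane that
// holds the requested element into a temporary (get_lane) and then reads the element
// out of that lane (get_elem); 128-bit sources can use get_elem directly.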
8120 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
8121 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
8122 match(Set dst (ExtractI src idx));
8123 match(Set dst (ExtractS src idx));
8124 match(Set dst (ExtractB src idx));
8125 format %{ "extractI $dst,$src,$idx\t!" %}
8126 ins_encode %{
8127 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8128
8129 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
8130 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
8131 %}
8132 ins_pipe( pipe_slow );
8133 %}
8134
8135 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
8136 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
8137 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
8138 match(Set dst (ExtractI src idx));
8139 match(Set dst (ExtractS src idx));
8140 match(Set dst (ExtractB src idx));
8141 effect(TEMP vtmp);
8142 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
8143 ins_encode %{
8144 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8145
8146 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
8147 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8148 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
8149 %}
8150 ins_pipe( pipe_slow );
8151 %}
8152
8153 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
8154 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
8155 match(Set dst (ExtractL src idx));
8156 format %{ "extractL $dst,$src,$idx\t!" %}
8157 ins_encode %{
8158 assert(UseSSE >= 4, "required");
8159 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8160
8161 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
8162 %}
8163 ins_pipe( pipe_slow );
8164 %}
8165
8166 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
8167 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
8168 Matcher::vector_length(n->in(1)) == 8); // src
8169 match(Set dst (ExtractL src idx));
8170 effect(TEMP vtmp);
8171 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
8172 ins_encode %{
8173 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8174
8175 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8176 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
8177 %}
8178 ins_pipe( pipe_slow );
8179 %}
8180
8181 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
8182 predicate(Matcher::vector_length(n->in(1)) <= 4);
8183 match(Set dst (ExtractF src idx));
8184 effect(TEMP dst, TEMP vtmp);
8185 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
8186 ins_encode %{
8187 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8188
8189 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
8190 %}
8191 ins_pipe( pipe_slow );
8192 %}
8193
8194 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
8195 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
8196 Matcher::vector_length(n->in(1)/*src*/) == 16);
8197 match(Set dst (ExtractF src idx));
8198 effect(TEMP vtmp);
8199 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
8200 ins_encode %{
8201 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8202
8203 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8204 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
8205 %}
8206 ins_pipe( pipe_slow );
8207 %}
8208
8209 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
8210 predicate(Matcher::vector_length(n->in(1)) == 2); // src
8211 match(Set dst (ExtractD src idx));
8212 format %{ "extractD $dst,$src,$idx\t!" %}
8213 ins_encode %{
8214 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8215
8216 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8217 %}
8218 ins_pipe( pipe_slow );
8219 %}
8220
8221 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
8222 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
8223 Matcher::vector_length(n->in(1)) == 8); // src
8224 match(Set dst (ExtractD src idx));
8225 effect(TEMP vtmp);
8226 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
8227 ins_encode %{
8228 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8229
8230 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8231 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
8232 %}
8233 ins_pipe( pipe_slow );
8234 %}
8235
8236 // --------------------------------- Vector Blend --------------------------------------
8237
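// SSE4.1 pblendvb takes its mask implicitly in xmm0, which is why the non-AVX rule
// below pins an rxmm0 temporary and copies the mask there before blending.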
8238 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
8239 predicate(UseAVX == 0);
8240 match(Set dst (VectorBlend (Binary dst src) mask));
8241 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
8242 effect(TEMP tmp);
8243 ins_encode %{
8244 assert(UseSSE >= 4, "required");
8245
8246 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
8247 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
8248 }
8249 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
8250 %}
8251 ins_pipe( pipe_slow );
8252 %}
8253
8254 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
8255 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
8256 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
8257 Matcher::vector_length_in_bytes(n) <= 32 &&
8258 is_integral_type(Matcher::vector_element_basic_type(n)));
8259 match(Set dst (VectorBlend (Binary src1 src2) mask));
8260 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
8261 ins_encode %{
8262 int vlen_enc = vector_length_encoding(this);
8263 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
8264 %}
8265 ins_pipe( pipe_slow );
8266 %}
8267
8268 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
8269 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
8270 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
8271 Matcher::vector_length_in_bytes(n) <= 32 &&
8272 !is_integral_type(Matcher::vector_element_basic_type(n)));
8273 match(Set dst (VectorBlend (Binary src1 src2) mask));
8274 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
8275 ins_encode %{
8276 int vlen_enc = vector_length_encoding(this);
8277 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
8278 %}
8279 ins_pipe( pipe_slow );
8280 %}
8281
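// With EnableX86ECoreOpts the blend is composed from vpandn/vpand/vpor instead of
// the variable-blend instructions, which this tuning flag assumes are the slower
// choice on E-core based parts.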
8282 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
8283 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
8284 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
8285 Matcher::vector_length_in_bytes(n) <= 32);
8286 match(Set dst (VectorBlend (Binary src1 src2) mask));
8287 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
8288 effect(TEMP vtmp, TEMP dst);
8289 ins_encode %{
8290 int vlen_enc = vector_length_encoding(this);
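    // Emulated blend (preferred on E-cores):
    // $dst = ($mask & $src2) | (~$mask & $src1), i.e. set mask bits select $src2.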
8291 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
8292 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
8293 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8294 %}
8295 ins_pipe( pipe_slow );
8296 %}
8297
8298 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
8299 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
8300 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
8301 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
8303 effect(TEMP ktmp);
8304 ins_encode %{
8305 int vlen_enc = Assembler::AVX_512bit;
8306 BasicType elem_bt = Matcher::vector_element_basic_type(this);
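    // Convert the vector mask into a k-register by comparing each lane against
    // all-ones, then let the EVEX blend merge $src1/$src2 under that k-mask.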
8307 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
8308 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8309 %}
8310 ins_pipe( pipe_slow );
8311 %}
8312
8313
8314 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
8315 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
8316 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
8317 VM_Version::supports_avx512bw()));
8318 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! blend with k-register mask" %}
8320 ins_encode %{
8321 int vlen_enc = vector_length_encoding(this);
8322 BasicType elem_bt = Matcher::vector_element_basic_type(this);
8323 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8324 %}
8325 ins_pipe( pipe_slow );
8326 %}
8327
8328 // --------------------------------- ABS --------------------------------------
8329 // a = |a|
8330 instruct vabsB_reg(vec dst, vec src) %{
8331 match(Set dst (AbsVB src));
8332 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
8333 ins_encode %{
8334 uint vlen = Matcher::vector_length(this);
8335 if (vlen <= 16) {
8336 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
8337 } else {
8338 int vlen_enc = vector_length_encoding(this);
8339 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8340 }
8341 %}
8342 ins_pipe( pipe_slow );
8343 %}
8344
8345 instruct vabsS_reg(vec dst, vec src) %{
8346 match(Set dst (AbsVS src));
8347 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
8348 ins_encode %{
8349 uint vlen = Matcher::vector_length(this);
8350 if (vlen <= 8) {
8351 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
8352 } else {
8353 int vlen_enc = vector_length_encoding(this);
8354 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8355 }
8356 %}
8357 ins_pipe( pipe_slow );
8358 %}
8359
8360 instruct vabsI_reg(vec dst, vec src) %{
8361 match(Set dst (AbsVI src));
8362 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
8363 ins_encode %{
8364 uint vlen = Matcher::vector_length(this);
8365 if (vlen <= 4) {
8366 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
8367 } else {
8368 int vlen_enc = vector_length_encoding(this);
8369 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8370 }
8371 %}
8372 ins_pipe( pipe_slow );
8373 %}
8374
8375 instruct vabsL_reg(vec dst, vec src) %{
8376 match(Set dst (AbsVL src));
8377 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
8378 ins_encode %{
8379 assert(UseAVX > 2, "required");
8380 int vlen_enc = vector_length_encoding(this);
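    // evpabsq at 128/256-bit vector length needs AVX512VL; without it, encode
    // at 512 bits and let the unused upper lanes be ignored.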
8381 if (!VM_Version::supports_avx512vl()) {
8382 vlen_enc = Assembler::AVX_512bit;
8383 }
8384 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8385 %}
8386 ins_pipe( pipe_slow );
8387 %}
8388
8389 // --------------------------------- ABSNEG --------------------------------------
8390
8391 instruct vabsnegF(vec dst, vec src) %{
8392 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
8393 match(Set dst (AbsVF src));
8394 match(Set dst (NegVF src));
8395 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
8396 ins_cost(150);
8397 ins_encode %{
8398 int opcode = this->ideal_Opcode();
8399 int vlen = Matcher::vector_length(this);
8400 if (vlen == 2) {
8401 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
8402 } else {
8403 assert(vlen == 8 || vlen == 16, "required");
8404 int vlen_enc = vector_length_encoding(this);
8405 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8406 }
8407 %}
8408 ins_pipe( pipe_slow );
8409 %}
8410
8411 instruct vabsneg4F(vec dst) %{
8412 predicate(Matcher::vector_length(n) == 4);
8413 match(Set dst (AbsVF dst));
8414 match(Set dst (NegVF dst));
8415 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
8416 ins_cost(150);
8417 ins_encode %{
8418 int opcode = this->ideal_Opcode();
8419 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
8420 %}
8421 ins_pipe( pipe_slow );
8422 %}
8423
8424 instruct vabsnegD(vec dst, vec src) %{
8425 match(Set dst (AbsVD src));
8426 match(Set dst (NegVD src));
8427 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
8428 ins_encode %{
8429 int opcode = this->ideal_Opcode();
8430 uint vlen = Matcher::vector_length(this);
8431 if (vlen == 2) {
8432 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
8433 } else {
8434 int vlen_enc = vector_length_encoding(this);
8435 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8436 }
8437 %}
8438 ins_pipe( pipe_slow );
8439 %}
8440
8441 //------------------------------------- VectorTest --------------------------------------------
8442
8443 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
8444 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
8445 match(Set cr (VectorTest src1 src2));
8446 effect(TEMP vtmp);
8447 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
8448 ins_encode %{
8449 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
8450 int vlen = Matcher::vector_length_in_bytes(this, $src1);
8451 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
8452 %}
8453 ins_pipe( pipe_slow );
8454 %}
8455
8456 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
8457 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
8458 match(Set cr (VectorTest src1 src2));
8459 format %{ "vptest_ge16 $src1, $src2\n\t" %}
8460 ins_encode %{
8461 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
8462 int vlen = Matcher::vector_length_in_bytes(this, $src1);
8463 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
8464 %}
8465 ins_pipe( pipe_slow );
8466 %}
8467
8468 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
8469 predicate((Matcher::vector_length(n->in(1)) < 8 ||
8470 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
8471 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
8472 match(Set cr (VectorTest src1 src2));
8473 effect(TEMP tmp);
8474 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
8475 ins_encode %{
8476 uint masklen = Matcher::vector_length(this, $src1);
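    // Move the k-mask into a GPR, keep only the masklen low bits and compare
    // against all-ones: ZF ends up set iff every lane of the mask is true.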
8477 __ kmovwl($tmp$$Register, $src1$$KRegister);
8478 __ andl($tmp$$Register, (1 << masklen) - 1);
8479 __ cmpl($tmp$$Register, (1 << masklen) - 1);
8480 %}
8481 ins_pipe( pipe_slow );
8482 %}
8483
8484 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
8485 predicate((Matcher::vector_length(n->in(1)) < 8 ||
8486 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
8487 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
8488 match(Set cr (VectorTest src1 src2));
8489 effect(TEMP tmp);
8490 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
8491 ins_encode %{
8492 uint masklen = Matcher::vector_length(this, $src1);
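    // ANDing with the (1 << masklen) - 1 window sets ZF iff no lane is true,
    // so "any true" is simply ZF == 0.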
8493 __ kmovwl($tmp$$Register, $src1$$KRegister);
8494 __ andl($tmp$$Register, (1 << masklen) - 1);
8495 %}
8496 ins_pipe( pipe_slow );
8497 %}
8498
8499 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
8500 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
8501 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
8502 match(Set cr (VectorTest src1 src2));
8503 format %{ "ktest_ge8 $src1, $src2\n\t" %}
8504 ins_encode %{
8505 uint masklen = Matcher::vector_length(this, $src1);
8506 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
8507 %}
8508 ins_pipe( pipe_slow );
8509 %}
8510
8511 //------------------------------------- LoadMask --------------------------------------------
8512
8513 instruct loadMask(legVec dst, legVec src) %{
8514 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
8515 match(Set dst (VectorLoadMask src));
8516 effect(TEMP dst);
8517 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
8518 ins_encode %{
8519 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8520 BasicType elem_bt = Matcher::vector_element_basic_type(this);
8521 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
8522 %}
8523 ins_pipe( pipe_slow );
8524 %}
8525
8526 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
8527 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
8528 match(Set dst (VectorLoadMask src));
8529 effect(TEMP xtmp);
8530 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
8531 ins_encode %{
8532 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
8533 true, Assembler::AVX_512bit);
8534 %}
8535 ins_pipe( pipe_slow );
8536 %}
8537
8538 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
8539 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
8540 match(Set dst (VectorLoadMask src));
8541 effect(TEMP xtmp);
8542 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
8543 ins_encode %{
8544 int vlen_enc = vector_length_encoding(in(1));
8545 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
8546 false, vlen_enc);
8547 %}
8548 ins_pipe( pipe_slow );
8549 %}
8550
8551 //------------------------------------- StoreMask --------------------------------------------
8552
8553 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
8554 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8555 match(Set dst (VectorStoreMask src size));
8556 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8557 ins_encode %{
8558 int vlen = Matcher::vector_length(this);
8559 if (vlen <= 16 && UseAVX <= 2) {
8560 assert(UseSSE >= 3, "required");
8561 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
8562 } else {
8563 assert(UseAVX > 0, "required");
8564 int src_vlen_enc = vector_length_encoding(this, $src);
8565 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8566 }
8567 %}
8568 ins_pipe( pipe_slow );
8569 %}
8570
8571 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
8572 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8573 match(Set dst (VectorStoreMask src size));
8574 effect(TEMP_DEF dst, TEMP xtmp);
8575 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8576 ins_encode %{
8577 int vlen_enc = Assembler::AVX_128bit;
8578 int vlen = Matcher::vector_length(this);
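    // Mask lanes arrive as 0/-1 shorts; the sequence below narrows them to one
    // byte per lane and normalizes the result to 0/1 via an absolute value.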
8579 if (vlen <= 8) {
8580 assert(UseSSE >= 3, "required");
8581 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8582 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
8583 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8584 } else {
8585 assert(UseAVX > 0, "required");
8586 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
8587 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8588 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8589 }
8590 %}
8591 ins_pipe( pipe_slow );
8592 %}
8593
8594 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
8595 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8596 match(Set dst (VectorStoreMask src size));
8597 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8598 effect(TEMP_DEF dst, TEMP xtmp);
8599 ins_encode %{
8600 int vlen_enc = Assembler::AVX_128bit;
8601 int vlen = Matcher::vector_length(this);
8602 if (vlen <= 4) {
8603 assert(UseSSE >= 3, "required");
8604 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8605 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
8606 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
8607 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8608 } else {
8609 assert(UseAVX > 0, "required");
8610 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8611 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
8612 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8613 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8614 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8615 }
8616 %}
8617 ins_pipe( pipe_slow );
8618 %}
8619
8620 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
8621 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
8622 match(Set dst (VectorStoreMask src size));
8623 effect(TEMP_DEF dst, TEMP xtmp);
8624 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8625 ins_encode %{
8626 assert(UseSSE >= 3, "required");
8627 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8628 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
8629 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
8630 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
8631 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8632 %}
8633 ins_pipe( pipe_slow );
8634 %}
8635
8636 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
8637 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
8638 match(Set dst (VectorStoreMask src size));
8639 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
8640 effect(TEMP_DEF dst, TEMP vtmp);
8641 ins_encode %{
8642 int vlen_enc = Assembler::AVX_128bit;
8643 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
8644 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
8645 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
8646 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8647 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8648 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8649 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8650 %}
8651 ins_pipe( pipe_slow );
8652 %}
8653
8654 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
8655 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8656 match(Set dst (VectorStoreMask src size));
8657 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8658 ins_encode %{
8659 int src_vlen_enc = vector_length_encoding(this, $src);
8660 int dst_vlen_enc = vector_length_encoding(this);
8661 if (!VM_Version::supports_avx512vl()) {
8662 src_vlen_enc = Assembler::AVX_512bit;
8663 }
8664 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8665 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8666 %}
8667 ins_pipe( pipe_slow );
8668 %}
8669
8670 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
8671 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8672 match(Set dst (VectorStoreMask src size));
8673 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8674 ins_encode %{
8675 int src_vlen_enc = vector_length_encoding(this, $src);
8676 int dst_vlen_enc = vector_length_encoding(this);
8677 if (!VM_Version::supports_avx512vl()) {
8678 src_vlen_enc = Assembler::AVX_512bit;
8679 }
8680 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8681 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8682 %}
8683 ins_pipe( pipe_slow );
8684 %}
8685
8686 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
8687 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
8688 match(Set dst (VectorStoreMask mask size));
8689 effect(TEMP_DEF dst);
8690 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8691 ins_encode %{
8692 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
8693 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
8694 false, Assembler::AVX_512bit, noreg);
8695 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
8696 %}
8697 ins_pipe( pipe_slow );
8698 %}
8699
8700 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
8701 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
8702 match(Set dst (VectorStoreMask mask size));
8703 effect(TEMP_DEF dst);
8704 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8705 ins_encode %{
8706 int dst_vlen_enc = vector_length_encoding(this);
8707 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
8708 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8709 %}
8710 ins_pipe( pipe_slow );
8711 %}
8712
8713 instruct vmaskcast_evex(kReg dst) %{
8714 match(Set dst (VectorMaskCast dst));
8715 ins_cost(0);
8716 format %{ "vector_mask_cast $dst" %}
8717 ins_encode %{
8718 // empty
8719 %}
8720 ins_pipe(empty);
8721 %}
8722
8723 instruct vmaskcast(vec dst) %{
8724 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
8725 match(Set dst (VectorMaskCast dst));
8726 ins_cost(0);
8727 format %{ "vector_mask_cast $dst" %}
8728 ins_encode %{
8729 // empty
8730 %}
8731 ins_pipe(empty);
8732 %}
8733
8734 instruct vmaskcast_avx(vec dst, vec src) %{
8735 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
8736 match(Set dst (VectorMaskCast src));
8737 format %{ "vector_mask_cast $dst, $src" %}
8738 ins_encode %{
8739 int vlen = Matcher::vector_length(this);
8740 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
8741 BasicType dst_bt = Matcher::vector_element_basic_type(this);
8742 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
8743 %}
8744 ins_pipe(pipe_slow);
8745 %}
8746
8747 //-------------------------------- Load Iota Indices ----------------------------------
8748
8749 instruct loadIotaIndices(vec dst, immI_0 src) %{
8750 match(Set dst (VectorLoadConst src));
8751 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
8752 ins_encode %{
8753 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8754 BasicType bt = Matcher::vector_element_basic_type(this);
8755 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
8756 %}
8757 ins_pipe( pipe_slow );
8758 %}
8759
8760 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
8761 match(Set dst (PopulateIndex src1 src2));
8762 effect(TEMP dst, TEMP vtmp);
8763 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
8764 ins_encode %{
8765 assert($src2$$constant == 1, "required");
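    // Lane i of $dst becomes $src1 + i: broadcast the start value, load the
    // iota constants {0, 1, 2, ...} and add the two vectors.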
8766 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8767 int vlen_enc = vector_length_encoding(this);
8768 BasicType elem_bt = Matcher::vector_element_basic_type(this);
8769 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
8770 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
8771 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8772 %}
8773 ins_pipe( pipe_slow );
8774 %}
8775
8776 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
8777 match(Set dst (PopulateIndex src1 src2));
8778 effect(TEMP dst, TEMP vtmp);
8779 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
8780 ins_encode %{
8781 assert($src2$$constant == 1, "required");
8782 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8783 int vlen_enc = vector_length_encoding(this);
8784 BasicType elem_bt = Matcher::vector_element_basic_type(this);
8785 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
8786 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
8787 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8788 %}
8789 ins_pipe( pipe_slow );
8790 %}
8791
8792 //-------------------------------- Rearrange ----------------------------------
8793
8794 // LoadShuffle/Rearrange for Byte
8795 instruct rearrangeB(vec dst, vec shuffle) %{
8796 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8797 Matcher::vector_length(n) < 32);
8798 match(Set dst (VectorRearrange dst shuffle));
8799 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
8800 ins_encode %{
8801 assert(UseSSE >= 4, "required");
8802 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
8803 %}
8804 ins_pipe( pipe_slow );
8805 %}
8806
8807 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
8808 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8809 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
8810 match(Set dst (VectorRearrange src shuffle));
8811 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
8812 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
8813 ins_encode %{
8814 assert(UseAVX >= 2, "required");
    // Swap the two 128-bit lanes of src into vtmp1
8816 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
8817 // Shuffle swapped src to get entries from other 128 bit lane
8818 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8819 // Shuffle original src to get entries from self 128 bit lane
8820 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8821 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
8822 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
8823 // Perform the blend
8824 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
8825 %}
8826 ins_pipe( pipe_slow );
8827 %}
8828
8829
8830 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
8831 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8832 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
8833 match(Set dst (VectorRearrange src shuffle));
8834 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
8835 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
8836 ins_encode %{
8837 int vlen_enc = vector_length_encoding(this);
8838 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
8839 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
8840 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
8841 %}
8842 ins_pipe( pipe_slow );
8843 %}
8844
8845 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
8846 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8847 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
8848 match(Set dst (VectorRearrange src shuffle));
8849 format %{ "vector_rearrange $dst, $shuffle, $src" %}
8850 ins_encode %{
8851 int vlen_enc = vector_length_encoding(this);
8852 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8853 %}
8854 ins_pipe( pipe_slow );
8855 %}
8856
8857 // LoadShuffle/Rearrange for Short
8858
8859 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
8860 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8861 !VM_Version::supports_avx512bw());
8862 match(Set dst (VectorLoadShuffle src));
8863 effect(TEMP dst, TEMP vtmp);
8864 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
8865 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
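    // e.g. short shuffle index 3 becomes the byte-index pair {6, 7}.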
8868 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8869 if (UseAVX == 0) {
8870 assert(vlen_in_bytes <= 16, "required");
8871 // Multiply each shuffle by two to get byte index
8872 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
8873 __ psllw($vtmp$$XMMRegister, 1);
8874
8875 // Duplicate to create 2 copies of byte index
8876 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
8877 __ psllw($dst$$XMMRegister, 8);
8878 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
8879
8880 // Add one to get alternate byte index
8881 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
8882 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
8883 } else {
8884 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
8885 int vlen_enc = vector_length_encoding(this);
8886 // Multiply each shuffle by two to get byte index
8887 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
8888
8889 // Duplicate to create 2 copies of byte index
8890 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
8891 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8892
8893 // Add one to get alternate byte index
8894 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
8895 }
8896 %}
8897 ins_pipe( pipe_slow );
8898 %}
8899
8900 instruct rearrangeS(vec dst, vec shuffle) %{
8901 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8902 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
8903 match(Set dst (VectorRearrange dst shuffle));
8904 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
8905 ins_encode %{
8906 assert(UseSSE >= 4, "required");
8907 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
8908 %}
8909 ins_pipe( pipe_slow );
8910 %}
8911
8912 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
8913 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8914 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
8915 match(Set dst (VectorRearrange src shuffle));
8916 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
8917 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
8918 ins_encode %{
8919 assert(UseAVX >= 2, "required");
    // Swap the two 128-bit lanes of src into vtmp1
8921 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
8922 // Shuffle swapped src to get entries from other 128 bit lane
8923 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8924 // Shuffle original src to get entries from self 128 bit lane
8925 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8926 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
8927 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
8928 // Perform the blend
8929 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
8930 %}
8931 ins_pipe( pipe_slow );
8932 %}
8933
8934 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
8935 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8936 VM_Version::supports_avx512bw());
8937 match(Set dst (VectorRearrange src shuffle));
8938 format %{ "vector_rearrange $dst, $shuffle, $src" %}
8939 ins_encode %{
8940 int vlen_enc = vector_length_encoding(this);
8941 if (!VM_Version::supports_avx512vl()) {
8942 vlen_enc = Assembler::AVX_512bit;
8943 }
8944 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8945 %}
8946 ins_pipe( pipe_slow );
8947 %}
8948
8949 // LoadShuffle/Rearrange for Integer and Float
8950
8951 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
8952 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8953 Matcher::vector_length(n) == 4 && UseAVX == 0);
8954 match(Set dst (VectorLoadShuffle src));
8955 effect(TEMP dst, TEMP vtmp);
8956 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
8957 ins_encode %{
8958 assert(UseSSE >= 4, "required");
8959
    // Create a byte shuffle mask from the int shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
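    // e.g. int shuffle index 2 becomes byte indices {8, 9, 10, 11}.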
8962
8963 // Duplicate and multiply each shuffle by 4
8964 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
8965 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
8966 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
8967 __ psllw($vtmp$$XMMRegister, 2);
8968
8969 // Duplicate again to create 4 copies of byte index
8970 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
8971 __ psllw($dst$$XMMRegister, 8);
8972 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
8973
8974 // Add 3,2,1,0 to get alternate byte index
8975 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
8976 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
8977 %}
8978 ins_pipe( pipe_slow );
8979 %}
8980
8981 instruct rearrangeI(vec dst, vec shuffle) %{
8982 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8983 UseAVX == 0);
8984 match(Set dst (VectorRearrange dst shuffle));
8985 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
8986 ins_encode %{
8987 assert(UseSSE >= 4, "required");
8988 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
8989 %}
8990 ins_pipe( pipe_slow );
8991 %}
8992
8993 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
8994 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8995 UseAVX > 0);
8996 match(Set dst (VectorRearrange src shuffle));
8997 format %{ "vector_rearrange $dst, $shuffle, $src" %}
8998 ins_encode %{
8999 int vlen_enc = vector_length_encoding(this);
9000 BasicType bt = Matcher::vector_element_basic_type(this);
9001 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
9002 %}
9003 ins_pipe( pipe_slow );
9004 %}
9005
9006 // LoadShuffle/Rearrange for Long and Double
9007
9008 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
9009 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
9010 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
9011 match(Set dst (VectorLoadShuffle src));
9012 effect(TEMP dst, TEMP vtmp);
9013 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
9014 ins_encode %{
9015 assert(UseAVX >= 2, "required");
9016
9017 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask,
    // since only a double word shuffle instruction is available on these platforms.
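    // e.g. long shuffle index 1 becomes the double-word index pair {2, 3}.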
9020
9021 // Multiply each shuffle by two to get double word index
9022 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
9023
9024 // Duplicate each double word shuffle
9025 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
9026 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
9027
9028 // Add one to get alternate double word index
9029 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
9030 %}
9031 ins_pipe( pipe_slow );
9032 %}
9033
9034 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
9035 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
9036 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
9037 match(Set dst (VectorRearrange src shuffle));
9038 format %{ "vector_rearrange $dst, $shuffle, $src" %}
9039 ins_encode %{
9040 assert(UseAVX >= 2, "required");
9041
9042 int vlen_enc = vector_length_encoding(this);
9043 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
9044 %}
9045 ins_pipe( pipe_slow );
9046 %}
9047
9048 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
9049 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
9050 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
9051 match(Set dst (VectorRearrange src shuffle));
9052 format %{ "vector_rearrange $dst, $shuffle, $src" %}
9053 ins_encode %{
9054 assert(UseAVX > 2, "required");
9055
9056 int vlen_enc = vector_length_encoding(this);
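    // The variable vpermq form only exists for 256/512-bit vectors, so a
    // 128-bit request is widened to the 256-bit encoding.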
9057 if (vlen_enc == Assembler::AVX_128bit) {
9058 vlen_enc = Assembler::AVX_256bit;
9059 }
9060 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
9061 %}
9062 ins_pipe( pipe_slow );
9063 %}
9064
9065 // --------------------------------- FMA --------------------------------------
9066 // a * b + c
9067
9068 instruct vfmaF_reg(vec a, vec b, vec c) %{
9069 match(Set c (FmaVF c (Binary a b)));
9070 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
9071 ins_cost(150);
9072 ins_encode %{
9073 assert(UseFMA, "not enabled");
9074 int vlen_enc = vector_length_encoding(this);
9075 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
9076 %}
9077 ins_pipe( pipe_slow );
9078 %}
9079
9080 instruct vfmaF_mem(vec a, memory b, vec c) %{
9081 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
9082 match(Set c (FmaVF c (Binary a (LoadVector b))));
9083 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
9084 ins_cost(150);
9085 ins_encode %{
9086 assert(UseFMA, "not enabled");
9087 int vlen_enc = vector_length_encoding(this);
9088 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
9089 %}
9090 ins_pipe( pipe_slow );
9091 %}
9092
9093 instruct vfmaD_reg(vec a, vec b, vec c) %{
9094 match(Set c (FmaVD c (Binary a b)));
9095 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
9096 ins_cost(150);
9097 ins_encode %{
9098 assert(UseFMA, "not enabled");
9099 int vlen_enc = vector_length_encoding(this);
9100 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
9101 %}
9102 ins_pipe( pipe_slow );
9103 %}
9104
9105 instruct vfmaD_mem(vec a, memory b, vec c) %{
9106 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
9107 match(Set c (FmaVD c (Binary a (LoadVector b))));
9108 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
9109 ins_cost(150);
9110 ins_encode %{
9111 assert(UseFMA, "not enabled");
9112 int vlen_enc = vector_length_encoding(this);
9113 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
9114 %}
9115 ins_pipe( pipe_slow );
9116 %}
9117
9118 // --------------------------------- Vector Multiply Add --------------------------------------
9119
9120 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
9121 predicate(UseAVX == 0);
9122 match(Set dst (MulAddVS2VI dst src1));
9123 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
9124 ins_encode %{
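    // pmaddwd multiplies adjacent signed 16-bit lanes and adds each pair into
    // a 32-bit result lane.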
9125 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
9126 %}
9127 ins_pipe( pipe_slow );
9128 %}
9129
9130 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
9131 predicate(UseAVX > 0);
9132 match(Set dst (MulAddVS2VI src1 src2));
9133 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
9134 ins_encode %{
9135 int vlen_enc = vector_length_encoding(this);
9136 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
9137 %}
9138 ins_pipe( pipe_slow );
9139 %}
9140
9141 // --------------------------------- Vector Multiply Add Add ----------------------------------
9142
9143 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
9144 predicate(VM_Version::supports_avx512_vnni());
9145 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
9146 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
9147 ins_encode %{
9148 assert(UseAVX > 2, "required");
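    // With AVX512_VNNI, evpdpwssd fuses the pmaddwd-style multiply-add with
    // the accumulation into $dst.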
9149 int vlen_enc = vector_length_encoding(this);
9150 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
9151 %}
9152 ins_pipe( pipe_slow );
9153 ins_cost(10);
9154 %}
9155
9156 // --------------------------------- PopCount --------------------------------------
9157
9158 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
9159 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
9160 match(Set dst (PopCountVI src));
9161 match(Set dst (PopCountVL src));
9162 format %{ "vector_popcount_integral $dst, $src" %}
9163 ins_encode %{
9164 int opcode = this->ideal_Opcode();
9165 int vlen_enc = vector_length_encoding(this, $src);
9166 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9167 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
9168 %}
9169 ins_pipe( pipe_slow );
9170 %}
9171
9172 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
9173 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
9174 match(Set dst (PopCountVI src mask));
9175 match(Set dst (PopCountVL src mask));
9176 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
9177 ins_encode %{
9178 int vlen_enc = vector_length_encoding(this, $src);
9179 BasicType bt = Matcher::vector_element_basic_type(this, $src);
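    // Copy $src into $dst first: the popcount below uses merge-masking, so
    // lanes disabled in $mask keep the (copied) source value.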
9180 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
9181 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
9182 %}
9183 ins_pipe( pipe_slow );
9184 %}
9185
9186 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
9187 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
9188 match(Set dst (PopCountVI src));
9189 match(Set dst (PopCountVL src));
9190 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
9191 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
9192 ins_encode %{
9193 int opcode = this->ideal_Opcode();
9194 int vlen_enc = vector_length_encoding(this, $src);
9195 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9196 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9197 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
9198 %}
9199 ins_pipe( pipe_slow );
9200 %}
9201
9202 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
9203
9204 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
9205 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
9206 Matcher::vector_length_in_bytes(n->in(1))));
9207 match(Set dst (CountTrailingZerosV src));
9208 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
9209 ins_cost(400);
9210 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
9211 ins_encode %{
9212 int vlen_enc = vector_length_encoding(this, $src);
9213 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9214 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
9215 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
9216 %}
9217 ins_pipe( pipe_slow );
9218 %}
9219
9220 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
9221 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
9222 VM_Version::supports_avx512cd() &&
9223 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
9224 match(Set dst (CountTrailingZerosV src));
9225 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
9226 ins_cost(400);
9227 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
9228 ins_encode %{
9229 int vlen_enc = vector_length_encoding(this, $src);
9230 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9231 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9232 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
9233 %}
9234 ins_pipe( pipe_slow );
9235 %}
9236
9237 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
9238 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
9239 match(Set dst (CountTrailingZerosV src));
9240 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
9241 ins_cost(400);
9242 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
9243 ins_encode %{
9244 int vlen_enc = vector_length_encoding(this, $src);
9245 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9246 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9247 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
9248 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
9249 %}
9250 ins_pipe( pipe_slow );
9251 %}
9252
9253 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
9254 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
9255 match(Set dst (CountTrailingZerosV src));
9256 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
9257 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
9258 ins_encode %{
9259 int vlen_enc = vector_length_encoding(this, $src);
9260 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9261 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9262 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
9263 %}
9264 ins_pipe( pipe_slow );
9265 %}
9266
9267
9268 // --------------------------------- Bitwise Ternary Logic ----------------------------------
9269
9270 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
9271 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
9272 effect(TEMP dst);
9273 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
9274 ins_encode %{
9275 int vector_len = vector_length_encoding(this);
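    // $func is an 8-bit truth table indexed by the bit triple (dst, src2, src3);
    // e.g. 0x96 encodes a three-way XOR and 0xCA the bitwise select dst ? src2 : src3.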
9276 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
9277 %}
9278 ins_pipe( pipe_slow );
9279 %}
9280
9281 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
9282 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
9283 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
9284 effect(TEMP dst);
9285 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
9286 ins_encode %{
9287 int vector_len = vector_length_encoding(this);
9288 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
9289 %}
9290 ins_pipe( pipe_slow );
9291 %}
9292
9293 // --------------------------------- Rotation Operations ----------------------------------
9294 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
9295 match(Set dst (RotateLeftV src shift));
9296 match(Set dst (RotateRightV src shift));
9297 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
9298 ins_encode %{
9299 int opcode = this->ideal_Opcode();
9300 int vector_len = vector_length_encoding(this);
9301 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
9302 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
9303 %}
9304 ins_pipe( pipe_slow );
9305 %}
9306
instruct vprotate(vec dst, vec src, vec shift) %{
9308 match(Set dst (RotateLeftV src shift));
9309 match(Set dst (RotateRightV src shift));
9310 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
9311 ins_encode %{
9312 int opcode = this->ideal_Opcode();
9313 int vector_len = vector_length_encoding(this);
9314 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
9315 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9316 %}
9317 ins_pipe( pipe_slow );
9318 %}
9319
9320 // ---------------------------------- Masked Operations ------------------------------------
9321 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
9322 predicate(!n->in(3)->bottom_type()->isa_vectmask());
9323 match(Set dst (LoadVectorMasked mem mask));
9324 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
9325 ins_encode %{
9326 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
9327 int vlen_enc = vector_length_encoding(this);
9328 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
9329 %}
9330 ins_pipe( pipe_slow );
9331 %}
9332
9333
9334 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
9335 predicate(n->in(3)->bottom_type()->isa_vectmask());
9336 match(Set dst (LoadVectorMasked mem mask));
9337 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
9338 ins_encode %{
9339 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
9340 int vector_len = vector_length_encoding(this);
9341 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
9342 %}
9343 ins_pipe( pipe_slow );
9344 %}
9345
9346 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
9347 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
9348 match(Set mem (StoreVectorMasked mem (Binary src mask)));
9349 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
9350 ins_encode %{
9351 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
9352 int vlen_enc = vector_length_encoding(src_node);
9353 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
9354 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
9355 %}
9356 ins_pipe( pipe_slow );
9357 %}
9358
9359 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
9360 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
9361 match(Set mem (StoreVectorMasked mem (Binary src mask)));
9362 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
9363 ins_encode %{
9364 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
9365 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
9366 int vlen_enc = vector_length_encoding(src_node);
9367 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
9368 %}
9369 ins_pipe( pipe_slow );
9370 %}
9371
9372 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
9373 match(Set addr (VerifyVectorAlignment addr mask));
9374 effect(KILL cr);
9375 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
9376 ins_encode %{
9377 Label Lskip;
9378 // check if masked bits of addr are zero
9379 __ testq($addr$$Register, $mask$$constant);
9380 __ jccb(Assembler::equal, Lskip);
9381 __ stop("verify_vector_alignment found a misaligned vector memory access");
9382 __ bind(Lskip);
9383 %}
9384 ins_pipe(pipe_slow);
9385 %}
9386
9387 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
9388 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
9389 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
9390 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
9391 ins_encode %{
9392 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
9393 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
9394
9395 Label DONE;
9396 int vlen_enc = vector_length_encoding(this, $src1);
9397 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
9398
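    // $dst is preset to -1 ("no mismatch"). Lanes outside $mask are folded in
    // through the negated mask, so KORTEST sets CF only when every lane is
    // either masked off or compared equal; otherwise NOT + TZCNT recovers the
    // index of the lowest lane that failed the masked compare.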
9399 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
9400 __ mov64($dst$$Register, -1L);
9401 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
9402 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
9403 __ jccb(Assembler::carrySet, DONE);
9404 __ kmovql($dst$$Register, $ktmp1$$KRegister);
9405 __ notq($dst$$Register);
9406 __ tzcntq($dst$$Register, $dst$$Register);
9407 __ bind(DONE);
9408 %}
9409 ins_pipe( pipe_slow );
9410 %}
9411
9412
9413 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
9414 match(Set dst (VectorMaskGen len));
9415 effect(TEMP temp, KILL cr);
9416 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
9417 ins_encode %{
9418 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
9419 %}
9420 ins_pipe( pipe_slow );
9421 %}
9422
9423 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
9424 match(Set dst (VectorMaskGen len));
9425 format %{ "vector_mask_gen $len \t! vector mask generator" %}
9426 effect(TEMP temp);
9427 ins_encode %{
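    // Build a mask of $len low set bits (e.g. len == 5 gives 0x1f) in a GPR
    // and move it into the destination mask register.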
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
9429 __ kmovql($dst$$KRegister, $temp$$Register);
9430 %}
9431 ins_pipe( pipe_slow );
9432 %}
9433
9434 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
9435 predicate(n->in(1)->bottom_type()->isa_vectmask());
9436 match(Set dst (VectorMaskToLong mask));
9437 effect(TEMP dst, KILL cr);
9438 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
9439 ins_encode %{
9440 int opcode = this->ideal_Opcode();
9441 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9442 int mask_len = Matcher::vector_length(this, $mask);
9443 int mask_size = mask_len * type2aelembytes(mbt);
9444 int vlen_enc = vector_length_encoding(this, $mask);
9445 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
9446 $dst$$Register, mask_len, mask_size, vlen_enc);
9447 %}
9448 ins_pipe( pipe_slow );
9449 %}
9450
9451 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
9452 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
9453 match(Set dst (VectorMaskToLong mask));
9454 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
9455 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
9456 ins_encode %{
9457 int opcode = this->ideal_Opcode();
9458 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9459 int mask_len = Matcher::vector_length(this, $mask);
9460 int vlen_enc = vector_length_encoding(this, $mask);
9461 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9462 $dst$$Register, mask_len, mbt, vlen_enc);
9463 %}
9464 ins_pipe( pipe_slow );
9465 %}
9466
9467 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
9468 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
9469 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
9470 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
9471 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
9472 ins_encode %{
9473 int opcode = this->ideal_Opcode();
9474 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9475 int mask_len = Matcher::vector_length(this, $mask);
9476 int vlen_enc = vector_length_encoding(this, $mask);
9477 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9478 $dst$$Register, mask_len, mbt, vlen_enc);
9479 %}
9480 ins_pipe( pipe_slow );
9481 %}
9482
9483 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
9484 predicate(n->in(1)->bottom_type()->isa_vectmask());
9485 match(Set dst (VectorMaskTrueCount mask));
9486 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
9487 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
9488 ins_encode %{
9489 int opcode = this->ideal_Opcode();
9490 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9491 int mask_len = Matcher::vector_length(this, $mask);
9492 int mask_size = mask_len * type2aelembytes(mbt);
9493 int vlen_enc = vector_length_encoding(this, $mask);
9494 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
9495 $tmp$$Register, mask_len, mask_size, vlen_enc);
9496 %}
9497 ins_pipe( pipe_slow );
9498 %}
9499
9500 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9501 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
9502 match(Set dst (VectorMaskTrueCount mask));
9503 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9504 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9505 ins_encode %{
9506 int opcode = this->ideal_Opcode();
9507 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9508 int mask_len = Matcher::vector_length(this, $mask);
9509 int vlen_enc = vector_length_encoding(this, $mask);
9510 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9511 $tmp$$Register, mask_len, mbt, vlen_enc);
9512 %}
9513 ins_pipe( pipe_slow );
9514 %}
9515
9516 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9517 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
9518 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
9519 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9520 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9521 ins_encode %{
9522 int opcode = this->ideal_Opcode();
9523 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9524 int mask_len = Matcher::vector_length(this, $mask);
9525 int vlen_enc = vector_length_encoding(this, $mask);
9526 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9527 $tmp$$Register, mask_len, mbt, vlen_enc);
9528 %}
9529 ins_pipe( pipe_slow );
9530 %}
9531
9532 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
9533 predicate(n->in(1)->bottom_type()->isa_vectmask());
9534 match(Set dst (VectorMaskFirstTrue mask));
9535 match(Set dst (VectorMaskLastTrue mask));
9536 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
9537 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
9538 ins_encode %{
9539 int opcode = this->ideal_Opcode();
9540 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9541 int mask_len = Matcher::vector_length(this, $mask);
9542 int mask_size = mask_len * type2aelembytes(mbt);
9543 int vlen_enc = vector_length_encoding(this, $mask);
9544 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
9545 $tmp$$Register, mask_len, mask_size, vlen_enc);
9546 %}
9547 ins_pipe( pipe_slow );
9548 %}
9549
9550 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9551 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
9552 match(Set dst (VectorMaskFirstTrue mask));
9553 match(Set dst (VectorMaskLastTrue mask));
9554 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9555 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9556 ins_encode %{
9557 int opcode = this->ideal_Opcode();
9558 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9559 int mask_len = Matcher::vector_length(this, $mask);
9560 int vlen_enc = vector_length_encoding(this, $mask);
9561 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9562 $tmp$$Register, mask_len, mbt, vlen_enc);
9563 %}
9564 ins_pipe( pipe_slow );
9565 %}
9566
9567 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9568 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
9569 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
9570 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
9571 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9572 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9573 ins_encode %{
9574 int opcode = this->ideal_Opcode();
9575 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9576 int mask_len = Matcher::vector_length(this, $mask);
9577 int vlen_enc = vector_length_encoding(this, $mask);
9578 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9579 $tmp$$Register, mask_len, mbt, vlen_enc);
9580 %}
9581 ins_pipe( pipe_slow );
9582 %}
9583
9584 // --------------------------------- Compress/Expand Operations ---------------------------
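// CompressV packs the lanes of src selected by mask into the low-order lanes of dst;
// ExpandV performs the inverse scatter. With AVX-512VL (or full 512-bit vectors) this
// maps onto the native compress/expand instructions via vector_compress_expand(); on
// AVX2 the rule below synthesizes the operation with a permute table and the extra
// vector/GPR temporaries.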
9585 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
9586 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
9587 match(Set dst (CompressV src mask));
9588 match(Set dst (ExpandV src mask));
9589 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
9591 ins_encode %{
9592 int opcode = this->ideal_Opcode();
9593 int vlen_enc = vector_length_encoding(this);
9594 BasicType bt = Matcher::vector_element_basic_type(this);
9595 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
9596 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
9597 %}
9598 ins_pipe( pipe_slow );
9599 %}
9600
9601 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
9602 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
9603 match(Set dst (CompressV src mask));
9604 match(Set dst (ExpandV src mask));
9605 format %{ "vector_compress_expand $dst, $src, $mask" %}
9606 ins_encode %{
9607 int opcode = this->ideal_Opcode();
9608 int vector_len = vector_length_encoding(this);
9609 BasicType bt = Matcher::vector_element_basic_type(this);
9610 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
9611 %}
9612 ins_pipe( pipe_slow );
9613 %}
9614
9615 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
9616 match(Set dst (CompressM mask));
9617 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
9618 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
9619 ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "mask input expected");
9621 int mask_len = Matcher::vector_length(this);
9622 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
9623 %}
9624 ins_pipe( pipe_slow );
9625 %}
9626
9627 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
9628
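// ReverseV reverses the bit order within each lane; ReverseBytesV reverses the byte
// order. With GFNI, per-byte bit reversal is done with a GF2P8AFFINEQB transform using
// the 0x8040201008040201 bit-matrix constant loaded below; without GFNI it is
// synthesized in vector_reverse_bit() using the xtmp/rtmp temporaries.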
9629 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
9630 predicate(!VM_Version::supports_gfni());
9631 match(Set dst (ReverseV src));
9632 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
9634 ins_encode %{
9635 int vec_enc = vector_length_encoding(this);
9636 BasicType bt = Matcher::vector_element_basic_type(this);
9637 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9638 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
9639 %}
9640 ins_pipe( pipe_slow );
9641 %}
9642
9643 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
9644 predicate(VM_Version::supports_gfni());
9645 match(Set dst (ReverseV src));
9646 effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
9648 ins_encode %{
9649 int vec_enc = vector_length_encoding(this);
9650 BasicType bt = Matcher::vector_element_basic_type(this);
9651 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
9652 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
9653 $xtmp$$XMMRegister);
9654 %}
9655 ins_pipe( pipe_slow );
9656 %}
9657
9658 instruct vreverse_byte_reg(vec dst, vec src) %{
9659 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
9660 match(Set dst (ReverseBytesV src));
9661 effect(TEMP dst);
9662 format %{ "vector_reverse_byte $dst, $src" %}
9663 ins_encode %{
9664 int vec_enc = vector_length_encoding(this);
9665 BasicType bt = Matcher::vector_element_basic_type(this);
9666 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
9667 %}
9668 ins_pipe( pipe_slow );
9669 %}
9670
9671 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
9672 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
9673 match(Set dst (ReverseBytesV src));
9674 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
9676 ins_encode %{
9677 int vec_enc = vector_length_encoding(this);
9678 BasicType bt = Matcher::vector_element_basic_type(this);
9679 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9680 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
9681 %}
9682 ins_pipe( pipe_slow );
9683 %}
9684
9685 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
9686
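// CountLeadingZerosV is selected per element type and ISA level: with AVX-512CD the
// int/long rules use the vplzcnt instructions directly (including a masked variant),
// short/byte are computed via extra temporaries, and the *_avx rules provide the
// fallback for targets without AVX-512VL.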
9687 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
9688 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
9689 Matcher::vector_length_in_bytes(n->in(1))));
9690 match(Set dst (CountLeadingZerosV src));
9691 format %{ "vector_count_leading_zeros $dst, $src" %}
9692 ins_encode %{
9693 int vlen_enc = vector_length_encoding(this, $src);
9694 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9695 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
9696 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
9697 %}
9698 ins_pipe( pipe_slow );
9699 %}
9700
9701 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
9702 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
9703 Matcher::vector_length_in_bytes(n->in(1))));
9704 match(Set dst (CountLeadingZerosV src mask));
9705 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
9706 ins_encode %{
9707 int vlen_enc = vector_length_encoding(this, $src);
9708 BasicType bt = Matcher::vector_element_basic_type(this, $src);
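    // Pre-load dst with src so that, with merge-masking, lanes whose mask bit is clear
    // keep the original source value after the masked lzcnt below.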
9709 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
9710 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
9711 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
9712 %}
9713 ins_pipe( pipe_slow );
9714 %}
9715
9716 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
9717 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
9718 VM_Version::supports_avx512cd() &&
9719 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
9720 match(Set dst (CountLeadingZerosV src));
9721 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
9723 ins_encode %{
9724 int vlen_enc = vector_length_encoding(this, $src);
9725 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9726 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9727 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
9728 %}
9729 ins_pipe( pipe_slow );
9730 %}
9731
9732 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
9733 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
9734 match(Set dst (CountLeadingZerosV src));
9735 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
9737 ins_encode %{
9738 int vlen_enc = vector_length_encoding(this, $src);
9739 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9740 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9741 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
9742 $rtmp$$Register, true, vlen_enc);
9743 %}
9744 ins_pipe( pipe_slow );
9745 %}
9746
9747 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
9748 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
9749 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
9750 match(Set dst (CountLeadingZerosV src));
9751 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
9752 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
9753 ins_encode %{
9754 int vlen_enc = vector_length_encoding(this, $src);
9755 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9756 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9757 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
9758 %}
9759 ins_pipe( pipe_slow );
9760 %}
9761
9762 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
9763 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
9764 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
9765 match(Set dst (CountLeadingZerosV src));
9766 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
9767 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
9768 ins_encode %{
9769 int vlen_enc = vector_length_encoding(this, $src);
9770 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9771 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9772 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
9773 %}
9774 ins_pipe( pipe_slow );
9775 %}
9776
9777 // ---------------------------------- Vector Masked Operations ------------------------------------
9778
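// The rules in this section match lane-wise operations predicated by an AVX-512 opmask
// register (kReg). They all delegate to the evmasked_op() macro-assembler helper; the
// boolean passed as 'true' is taken here to be the helper's merge flag, i.e. destination
// lanes with a clear mask bit keep their previous value (dst doubles as the first source).
// Most operations come in a register-register and a register-memory (LoadVector) flavor.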
9779 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
9780 match(Set dst (AddVB (Binary dst src2) mask));
9781 match(Set dst (AddVS (Binary dst src2) mask));
9782 match(Set dst (AddVI (Binary dst src2) mask));
9783 match(Set dst (AddVL (Binary dst src2) mask));
9784 match(Set dst (AddVF (Binary dst src2) mask));
9785 match(Set dst (AddVD (Binary dst src2) mask));
9786 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
9787 ins_encode %{
9788 int vlen_enc = vector_length_encoding(this);
9789 BasicType bt = Matcher::vector_element_basic_type(this);
9790 int opc = this->ideal_Opcode();
9791 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9792 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9793 %}
9794 ins_pipe( pipe_slow );
9795 %}
9796
9797 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
9798 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
9799 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
9800 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
9801 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
9802 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
9803 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
9804 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
9805 ins_encode %{
9806 int vlen_enc = vector_length_encoding(this);
9807 BasicType bt = Matcher::vector_element_basic_type(this);
9808 int opc = this->ideal_Opcode();
9809 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9810 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9811 %}
9812 ins_pipe( pipe_slow );
9813 %}
9814
9815 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
9816 match(Set dst (XorV (Binary dst src2) mask));
9817 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
9818 ins_encode %{
9819 int vlen_enc = vector_length_encoding(this);
9820 BasicType bt = Matcher::vector_element_basic_type(this);
9821 int opc = this->ideal_Opcode();
9822 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9823 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9824 %}
9825 ins_pipe( pipe_slow );
9826 %}
9827
9828 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
9829 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
9830 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
9831 ins_encode %{
9832 int vlen_enc = vector_length_encoding(this);
9833 BasicType bt = Matcher::vector_element_basic_type(this);
9834 int opc = this->ideal_Opcode();
9835 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9836 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9837 %}
9838 ins_pipe( pipe_slow );
9839 %}
9840
9841 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
9842 match(Set dst (OrV (Binary dst src2) mask));
9843 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
9844 ins_encode %{
9845 int vlen_enc = vector_length_encoding(this);
9846 BasicType bt = Matcher::vector_element_basic_type(this);
9847 int opc = this->ideal_Opcode();
9848 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9849 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9850 %}
9851 ins_pipe( pipe_slow );
9852 %}
9853
9854 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
9855 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
9856 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
9857 ins_encode %{
9858 int vlen_enc = vector_length_encoding(this);
9859 BasicType bt = Matcher::vector_element_basic_type(this);
9860 int opc = this->ideal_Opcode();
9861 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9862 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9863 %}
9864 ins_pipe( pipe_slow );
9865 %}
9866
9867 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
9868 match(Set dst (AndV (Binary dst src2) mask));
9869 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
9870 ins_encode %{
9871 int vlen_enc = vector_length_encoding(this);
9872 BasicType bt = Matcher::vector_element_basic_type(this);
9873 int opc = this->ideal_Opcode();
9874 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9875 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9876 %}
9877 ins_pipe( pipe_slow );
9878 %}
9879
9880 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
9881 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
9882 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
9883 ins_encode %{
9884 int vlen_enc = vector_length_encoding(this);
9885 BasicType bt = Matcher::vector_element_basic_type(this);
9886 int opc = this->ideal_Opcode();
9887 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9888 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9889 %}
9890 ins_pipe( pipe_slow );
9891 %}
9892
9893 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
9894 match(Set dst (SubVB (Binary dst src2) mask));
9895 match(Set dst (SubVS (Binary dst src2) mask));
9896 match(Set dst (SubVI (Binary dst src2) mask));
9897 match(Set dst (SubVL (Binary dst src2) mask));
9898 match(Set dst (SubVF (Binary dst src2) mask));
9899 match(Set dst (SubVD (Binary dst src2) mask));
9900 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
9901 ins_encode %{
9902 int vlen_enc = vector_length_encoding(this);
9903 BasicType bt = Matcher::vector_element_basic_type(this);
9904 int opc = this->ideal_Opcode();
9905 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9906 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9907 %}
9908 ins_pipe( pipe_slow );
9909 %}
9910
9911 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
9912 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
9913 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
9914 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
9915 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
9916 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
9917 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
9918 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
9919 ins_encode %{
9920 int vlen_enc = vector_length_encoding(this);
9921 BasicType bt = Matcher::vector_element_basic_type(this);
9922 int opc = this->ideal_Opcode();
9923 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9924 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9925 %}
9926 ins_pipe( pipe_slow );
9927 %}
9928
9929 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
9930 match(Set dst (MulVS (Binary dst src2) mask));
9931 match(Set dst (MulVI (Binary dst src2) mask));
9932 match(Set dst (MulVL (Binary dst src2) mask));
9933 match(Set dst (MulVF (Binary dst src2) mask));
9934 match(Set dst (MulVD (Binary dst src2) mask));
9935 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
9936 ins_encode %{
9937 int vlen_enc = vector_length_encoding(this);
9938 BasicType bt = Matcher::vector_element_basic_type(this);
9939 int opc = this->ideal_Opcode();
9940 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9941 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9942 %}
9943 ins_pipe( pipe_slow );
9944 %}
9945
9946 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
9947 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
9948 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
9949 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
9950 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
9951 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
9952 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
9953 ins_encode %{
9954 int vlen_enc = vector_length_encoding(this);
9955 BasicType bt = Matcher::vector_element_basic_type(this);
9956 int opc = this->ideal_Opcode();
9957 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9958 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9959 %}
9960 ins_pipe( pipe_slow );
9961 %}
9962
9963 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
9964 match(Set dst (SqrtVF dst mask));
9965 match(Set dst (SqrtVD dst mask));
9966 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
9967 ins_encode %{
9968 int vlen_enc = vector_length_encoding(this);
9969 BasicType bt = Matcher::vector_element_basic_type(this);
9970 int opc = this->ideal_Opcode();
9971 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9972 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
9973 %}
9974 ins_pipe( pipe_slow );
9975 %}
9976
9977 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
9978 match(Set dst (DivVF (Binary dst src2) mask));
9979 match(Set dst (DivVD (Binary dst src2) mask));
9980 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
9981 ins_encode %{
9982 int vlen_enc = vector_length_encoding(this);
9983 BasicType bt = Matcher::vector_element_basic_type(this);
9984 int opc = this->ideal_Opcode();
9985 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9986 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9987 %}
9988 ins_pipe( pipe_slow );
9989 %}
9990
9991 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
9992 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
9993 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
9994 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
9995 ins_encode %{
9996 int vlen_enc = vector_length_encoding(this);
9997 BasicType bt = Matcher::vector_element_basic_type(this);
9998 int opc = this->ideal_Opcode();
9999 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10000 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
10001 %}
10002 ins_pipe( pipe_slow );
10003 %}
10004
10005
10006 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
10007 match(Set dst (RotateLeftV (Binary dst shift) mask));
10008 match(Set dst (RotateRightV (Binary dst shift) mask));
10009 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
10010 ins_encode %{
10011 int vlen_enc = vector_length_encoding(this);
10012 BasicType bt = Matcher::vector_element_basic_type(this);
10013 int opc = this->ideal_Opcode();
10014 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10015 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
10016 %}
10017 ins_pipe( pipe_slow );
10018 %}
10019
10020 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
10021 match(Set dst (RotateLeftV (Binary dst src2) mask));
10022 match(Set dst (RotateRightV (Binary dst src2) mask));
10023 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
10024 ins_encode %{
10025 int vlen_enc = vector_length_encoding(this);
10026 BasicType bt = Matcher::vector_element_basic_type(this);
10027 int opc = this->ideal_Opcode();
10028 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10029 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
10030 %}
10031 ins_pipe( pipe_slow );
10032 %}
10033
10034 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
10035 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
10036 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
10037 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
10038 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
10039 ins_encode %{
10040 int vlen_enc = vector_length_encoding(this);
10041 BasicType bt = Matcher::vector_element_basic_type(this);
10042 int opc = this->ideal_Opcode();
10043 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10044 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
10045 %}
10046 ins_pipe( pipe_slow );
10047 %}
10048
10049 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
10050 predicate(!n->as_ShiftV()->is_var_shift());
10051 match(Set dst (LShiftVS (Binary dst src2) mask));
10052 match(Set dst (LShiftVI (Binary dst src2) mask));
10053 match(Set dst (LShiftVL (Binary dst src2) mask));
10054 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
10055 ins_encode %{
10056 int vlen_enc = vector_length_encoding(this);
10057 BasicType bt = Matcher::vector_element_basic_type(this);
10058 int opc = this->ideal_Opcode();
10059 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10060 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
10061 %}
10062 ins_pipe( pipe_slow );
10063 %}
10064
10065 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
10066 predicate(n->as_ShiftV()->is_var_shift());
10067 match(Set dst (LShiftVS (Binary dst src2) mask));
10068 match(Set dst (LShiftVI (Binary dst src2) mask));
10069 match(Set dst (LShiftVL (Binary dst src2) mask));
10070 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
10071 ins_encode %{
10072 int vlen_enc = vector_length_encoding(this);
10073 BasicType bt = Matcher::vector_element_basic_type(this);
10074 int opc = this->ideal_Opcode();
10075 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10076 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
10077 %}
10078 ins_pipe( pipe_slow );
10079 %}
10080
10081 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
10082 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
10083 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
10084 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
10085 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
10086 ins_encode %{
10087 int vlen_enc = vector_length_encoding(this);
10088 BasicType bt = Matcher::vector_element_basic_type(this);
10089 int opc = this->ideal_Opcode();
10090 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10091 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
10092 %}
10093 ins_pipe( pipe_slow );
10094 %}
10095
10096 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
10097 predicate(!n->as_ShiftV()->is_var_shift());
10098 match(Set dst (RShiftVS (Binary dst src2) mask));
10099 match(Set dst (RShiftVI (Binary dst src2) mask));
10100 match(Set dst (RShiftVL (Binary dst src2) mask));
10101 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
10102 ins_encode %{
10103 int vlen_enc = vector_length_encoding(this);
10104 BasicType bt = Matcher::vector_element_basic_type(this);
10105 int opc = this->ideal_Opcode();
10106 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10107 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
10108 %}
10109 ins_pipe( pipe_slow );
10110 %}
10111
10112 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
10113 predicate(n->as_ShiftV()->is_var_shift());
10114 match(Set dst (RShiftVS (Binary dst src2) mask));
10115 match(Set dst (RShiftVI (Binary dst src2) mask));
10116 match(Set dst (RShiftVL (Binary dst src2) mask));
10117 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
10118 ins_encode %{
10119 int vlen_enc = vector_length_encoding(this);
10120 BasicType bt = Matcher::vector_element_basic_type(this);
10121 int opc = this->ideal_Opcode();
10122 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10123 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
10124 %}
10125 ins_pipe( pipe_slow );
10126 %}
10127
10128 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
10129 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
10130 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
10131 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
10132 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
10133 ins_encode %{
10134 int vlen_enc = vector_length_encoding(this);
10135 BasicType bt = Matcher::vector_element_basic_type(this);
10136 int opc = this->ideal_Opcode();
10137 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10138 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
10139 %}
10140 ins_pipe( pipe_slow );
10141 %}
10142
10143 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
10144 predicate(!n->as_ShiftV()->is_var_shift());
10145 match(Set dst (URShiftVS (Binary dst src2) mask));
10146 match(Set dst (URShiftVI (Binary dst src2) mask));
10147 match(Set dst (URShiftVL (Binary dst src2) mask));
10148 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
10149 ins_encode %{
10150 int vlen_enc = vector_length_encoding(this);
10151 BasicType bt = Matcher::vector_element_basic_type(this);
10152 int opc = this->ideal_Opcode();
10153 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10154 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
10155 %}
10156 ins_pipe( pipe_slow );
10157 %}
10158
10159 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
10160 predicate(n->as_ShiftV()->is_var_shift());
10161 match(Set dst (URShiftVS (Binary dst src2) mask));
10162 match(Set dst (URShiftVI (Binary dst src2) mask));
10163 match(Set dst (URShiftVL (Binary dst src2) mask));
10164 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
10165 ins_encode %{
10166 int vlen_enc = vector_length_encoding(this);
10167 BasicType bt = Matcher::vector_element_basic_type(this);
10168 int opc = this->ideal_Opcode();
10169 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10170 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
10171 %}
10172 ins_pipe( pipe_slow );
10173 %}
10174
10175 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
10176 match(Set dst (MaxV (Binary dst src2) mask));
10177 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
10178 ins_encode %{
10179 int vlen_enc = vector_length_encoding(this);
10180 BasicType bt = Matcher::vector_element_basic_type(this);
10181 int opc = this->ideal_Opcode();
10182 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10183 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
10184 %}
10185 ins_pipe( pipe_slow );
10186 %}
10187
10188 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
10189 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
10190 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
10191 ins_encode %{
10192 int vlen_enc = vector_length_encoding(this);
10193 BasicType bt = Matcher::vector_element_basic_type(this);
10194 int opc = this->ideal_Opcode();
10195 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10196 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
10197 %}
10198 ins_pipe( pipe_slow );
10199 %}
10200
10201 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
10202 match(Set dst (MinV (Binary dst src2) mask));
10203 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
10204 ins_encode %{
10205 int vlen_enc = vector_length_encoding(this);
10206 BasicType bt = Matcher::vector_element_basic_type(this);
10207 int opc = this->ideal_Opcode();
10208 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10209 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
10210 %}
10211 ins_pipe( pipe_slow );
10212 %}
10213
10214 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
10215 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
10216 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
10217 ins_encode %{
10218 int vlen_enc = vector_length_encoding(this);
10219 BasicType bt = Matcher::vector_element_basic_type(this);
10220 int opc = this->ideal_Opcode();
10221 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10222 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
10223 %}
10224 ins_pipe( pipe_slow );
10225 %}
10226
10227 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
10228 match(Set dst (VectorRearrange (Binary dst src2) mask));
10229 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
10230 ins_encode %{
10231 int vlen_enc = vector_length_encoding(this);
10232 BasicType bt = Matcher::vector_element_basic_type(this);
10233 int opc = this->ideal_Opcode();
10234 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10235 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
10236 %}
10237 ins_pipe( pipe_slow );
10238 %}
10239
10240 instruct vabs_masked(vec dst, kReg mask) %{
10241 match(Set dst (AbsVB dst mask));
10242 match(Set dst (AbsVS dst mask));
10243 match(Set dst (AbsVI dst mask));
10244 match(Set dst (AbsVL dst mask));
10245 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
10246 ins_encode %{
10247 int vlen_enc = vector_length_encoding(this);
10248 BasicType bt = Matcher::vector_element_basic_type(this);
10249 int opc = this->ideal_Opcode();
10250 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10251 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
10252 %}
10253 ins_pipe( pipe_slow );
10254 %}
10255
10256 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
10257 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
10258 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
10259 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
10260 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
10262 int vlen_enc = vector_length_encoding(this);
10263 BasicType bt = Matcher::vector_element_basic_type(this);
10264 int opc = this->ideal_Opcode();
10265 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10266 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
10267 %}
10268 ins_pipe( pipe_slow );
10269 %}
10270
10271 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
10272 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
10273 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
10274 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
10275 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
10277 int vlen_enc = vector_length_encoding(this);
10278 BasicType bt = Matcher::vector_element_basic_type(this);
10279 int opc = this->ideal_Opcode();
10280 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10281 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
10282 %}
10283 ins_pipe( pipe_slow );
10284 %}
10285
10286 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
10287 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
10288 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
10289 ins_encode %{
10290 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
10291 int vlen_enc = vector_length_encoding(this, $src1);
10292 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
10293
    // Dispatch on the element type of src1 to select the matching masked compare instruction.
10295 switch (src1_elem_bt) {
10296 case T_BYTE: {
10297 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
10298 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
10299 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
10300 break;
10301 }
10302 case T_SHORT: {
10303 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
10304 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
10305 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
10306 break;
10307 }
10308 case T_INT: {
10309 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
10310 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
10311 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
10312 break;
10313 }
10314 case T_LONG: {
10315 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
10316 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
10317 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
10318 break;
10319 }
10320 case T_FLOAT: {
10321 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
10322 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
10323 break;
10324 }
10325 case T_DOUBLE: {
10326 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
10327 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
10328 break;
10329 }
10330 default: assert(false, "%s", type2name(src1_elem_bt)); break;
10331 }
10332 %}
10333 ins_pipe( pipe_slow );
10334 %}
10335
10336 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
10337 predicate(Matcher::vector_length(n) <= 32);
10338 match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src" %}
10340 ins_encode %{
10341 int mask_len = Matcher::vector_length(this);
10342 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
10343 %}
10344 ins_pipe( pipe_slow );
10345 %}
10346
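// XorVMask with MaskAll(-1) is a bitwise NOT of the mask. For mask lengths below 8 the
// extra kReg/GPR temporaries are used, presumably to keep the unused upper mask bits
// clear; for the regular lengths a plain knot() suffices.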
10347 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
10348 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
10349 match(Set dst (XorVMask src (MaskAll cnt)));
10350 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
10351 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
10352 ins_encode %{
10353 uint masklen = Matcher::vector_length(this);
10354 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
10355 %}
10356 ins_pipe( pipe_slow );
10357 %}
10358
10359 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
10360 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
10361 (Matcher::vector_length(n) == 16) ||
10362 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
10363 match(Set dst (XorVMask src (MaskAll cnt)));
10364 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
10365 ins_encode %{
10366 uint masklen = Matcher::vector_length(this);
10367 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
10368 %}
10369 ins_pipe( pipe_slow );
10370 %}
10371
10372 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
10373 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
10374 match(Set dst (VectorLongToMask src));
10375 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
10376 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
10377 ins_encode %{
10378 int mask_len = Matcher::vector_length(this);
10379 int vec_enc = vector_length_encoding(mask_len);
10380 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
10381 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
10382 %}
10383 ins_pipe( pipe_slow );
10384 %}
10385
10386
10387 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
10388 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
10389 match(Set dst (VectorLongToMask src));
10390 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
10392 ins_encode %{
10393 int mask_len = Matcher::vector_length(this);
10394 assert(mask_len <= 32, "invalid mask length");
10395 int vec_enc = vector_length_encoding(mask_len);
10396 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
10397 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
10398 %}
10399 ins_pipe( pipe_slow );
10400 %}
10401
10402 instruct long_to_mask_evex(kReg dst, rRegL src) %{
10403 predicate(n->bottom_type()->isa_vectmask());
10404 match(Set dst (VectorLongToMask src));
10405 format %{ "long_to_mask_evex $dst, $src\t!" %}
10406 ins_encode %{
10407 __ kmov($dst$$KRegister, $src$$Register);
10408 %}
10409 ins_pipe( pipe_slow );
10410 %}
10411
10412 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
10413 match(Set dst (AndVMask src1 src2));
10414 match(Set dst (OrVMask src1 src2));
10415 match(Set dst (XorVMask src1 src2));
10416 effect(TEMP kscratch);
10417 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
10418 ins_encode %{
10419 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
10420 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
10421 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
10422 uint masklen = Matcher::vector_length(this);
10423 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
10424 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
10425 %}
10426 ins_pipe( pipe_slow );
10427 %}
10428
10429 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
10430 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
10431 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
10432 ins_encode %{
10433 int vlen_enc = vector_length_encoding(this);
10434 BasicType bt = Matcher::vector_element_basic_type(this);
10435 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
10436 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
10437 %}
10438 ins_pipe( pipe_slow );
10439 %}
10440
10441 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
10442 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
10443 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
10444 ins_encode %{
10445 int vlen_enc = vector_length_encoding(this);
10446 BasicType bt = Matcher::vector_element_basic_type(this);
10447 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
10448 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
10449 %}
10450 ins_pipe( pipe_slow );
10451 %}
10452
10453 instruct castMM(kReg dst)
10454 %{
10455 match(Set dst (CastVV dst));
10456
10457 size(0);
10458 format %{ "# castVV of $dst" %}
10459 ins_encode(/* empty encoding */);
10460 ins_cost(0);
10461 ins_pipe(empty);
10462 %}
10463
10464 instruct castVV(vec dst)
10465 %{
10466 match(Set dst (CastVV dst));
10467
10468 size(0);
10469 format %{ "# castVV of $dst" %}
10470 ins_encode(/* empty encoding */);
10471 ins_cost(0);
10472 ins_pipe(empty);
10473 %}
10474
10475 instruct castVVLeg(legVec dst)
10476 %{
10477 match(Set dst (CastVV dst));
10478
10479 size(0);
10480 format %{ "# castVV of $dst" %}
10481 ins_encode(/* empty encoding */);
10482 ins_cost(0);
10483 ins_pipe(empty);
10484 %}
10485
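// IsInfiniteF/D are lowered with vfpclassss/vfpclasssd; immediate 0x18 selects the
// +infinity (0x08) and -infinity (0x10) classes, and the resulting one-bit opmask is
// moved into the integer destination with kmovbl.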
10486 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
10487 %{
10488 match(Set dst (IsInfiniteF src));
10489 effect(TEMP ktmp, KILL cr);
10490 format %{ "float_class_check $dst, $src" %}
10491 ins_encode %{
10492 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
10493 __ kmovbl($dst$$Register, $ktmp$$KRegister);
10494 %}
10495 ins_pipe(pipe_slow);
10496 %}
10497
10498 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
10499 %{
10500 match(Set dst (IsInfiniteD src));
10501 effect(TEMP ktmp, KILL cr);
10502 format %{ "double_class_check $dst, $src" %}
10503 ins_encode %{
10504 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
10505 __ kmovbl($dst$$Register, $ktmp$$KRegister);
10506 %}
10507 ins_pipe(pipe_slow);
10508 %}
10509
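// Saturating add/sub. For subword element types (byte/short) x86 has native saturating
// instructions, so the rules below pass the signed/unsigned flag straight to
// vector_saturating_op(). Int/long saturation has no direct hardware equivalent and is
// synthesized by the *_evex / *_avx rules further down using extra vector, opmask and
// general-purpose temporaries.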
10510 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
10511 %{
10512 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10513 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
10514 match(Set dst (SaturatingAddV src1 src2));
10515 match(Set dst (SaturatingSubV src1 src2));
10516 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
10517 ins_encode %{
10518 int vlen_enc = vector_length_encoding(this);
10519 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10520 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10521 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
10522 %}
10523 ins_pipe(pipe_slow);
10524 %}
10525
10526 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
10527 %{
10528 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10529 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
10530 match(Set dst (SaturatingAddV src1 src2));
10531 match(Set dst (SaturatingSubV src1 src2));
10532 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
10533 ins_encode %{
10534 int vlen_enc = vector_length_encoding(this);
10535 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10536 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10537 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
10538 %}
10539 ins_pipe(pipe_slow);
10540 %}
10541
10542 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
10543 %{
10544 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10545 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
10546 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
10547 match(Set dst (SaturatingAddV src1 src2));
10548 match(Set dst (SaturatingSubV src1 src2));
10549 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
10550 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
10551 ins_encode %{
10552 int vlen_enc = vector_length_encoding(this);
10553 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10554 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10555 $src1$$XMMRegister, $src2$$XMMRegister,
10556 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
10557 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
10558 %}
10559 ins_pipe(pipe_slow);
10560 %}
10561
10562 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
10563 %{
10564 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10565 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
10566 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
10567 match(Set dst (SaturatingAddV src1 src2));
10568 match(Set dst (SaturatingSubV src1 src2));
10569 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
10570 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
10571 ins_encode %{
10572 int vlen_enc = vector_length_encoding(this);
10573 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10574 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
10575 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
10576 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
10577 %}
10578 ins_pipe(pipe_slow);
10579 %}
10580
10581 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
10582 %{
10583 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10584 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
10585 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
10586 match(Set dst (SaturatingAddV src1 src2));
10587 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
10588 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
10589 ins_encode %{
10590 int vlen_enc = vector_length_encoding(this);
10591 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10592 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
10593 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
10594 %}
10595 ins_pipe(pipe_slow);
10596 %}
10597
10598 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
10599 %{
10600 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10601 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
10602 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
10603 match(Set dst (SaturatingAddV src1 src2));
10604 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
10605 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
10606 ins_encode %{
10607 int vlen_enc = vector_length_encoding(this);
10608 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10609 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
10610 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
10611 %}
10612 ins_pipe(pipe_slow);
10613 %}
10614
10615 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
10616 %{
10617 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10618 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
10619 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
10620 match(Set dst (SaturatingSubV src1 src2));
10621 effect(TEMP ktmp);
10622 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
10623 ins_encode %{
10624 int vlen_enc = vector_length_encoding(this);
10625 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10626 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
10627 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
10628 %}
10629 ins_pipe(pipe_slow);
10630 %}
10631
10632 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
10633 %{
10634 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10635 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
10636 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
10637 match(Set dst (SaturatingSubV src1 src2));
10638 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
10639 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
10640 ins_encode %{
10641 int vlen_enc = vector_length_encoding(this);
10642 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10643 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
10644 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
10645 %}
10646 ins_pipe(pipe_slow);
10647 %}
10648
10649 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
10650 %{
10651 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10652 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
10653 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
10654 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
10655 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
10656 ins_encode %{
10657 int vlen_enc = vector_length_encoding(this);
10658 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10659 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10660 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
10661 %}
10662 ins_pipe(pipe_slow);
10663 %}
10664
10665 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
10666 %{
10667 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10668 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
10669 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
10670 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
10671 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
10672 ins_encode %{
10673 int vlen_enc = vector_length_encoding(this);
10674 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10675 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10676 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
10677 %}
10678 ins_pipe(pipe_slow);
10679 %}
10680
10681 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
10682 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10683 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
10684 match(Set dst (SaturatingAddV (Binary dst src) mask));
10685 match(Set dst (SaturatingSubV (Binary dst src) mask));
10686 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
10687 ins_encode %{
10688 int vlen_enc = vector_length_encoding(this);
10689 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10690 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
10691 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
10692 %}
10693 ins_pipe( pipe_slow );
10694 %}
10695
10696 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
10697 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10698 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
10699 match(Set dst (SaturatingAddV (Binary dst src) mask));
10700 match(Set dst (SaturatingSubV (Binary dst src) mask));
10701 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
10702 ins_encode %{
10703 int vlen_enc = vector_length_encoding(this);
10704 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10705 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
10706 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
10707 %}
10708 ins_pipe( pipe_slow );
10709 %}
10710
10711 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
10712 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10713 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
10714 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
10715 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
10716 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
10717 ins_encode %{
10718 int vlen_enc = vector_length_encoding(this);
10719 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10720 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
10721 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
10722 %}
10723 ins_pipe( pipe_slow );
10724 %}
10725
10726 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
10727 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10728 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
10729 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
10730 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
10731 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
10732 ins_encode %{
10733 int vlen_enc = vector_length_encoding(this);
10734 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10735 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
10736 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
10737 %}
10738 ins_pipe( pipe_slow );
10739 %}
10740
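// SelectFromTwoVector treats the concatenation {src1, src2} as a single lookup table and
// gathers the lanes named by $index from it (the index vector doubles as the destination).
// The lowering is assumed to use an EVEX two-table permute (vpermi2/vpermt2 family).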
10741 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
10742 %{
10743 match(Set index (SelectFromTwoVector (Binary index src1) src2));
10744 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
10745 ins_encode %{
10746 int vlen_enc = vector_length_encoding(this);
10747 BasicType bt = Matcher::vector_element_basic_type(this);
10748 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
10749 %}
10750 ins_pipe(pipe_slow);
10751 %}
10752
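// Scalar Float16 support. ReinterpretS2HF/ReinterpretHF2S move the 16 payload bits
// between a GPR and an XMM register with vmovw, the fused reinterpret+convert forms map
// to vcvtps2ph/vcvtph2ps, and the remaining rules use the AVX512-FP16 scalar
// instructions (vsqrtsh, efp16sh for add/sub/mul/div, vfmadd132sh), with an AVX10.2
// variant for min/max.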
10753 instruct reinterpretS2HF(regF dst, rRegI src)
10754 %{
10755 match(Set dst (ReinterpretS2HF src));
10756 format %{ "vmovw $dst, $src" %}
10757 ins_encode %{
10758 __ vmovw($dst$$XMMRegister, $src$$Register);
10759 %}
10760 ins_pipe(pipe_slow);
10761 %}
10762
10763 instruct reinterpretHF2S(rRegI dst, regF src)
10764 %{
10765 match(Set dst (ReinterpretHF2S src));
10766 format %{ "vmovw $dst, $src" %}
10767 ins_encode %{
10768 __ vmovw($dst$$Register, $src$$XMMRegister);
10769 %}
10770 ins_pipe(pipe_slow);
10771 %}
10772
10773 instruct convF2HFAndS2HF(regF dst, regF src)
10774 %{
10775 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
10776 format %{ "convF2HFAndS2HF $dst, $src" %}
10777 ins_encode %{
10778 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
10779 %}
10780 ins_pipe(pipe_slow);
10781 %}
10782
10783 instruct convHF2SAndHF2F(regF dst, regF src)
10784 %{
10785 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
10786 format %{ "convHF2SAndHF2F $dst, $src" %}
10787 ins_encode %{
10788 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
10789 %}
10790 ins_pipe(pipe_slow);
10791 %}
10792
10793 instruct scalar_sqrt_HF_reg(regF dst, regF src)
10794 %{
10795 match(Set dst (SqrtHF src));
10796 format %{ "scalar_sqrt_fp16 $dst, $src" %}
10797 ins_encode %{
10798 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
10799 %}
10800 ins_pipe(pipe_slow);
10801 %}
10802
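// Scalar Float16 add/sub/mul/div. The ideal opcode is handed to a single
// helper (efp16sh), which is expected to pick the matching scalar FP16
// instruction (vaddsh/vsubsh/vmulsh/vdivsh).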
10803 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
10804 %{
10805 match(Set dst (AddHF src1 src2));
10806 match(Set dst (DivHF src1 src2));
10807 match(Set dst (MulHF src1 src2));
10808 match(Set dst (SubHF src1 src2));
10809 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
10810 ins_encode %{
10811 int opcode = this->ideal_Opcode();
10812 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
10813 %}
10814 ins_pipe(pipe_slow);
10815 %}
10816
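// Float16 min/max on AVX10.2 targets. The imm8 function code (COMPARE_SIGN
// variants) is expected to give the NaN propagation and -0.0 < +0.0 ordering
// required by Java's Math.min/max semantics.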
10817 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
10818 %{
10819 predicate(VM_Version::supports_avx10_2());
10820 match(Set dst (MaxHF src1 src2));
10821 match(Set dst (MinHF src1 src2));
10822 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
10823 ins_encode %{
10824 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
10825 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
10826 %}
10827 ins_pipe( pipe_slow );
10828 %}
10829
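// Fallback for targets without AVX10.2: Float16 min/max is composed from
// compare/blend style sequences (see scalar_max_min_fp16), which need a mask
// register and two XMM temporaries.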
10830 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
10831 %{
10832 predicate(!VM_Version::supports_avx10_2());
10833 match(Set dst (MaxHF src1 src2));
10834 match(Set dst (MinHF src1 src2));
10835 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
10836 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
10837 ins_encode %{
10838 int opcode = this->ideal_Opcode();
10839 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
10840 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
10841 %}
10842 ins_pipe( pipe_slow );
10843 %}
10844
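// Scalar Float16 fused multiply-add. The 132 form computes
// dst = dst * src1 + src2, which matches (FmaHF src2 (Binary dst src1))
// without any operand shuffling.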
10845 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
10846 %{
10847 match(Set dst (FmaHF src2 (Binary dst src1)));
10848 effect(DEF dst);
10849 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
10850 ins_encode %{
10851 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
10852 %}
10853 ins_pipe( pipe_slow );
10854 %}
10855
10856
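// Packed Float16 square root via AVX512-FP16 VSQRTPH.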
10857 instruct vector_sqrt_HF_reg(vec dst, vec src)
10858 %{
10859 match(Set dst (SqrtVHF src));
10860 format %{ "vector_sqrt_fp16 $dst, $src" %}
10861 ins_encode %{
10862 int vlen_enc = vector_length_encoding(this);
10863 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
10864 %}
10865 ins_pipe(pipe_slow);
10866 %}
10867
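// Float16 vectors are materialized as short vectors and reinterpreted, so the
// memory-operand rules below match VectorReinterpret (LoadVector ...) and fold
// the load into the instruction's memory operand.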
10868 instruct vector_sqrt_HF_mem(vec dst, memory src)
10869 %{
10870 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
10871 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
10872 ins_encode %{
10873 int vlen_enc = vector_length_encoding(this);
10874 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
10875 %}
10876 ins_pipe(pipe_slow);
10877 %}
10878
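// Packed Float16 add/sub/mul/div; the ideal opcode is passed to evfp16ph,
// which is expected to select the corresponding vaddph/vsubph/vmulph/vdivph
// form.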
10879 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
10880 %{
10881 match(Set dst (AddVHF src1 src2));
10882 match(Set dst (DivVHF src1 src2));
10883 match(Set dst (MulVHF src1 src2));
10884 match(Set dst (SubVHF src1 src2));
10885 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
10886 ins_encode %{
10887 int vlen_enc = vector_length_encoding(this);
10888 int opcode = this->ideal_Opcode();
10889 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
10890 %}
10891 ins_pipe(pipe_slow);
10892 %}
10893
10894
10895 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
10896 %{
10897 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
10898 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
10899 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
10900 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
10901 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
10902 ins_encode %{
10903 int vlen_enc = vector_length_encoding(this);
10904 int opcode = this->ideal_Opcode();
10905 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
10906 %}
10907 ins_pipe(pipe_slow);
10908 %}
10909
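// Packed Float16 fused multiply-add. As in the scalar rule, the 132 form
// computes dst = dst * src1 + src2, matching (FmaVHF src2 (Binary dst src1))
// directly.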
10910 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
10911 %{
10912 match(Set dst (FmaVHF src2 (Binary dst src1)));
10913 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
10914 ins_encode %{
10915 int vlen_enc = vector_length_encoding(this);
10916 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
10917 %}
10918 ins_pipe( pipe_slow );
10919 %}
10920
10921 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
10922 %{
10923 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
10924 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
10925 ins_encode %{
10926 int vlen_enc = vector_length_encoding(this);
10927 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
10928 %}
10929 ins_pipe( pipe_slow );
10930 %}
10931
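// Packed Float16 min/max on AVX10.2 targets (memory and register forms).
// k0 means no masking; the imm8 function code is expected to follow Java's
// Math.min/max semantics for NaN and signed zero.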
10932 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
10933 %{
10934 predicate(VM_Version::supports_avx10_2());
10935 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
10936 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
10937 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
10938 ins_encode %{
10939 int vlen_enc = vector_length_encoding(this);
10940 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
10941 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
10942 %}
10943 ins_pipe( pipe_slow );
10944 %}
10945
10946 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
10947 %{
10948 predicate(VM_Version::supports_avx10_2());
10949 match(Set dst (MinVHF src1 src2));
10950 match(Set dst (MaxVHF src1 src2));
10951 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
10952 ins_encode %{
10953 int vlen_enc = vector_length_encoding(this);
10954 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
10955 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
10956 %}
10957 ins_pipe( pipe_slow );
10958 %}
10959
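// Fallback for targets without AVX10.2: packed Float16 min/max is composed
// from compare/blend style sequences (see vector_max_min_fp16), requiring a
// mask register and two XMM temporaries.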
10960 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
10961 %{
10962 predicate(!VM_Version::supports_avx10_2());
10963 match(Set dst (MinVHF src1 src2));
10964 match(Set dst (MaxVHF src1 src2));
10965 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
10966 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
10967 ins_encode %{
10968 int vlen_enc = vector_length_encoding(this);
10969 int opcode = this->ideal_Opcode();
10970 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
10971 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
10972 %}
10973 ins_pipe( pipe_slow );
10974 %}