1 //
2 // Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 Common Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // XMM registers. 512-bit registers or 8 words each, labeled (a)-p.
63 // Word a in each register holds a Float, words ab hold a Double.
64 // The whole registers are used in SSE4.2 version intrinsics,
65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
66 // UseXMMForArrayCopy and UseSuperword flags).
67 // For pre EVEX enabled architectures:
68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
69 // For EVEX enabled architectures:
70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
71 //
72 // Linux ABI: No register preserved across function calls
73 // XMM0-XMM7 might hold parameters
74 // Windows ABI: XMM6-XMM15 preserved across function calls
75 // XMM0-XMM3 might hold parameters
76
77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
93
94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
110
111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
127
128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
144
145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
161
162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
178
179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
183 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
195
196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
212
213 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
214 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
215 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
216 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
217 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
218 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
219 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
220 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
221 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
222 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
223 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
224 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
225 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
226 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
227 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
228 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
229
230 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
231 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
232 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
233 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
234 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
235 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
236 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
237 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
238 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
239 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
240 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
241 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
242 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
243 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
244 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
245 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
246
247 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
248 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
249 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
250 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
251 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
252 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
253 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
254 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
255 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
256 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
257 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
258 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
259 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
260 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
261 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
262 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
263
264 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
265 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
266 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
267 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
268 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
269 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
270 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
271 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
272 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
273 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
274 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
275 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
276 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
277 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
278 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
279 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
280
281 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
282 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
283 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
284 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
285 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
286 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
287 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
288 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
289 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
290 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
291 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
292 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
293 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
294 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
295 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
296 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
297
298 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
299 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
300 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
301 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
302 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
303 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
304 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
305 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
306 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
307 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
308 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
309 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
310 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
311 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
312 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
313 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
314
315 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
316 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
317 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
318 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
319 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
320 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
321 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
322 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
323 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
324 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
325 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
326 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
327 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
328 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
329 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
330 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
331
332 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
333 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
334 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
335 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
336 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
337 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
338 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
339 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
340 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
341 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
342 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
343 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
344 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
345 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
346 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
347 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
348
349 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
350 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
351 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
352 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
353 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
354 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
355 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
356 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
357 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
358 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
359 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
360 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
361 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
362 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
363 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
364 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
365
366 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
367 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
368 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
369 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
370 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
371 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
372 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
373 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
374 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
375 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
376 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
377 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
378 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
379 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
380 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
381 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
382
383 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
384 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
385 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
386 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
387 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
388 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
389 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
390 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
391 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
392 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
393 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
394 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
395 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
396 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
397 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
398 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
399
400 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
401 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
402 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
403 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
404 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
405 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
406 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
407 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
408 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
409 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
410 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
411 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
412 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
413 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
414 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
415 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
416
417 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
418 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
419 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
420 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
421 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
422 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
423 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
424 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
425 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
426 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
427 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
428 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
429 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
430 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
431 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
432 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
433
434 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
435 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
436 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
437 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
438 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
439 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
440 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
441 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
442 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
443 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
444 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
445 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
446 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
447 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
448 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
449 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
450
451 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
452 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
453 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
454 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
455 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
456 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
457 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
458 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
459 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
460 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
461 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
462 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
463 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
464 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
465 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
466 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
467
468 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
469 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
470 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
471 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
472 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
473 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
474 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
475 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
476 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
477 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
478 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
479 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
480 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
481 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
482 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
483 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
484
485 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
486 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
487 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
488 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
489 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
490 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
491 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
492 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
493 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
494 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
495 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
496 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
497 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
498 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
499 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
500 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
501
502 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
503 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
504 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
505 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
506 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
507 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
508 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
509 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
510 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
511 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
512 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
513 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
514 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
515 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
516 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
517 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
518
519 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
520 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
521 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
522 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
523 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
524 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
525 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
526 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
527 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
528 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
529 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
530 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
531 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
532 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
533 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
534 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
535
536 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
537 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
538 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
539 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
540 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
541 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
542 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
543 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
544 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
545 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
546 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
547 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
548 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
549 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
550 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
551 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
552
553 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
554 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
555 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
556 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
557 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
558 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
559 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
560 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
561 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
562 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
563 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
564 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
565 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
566 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
567 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
568 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
569
570 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
571 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
572 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
573 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
574 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
575 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
576 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
577 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
578 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
579 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
580 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
581 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
582 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
583 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
584 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
585 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
586
587 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
588 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
589 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
590 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
591 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
592 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
593 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
594 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
595 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
596 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
597 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
598 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
599 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
600 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
601 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
602 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
603
604 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
605 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
606 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
607 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
608 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
609 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
610 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
611 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
612 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
613 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
614 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
615 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
616 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
617 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
618 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
619 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
620
621 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
622
623 // AVX3 Mask Registers.
624 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
625 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
626
627 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
628 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
629
630 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
631 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
632
633 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
634 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
635
636 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
637 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
638
639 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
640 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
641
642 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
643 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
644
645
646 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
647 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
648 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
649 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
650 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
651 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
652 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
653 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
654 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
655 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
656 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
657 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
658 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
659 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
660 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
661 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
662 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
663 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
664 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
665 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
666 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
667 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
668 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
669 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
670 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
671 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
672 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
673 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
674 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
675 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
676 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
677 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
678
679 alloc_class chunk2(K7, K7_H,
680 K6, K6_H,
681 K5, K5_H,
682 K4, K4_H,
683 K3, K3_H,
684 K2, K2_H,
685 K1, K1_H);
686
687 reg_class vectmask_reg(K1, K1_H,
688 K2, K2_H,
689 K3, K3_H,
690 K4, K4_H,
691 K5, K5_H,
692 K6, K6_H,
693 K7, K7_H);
694
695 reg_class vectmask_reg_K1(K1, K1_H);
696 reg_class vectmask_reg_K2(K2, K2_H);
697 reg_class vectmask_reg_K3(K3, K3_H);
698 reg_class vectmask_reg_K4(K4, K4_H);
699 reg_class vectmask_reg_K5(K5, K5_H);
700 reg_class vectmask_reg_K6(K6, K6_H);
701 reg_class vectmask_reg_K7(K7, K7_H);
702
703 // flags allocation class should be last.
704 alloc_class chunk3(RFLAGS);
705
706
707 // Singleton class for condition codes
708 reg_class int_flags(RFLAGS);
709
710 // Class for pre evex float registers
711 reg_class float_reg_legacy(XMM0,
712 XMM1,
713 XMM2,
714 XMM3,
715 XMM4,
716 XMM5,
717 XMM6,
718 XMM7,
719 XMM8,
720 XMM9,
721 XMM10,
722 XMM11,
723 XMM12,
724 XMM13,
725 XMM14,
726 XMM15);
727
728 // Class for evex float registers
729 reg_class float_reg_evex(XMM0,
730 XMM1,
731 XMM2,
732 XMM3,
733 XMM4,
734 XMM5,
735 XMM6,
736 XMM7,
737 XMM8,
738 XMM9,
739 XMM10,
740 XMM11,
741 XMM12,
742 XMM13,
743 XMM14,
744 XMM15,
745 XMM16,
746 XMM17,
747 XMM18,
748 XMM19,
749 XMM20,
750 XMM21,
751 XMM22,
752 XMM23,
753 XMM24,
754 XMM25,
755 XMM26,
756 XMM27,
757 XMM28,
758 XMM29,
759 XMM30,
760 XMM31);
761
762 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
763 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
764
765 // Class for pre evex double registers
766 reg_class double_reg_legacy(XMM0, XMM0b,
767 XMM1, XMM1b,
768 XMM2, XMM2b,
769 XMM3, XMM3b,
770 XMM4, XMM4b,
771 XMM5, XMM5b,
772 XMM6, XMM6b,
773 XMM7, XMM7b,
774 XMM8, XMM8b,
775 XMM9, XMM9b,
776 XMM10, XMM10b,
777 XMM11, XMM11b,
778 XMM12, XMM12b,
779 XMM13, XMM13b,
780 XMM14, XMM14b,
781 XMM15, XMM15b);
782
783 // Class for evex double registers
784 reg_class double_reg_evex(XMM0, XMM0b,
785 XMM1, XMM1b,
786 XMM2, XMM2b,
787 XMM3, XMM3b,
788 XMM4, XMM4b,
789 XMM5, XMM5b,
790 XMM6, XMM6b,
791 XMM7, XMM7b,
792 XMM8, XMM8b,
793 XMM9, XMM9b,
794 XMM10, XMM10b,
795 XMM11, XMM11b,
796 XMM12, XMM12b,
797 XMM13, XMM13b,
798 XMM14, XMM14b,
799 XMM15, XMM15b,
800 XMM16, XMM16b,
801 XMM17, XMM17b,
802 XMM18, XMM18b,
803 XMM19, XMM19b,
804 XMM20, XMM20b,
805 XMM21, XMM21b,
806 XMM22, XMM22b,
807 XMM23, XMM23b,
808 XMM24, XMM24b,
809 XMM25, XMM25b,
810 XMM26, XMM26b,
811 XMM27, XMM27b,
812 XMM28, XMM28b,
813 XMM29, XMM29b,
814 XMM30, XMM30b,
815 XMM31, XMM31b);
816
817 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
818 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
819
820 // Class for pre evex 32bit vector registers
821 reg_class vectors_reg_legacy(XMM0,
822 XMM1,
823 XMM2,
824 XMM3,
825 XMM4,
826 XMM5,
827 XMM6,
828 XMM7,
829 XMM8,
830 XMM9,
831 XMM10,
832 XMM11,
833 XMM12,
834 XMM13,
835 XMM14,
836 XMM15);
837
838 // Class for evex 32bit vector registers
839 reg_class vectors_reg_evex(XMM0,
840 XMM1,
841 XMM2,
842 XMM3,
843 XMM4,
844 XMM5,
845 XMM6,
846 XMM7,
847 XMM8,
848 XMM9,
849 XMM10,
850 XMM11,
851 XMM12,
852 XMM13,
853 XMM14,
854 XMM15,
855 XMM16,
856 XMM17,
857 XMM18,
858 XMM19,
859 XMM20,
860 XMM21,
861 XMM22,
862 XMM23,
863 XMM24,
864 XMM25,
865 XMM26,
866 XMM27,
867 XMM28,
868 XMM29,
869 XMM30,
870 XMM31);
871
872 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
873 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
874
875 // Class for all 64bit vector registers
876 reg_class vectord_reg_legacy(XMM0, XMM0b,
877 XMM1, XMM1b,
878 XMM2, XMM2b,
879 XMM3, XMM3b,
880 XMM4, XMM4b,
881 XMM5, XMM5b,
882 XMM6, XMM6b,
883 XMM7, XMM7b,
884 XMM8, XMM8b,
885 XMM9, XMM9b,
886 XMM10, XMM10b,
887 XMM11, XMM11b,
888 XMM12, XMM12b,
889 XMM13, XMM13b,
890 XMM14, XMM14b,
891 XMM15, XMM15b);
892
893 // Class for all 64bit vector registers
894 reg_class vectord_reg_evex(XMM0, XMM0b,
895 XMM1, XMM1b,
896 XMM2, XMM2b,
897 XMM3, XMM3b,
898 XMM4, XMM4b,
899 XMM5, XMM5b,
900 XMM6, XMM6b,
901 XMM7, XMM7b,
902 XMM8, XMM8b,
903 XMM9, XMM9b,
904 XMM10, XMM10b,
905 XMM11, XMM11b,
906 XMM12, XMM12b,
907 XMM13, XMM13b,
908 XMM14, XMM14b,
909 XMM15, XMM15b,
910 XMM16, XMM16b,
911 XMM17, XMM17b,
912 XMM18, XMM18b,
913 XMM19, XMM19b,
914 XMM20, XMM20b,
915 XMM21, XMM21b,
916 XMM22, XMM22b,
917 XMM23, XMM23b,
918 XMM24, XMM24b,
919 XMM25, XMM25b,
920 XMM26, XMM26b,
921 XMM27, XMM27b,
922 XMM28, XMM28b,
923 XMM29, XMM29b,
924 XMM30, XMM30b,
925 XMM31, XMM31b);
926
927 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
928 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
929
930 // Class for all 128bit vector registers
931 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
932 XMM1, XMM1b, XMM1c, XMM1d,
933 XMM2, XMM2b, XMM2c, XMM2d,
934 XMM3, XMM3b, XMM3c, XMM3d,
935 XMM4, XMM4b, XMM4c, XMM4d,
936 XMM5, XMM5b, XMM5c, XMM5d,
937 XMM6, XMM6b, XMM6c, XMM6d,
938 XMM7, XMM7b, XMM7c, XMM7d,
939 XMM8, XMM8b, XMM8c, XMM8d,
940 XMM9, XMM9b, XMM9c, XMM9d,
941 XMM10, XMM10b, XMM10c, XMM10d,
942 XMM11, XMM11b, XMM11c, XMM11d,
943 XMM12, XMM12b, XMM12c, XMM12d,
944 XMM13, XMM13b, XMM13c, XMM13d,
945 XMM14, XMM14b, XMM14c, XMM14d,
946 XMM15, XMM15b, XMM15c, XMM15d);
947
948 // Class for all 128bit vector registers
949 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
950 XMM1, XMM1b, XMM1c, XMM1d,
951 XMM2, XMM2b, XMM2c, XMM2d,
952 XMM3, XMM3b, XMM3c, XMM3d,
953 XMM4, XMM4b, XMM4c, XMM4d,
954 XMM5, XMM5b, XMM5c, XMM5d,
955 XMM6, XMM6b, XMM6c, XMM6d,
956 XMM7, XMM7b, XMM7c, XMM7d,
957 XMM8, XMM8b, XMM8c, XMM8d,
958 XMM9, XMM9b, XMM9c, XMM9d,
959 XMM10, XMM10b, XMM10c, XMM10d,
960 XMM11, XMM11b, XMM11c, XMM11d,
961 XMM12, XMM12b, XMM12c, XMM12d,
962 XMM13, XMM13b, XMM13c, XMM13d,
963 XMM14, XMM14b, XMM14c, XMM14d,
964 XMM15, XMM15b, XMM15c, XMM15d,
965 XMM16, XMM16b, XMM16c, XMM16d,
966 XMM17, XMM17b, XMM17c, XMM17d,
967 XMM18, XMM18b, XMM18c, XMM18d,
968 XMM19, XMM19b, XMM19c, XMM19d,
969 XMM20, XMM20b, XMM20c, XMM20d,
970 XMM21, XMM21b, XMM21c, XMM21d,
971 XMM22, XMM22b, XMM22c, XMM22d,
972 XMM23, XMM23b, XMM23c, XMM23d,
973 XMM24, XMM24b, XMM24c, XMM24d,
974 XMM25, XMM25b, XMM25c, XMM25d,
975 XMM26, XMM26b, XMM26c, XMM26d,
976 XMM27, XMM27b, XMM27c, XMM27d,
977 XMM28, XMM28b, XMM28c, XMM28d,
978 XMM29, XMM29b, XMM29c, XMM29d,
979 XMM30, XMM30b, XMM30c, XMM30d,
980 XMM31, XMM31b, XMM31c, XMM31d);
981
982 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
983 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
984
985 // Class for all 256bit vector registers
986 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
987 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
988 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
989 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
990 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
991 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
992 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
993 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
994 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
995 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
996 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
997 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
998 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
999 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1000 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1001 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1002
1003 // Class for all 256bit vector registers
1004 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1005 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1006 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1007 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1008 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1009 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1010 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1011 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1012 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1013 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1014 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1015 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1016 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1017 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1018 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1019 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1020 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1021 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1022 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1023 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1024 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1025 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1026 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1027 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1028 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1029 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1030 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1031 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1032 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1033 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1034 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1035 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1036
1037 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1038 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1039
1040 // Class for all 512bit vector registers
1041 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1042 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1043 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1044 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1045 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1046 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1047 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1048 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1049 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1057 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1073
1074 // Class for restricted 512bit vector registers
1075 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1076 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1077 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1078 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1079 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1080 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1081 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1082 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1083 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1084 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1085 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1086 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1087 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1088 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1089 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1090 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1091
1092 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1093 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1094
1095 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1096 %}
1097
1098
1099 //----------SOURCE BLOCK-------------------------------------------------------
1100 // This is a block of C++ code which provides values, functions, and
1101 // definitions necessary in the rest of the architecture description
1102
1103 source_hpp %{
1104 // Header information of the source block.
1105 // Method declarations/definitions which are used outside
1106 // the ad-scope can conveniently be defined here.
1107 //
1108 // To keep related declarations/definitions/uses close together,
1109 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
1110
1111 #include "runtime/vm_version.hpp"
1112
1113 class NativeJump;
1114
1115 class CallStubImpl {
1116
1117 //--------------------------------------------------------------
1118 //---< Used for optimization in Compile::shorten_branches >---
1119 //--------------------------------------------------------------
1120
1121 public:
1122 // Size of call trampoline stub.
1123 static uint size_call_trampoline() {
1124 return 0; // no call trampolines on this platform
1125 }
1126
1127 // number of relocations needed by a call trampoline stub
1128 static uint reloc_call_trampoline() {
1129 return 0; // no call trampolines on this platform
1130 }
1131 };
1132
1133 class HandlerImpl {
1134
1135 public:
1136
1137 static int emit_exception_handler(C2_MacroAssembler *masm);
1138 static int emit_deopt_handler(C2_MacroAssembler* masm);
1139
1140 static uint size_exception_handler() {
1141 // NativeCall instruction size is the same as NativeJump.
1142 // exception handler starts out as jump and can be patched to
1143 // a call be deoptimization. (4932387)
1144 // Note that this value is also credited (in output.cpp) to
1145 // the size of the code section.
1146 return NativeJump::instruction_size;
1147 }
1148
1149 static uint size_deopt_handler() {
1150 // three 5 byte instructions plus one move for unreachable address.
1151 return 15+3;
1152 }
1153 };
1154
1155 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
1156 switch(bytes) {
1157 case 4: // fall-through
1158 case 8: // fall-through
1159 case 16: return Assembler::AVX_128bit;
1160 case 32: return Assembler::AVX_256bit;
1161 case 64: return Assembler::AVX_512bit;
1162
1163 default: {
1164 ShouldNotReachHere();
1165 return Assembler::AVX_NoVec;
1166 }
1167 }
1168 }
1169
1170 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
1171 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
1172 }
1173
1174 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
1175 uint def_idx = use->operand_index(opnd);
1176 Node* def = use->in(def_idx);
1177 return vector_length_encoding(def);
1178 }
1179
1180 static inline bool is_vector_popcount_predicate(BasicType bt) {
1181 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
1182 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
1183 }
1184
1185 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
1186 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
1187 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
1188 }
1189
1190 class Node::PD {
1191 public:
1192 enum NodeFlags {
1193 Flag_intel_jcc_erratum = Node::_last_flag << 1,
1194 Flag_sets_carry_flag = Node::_last_flag << 2,
1195 Flag_sets_parity_flag = Node::_last_flag << 3,
1196 Flag_sets_zero_flag = Node::_last_flag << 4,
1197 Flag_sets_overflow_flag = Node::_last_flag << 5,
1198 Flag_sets_sign_flag = Node::_last_flag << 6,
1199 Flag_clears_carry_flag = Node::_last_flag << 7,
1200 Flag_clears_parity_flag = Node::_last_flag << 8,
1201 Flag_clears_zero_flag = Node::_last_flag << 9,
1202 Flag_clears_overflow_flag = Node::_last_flag << 10,
1203 Flag_clears_sign_flag = Node::_last_flag << 11,
1204 _last_flag = Flag_clears_sign_flag
1205 };
1206 };
1207
1208 %} // end source_hpp
1209
1210 source %{
1211
1212 #include "opto/addnode.hpp"
1213 #include "c2_intelJccErratum_x86.hpp"
1214
1215 void PhaseOutput::pd_perform_mach_node_analysis() {
1216 if (VM_Version::has_intel_jcc_erratum()) {
1217 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
1218 _buf_sizes._code += extra_padding;
1219 }
1220 }
1221
1222 int MachNode::pd_alignment_required() const {
1223 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
1224 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
1225 return IntelJccErratum::largest_jcc_size() + 1;
1226 } else {
1227 return 1;
1228 }
1229 }
1230
1231 int MachNode::compute_padding(int current_offset) const {
1232 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
1233 Compile* C = Compile::current();
1234 PhaseOutput* output = C->output();
1235 Block* block = output->block();
1236 int index = output->index();
1237 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
1238 } else {
1239 return 0;
1240 }
1241 }
1242
1243 // Emit exception handler code.
1244 // Stuff framesize into a register and call a VM stub routine.
1245 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
1246
1247 // Note that the code buffer's insts_mark is always relative to insts.
1248 // That's why we must use the macroassembler to generate a handler.
1249 address base = __ start_a_stub(size_exception_handler());
1250 if (base == nullptr) {
1251 ciEnv::current()->record_failure("CodeCache is full");
1252 return 0; // CodeBuffer::expand failed
1253 }
1254 int offset = __ offset();
1255 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1256 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1257 __ end_a_stub();
1258 return offset;
1259 }
1260
1261 // Emit deopt handler code.
1262 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
1263
1264 // Note that the code buffer's insts_mark is always relative to insts.
1265 // That's why we must use the macroassembler to generate a handler.
1266 address base = __ start_a_stub(size_deopt_handler());
1267 if (base == nullptr) {
1268 ciEnv::current()->record_failure("CodeCache is full");
1269 return 0; // CodeBuffer::expand failed
1270 }
1271 int offset = __ offset();
1272
1273 address the_pc = (address) __ pc();
1274 Label next;
1275 // push a "the_pc" on the stack without destroying any registers
1276 // as they all may be live.
1277
1278 // push address of "next"
1279 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1280 __ bind(next);
1281 // adjust it so it matches "the_pc"
1282 __ subptr(Address(rsp, 0), __ offset() - offset);
1283
1284 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1285 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
1286 __ end_a_stub();
1287 return offset;
1288 }
1289
1290 static Assembler::Width widthForType(BasicType bt) {
1291 if (bt == T_BYTE) {
1292 return Assembler::B;
1293 } else if (bt == T_SHORT) {
1294 return Assembler::W;
1295 } else if (bt == T_INT) {
1296 return Assembler::D;
1297 } else {
1298 assert(bt == T_LONG, "not a long: %s", type2name(bt));
1299 return Assembler::Q;
1300 }
1301 }
1302
1303 //=============================================================================
1304
1305 // Float masks come from different places depending on platform.
1306 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
1307 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
1308 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1309 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1310 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
1311 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
1312 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
1313 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
1314 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
1315 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
1316 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
1317 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
1318 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
1319 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
1320 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
1321 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
1322 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
1323 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
1324 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
1325
1326 //=============================================================================
1327 bool Matcher::match_rule_supported(int opcode) {
1328 if (!has_match_rule(opcode)) {
1329 return false; // no match rule present
1330 }
1331 switch (opcode) {
1332 case Op_AbsVL:
1333 case Op_StoreVectorScatter:
1334 if (UseAVX < 3) {
1335 return false;
1336 }
1337 break;
1338 case Op_PopCountI:
1339 case Op_PopCountL:
1340 if (!UsePopCountInstruction) {
1341 return false;
1342 }
1343 break;
1344 case Op_PopCountVI:
1345 if (UseAVX < 2) {
1346 return false;
1347 }
1348 break;
1349 case Op_CompressV:
1350 case Op_ExpandV:
1351 case Op_PopCountVL:
1352 if (UseAVX < 2) {
1353 return false;
1354 }
1355 break;
1356 case Op_MulVI:
1357 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
1358 return false;
1359 }
1360 break;
1361 case Op_MulVL:
1362 if (UseSSE < 4) { // only with SSE4_1 or AVX
1363 return false;
1364 }
1365 break;
1366 case Op_MulReductionVL:
1367 if (VM_Version::supports_avx512dq() == false) {
1368 return false;
1369 }
1370 break;
1371 case Op_AbsVB:
1372 case Op_AbsVS:
1373 case Op_AbsVI:
1374 case Op_AddReductionVI:
1375 case Op_AndReductionV:
1376 case Op_OrReductionV:
1377 case Op_XorReductionV:
1378 if (UseSSE < 3) { // requires at least SSSE3
1379 return false;
1380 }
1381 break;
1382 case Op_MaxHF:
1383 case Op_MinHF:
1384 if (!VM_Version::supports_avx512vlbw()) {
1385 return false;
1386 } // fallthrough
1387 case Op_AddHF:
1388 case Op_DivHF:
1389 case Op_FmaHF:
1390 case Op_MulHF:
1391 case Op_ReinterpretS2HF:
1392 case Op_ReinterpretHF2S:
1393 case Op_SubHF:
1394 case Op_SqrtHF:
1395 if (!VM_Version::supports_avx512_fp16()) {
1396 return false;
1397 }
1398 break;
1399 case Op_VectorLoadShuffle:
1400 case Op_VectorRearrange:
1401 case Op_MulReductionVI:
1402 if (UseSSE < 4) { // requires at least SSE4
1403 return false;
1404 }
1405 break;
1406 case Op_IsInfiniteF:
1407 case Op_IsInfiniteD:
1408 if (!VM_Version::supports_avx512dq()) {
1409 return false;
1410 }
1411 break;
1412 case Op_SqrtVD:
1413 case Op_SqrtVF:
1414 case Op_VectorMaskCmp:
1415 case Op_VectorCastB2X:
1416 case Op_VectorCastS2X:
1417 case Op_VectorCastI2X:
1418 case Op_VectorCastL2X:
1419 case Op_VectorCastF2X:
1420 case Op_VectorCastD2X:
1421 case Op_VectorUCastB2X:
1422 case Op_VectorUCastS2X:
1423 case Op_VectorUCastI2X:
1424 case Op_VectorMaskCast:
1425 if (UseAVX < 1) { // enabled for AVX only
1426 return false;
1427 }
1428 break;
1429 case Op_PopulateIndex:
1430 if (UseAVX < 2) {
1431 return false;
1432 }
1433 break;
1434 case Op_RoundVF:
1435 if (UseAVX < 2) { // enabled for AVX2 only
1436 return false;
1437 }
1438 break;
1439 case Op_RoundVD:
1440 if (UseAVX < 3) {
1441 return false; // enabled for AVX3 only
1442 }
1443 break;
1444 case Op_CompareAndSwapL:
1445 case Op_CompareAndSwapP:
1446 break;
1447 case Op_StrIndexOf:
1448 if (!UseSSE42Intrinsics) {
1449 return false;
1450 }
1451 break;
1452 case Op_StrIndexOfChar:
1453 if (!UseSSE42Intrinsics) {
1454 return false;
1455 }
1456 break;
1457 case Op_OnSpinWait:
1458 if (VM_Version::supports_on_spin_wait() == false) {
1459 return false;
1460 }
1461 break;
1462 case Op_MulVB:
1463 case Op_LShiftVB:
1464 case Op_RShiftVB:
1465 case Op_URShiftVB:
1466 case Op_VectorInsert:
1467 case Op_VectorLoadMask:
1468 case Op_VectorStoreMask:
1469 case Op_VectorBlend:
1470 if (UseSSE < 4) {
1471 return false;
1472 }
1473 break;
1474 case Op_MaxD:
1475 case Op_MaxF:
1476 case Op_MinD:
1477 case Op_MinF:
1478 if (UseAVX < 1) { // enabled for AVX only
1479 return false;
1480 }
1481 break;
1482 case Op_CacheWB:
1483 case Op_CacheWBPreSync:
1484 case Op_CacheWBPostSync:
1485 if (!VM_Version::supports_data_cache_line_flush()) {
1486 return false;
1487 }
1488 break;
1489 case Op_ExtractB:
1490 case Op_ExtractL:
1491 case Op_ExtractI:
1492 case Op_RoundDoubleMode:
1493 if (UseSSE < 4) {
1494 return false;
1495 }
1496 break;
1497 case Op_RoundDoubleModeV:
1498 if (VM_Version::supports_avx() == false) {
1499 return false; // 128bit vroundpd is not available
1500 }
1501 break;
1502 case Op_LoadVectorGather:
1503 case Op_LoadVectorGatherMasked:
1504 if (UseAVX < 2) {
1505 return false;
1506 }
1507 break;
1508 case Op_FmaF:
1509 case Op_FmaD:
1510 case Op_FmaVD:
1511 case Op_FmaVF:
1512 if (!UseFMA) {
1513 return false;
1514 }
1515 break;
1516 case Op_MacroLogicV:
1517 if (UseAVX < 3 || !UseVectorMacroLogic) {
1518 return false;
1519 }
1520 break;
1521
1522 case Op_VectorCmpMasked:
1523 case Op_VectorMaskGen:
1524 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
1525 return false;
1526 }
1527 break;
1528 case Op_VectorMaskFirstTrue:
1529 case Op_VectorMaskLastTrue:
1530 case Op_VectorMaskTrueCount:
1531 case Op_VectorMaskToLong:
1532 if (UseAVX < 1) {
1533 return false;
1534 }
1535 break;
1536 case Op_RoundF:
1537 case Op_RoundD:
1538 break;
1539 case Op_CopySignD:
1540 case Op_CopySignF:
1541 if (UseAVX < 3) {
1542 return false;
1543 }
1544 if (!VM_Version::supports_avx512vl()) {
1545 return false;
1546 }
1547 break;
1548 case Op_CompressBits:
1549 case Op_ExpandBits:
1550 if (!VM_Version::supports_bmi2()) {
1551 return false;
1552 }
1553 break;
1554 case Op_CompressM:
1555 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
1556 return false;
1557 }
1558 break;
1559 case Op_ConvF2HF:
1560 case Op_ConvHF2F:
1561 if (!VM_Version::supports_float16()) {
1562 return false;
1563 }
1564 break;
1565 case Op_VectorCastF2HF:
1566 case Op_VectorCastHF2F:
1567 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
1568 return false;
1569 }
1570 break;
1571 }
1572 return true; // Match rules are supported by default.
1573 }
1574
1575 //------------------------------------------------------------------------
1576
1577 static inline bool is_pop_count_instr_target(BasicType bt) {
1578 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
1579 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
1580 }
1581
1582 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
1583 return match_rule_supported_vector(opcode, vlen, bt);
1584 }
1585
1586 // Identify extra cases that we might want to provide match rules for vector nodes and
1587 // other intrinsics guarded with vector length (vlen) and element type (bt).
1588 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
1589 if (!match_rule_supported(opcode)) {
1590 return false;
1591 }
1592 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
1593 // * SSE2 supports 128bit vectors for all types;
1594 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
1595 // * AVX2 supports 256bit vectors for all types;
1596 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
1597 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
1598 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
1599 // And MaxVectorSize is taken into account as well.
1600 if (!vector_size_supported(bt, vlen)) {
1601 return false;
1602 }
1603 // Special cases which require vector length follow:
1604 // * implementation limitations
1605 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
1606 // * 128bit vroundpd instruction is present only in AVX1
1607 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
1608 switch (opcode) {
1609 case Op_MaxVHF:
1610 case Op_MinVHF:
1611 if (!VM_Version::supports_avx512bw()) {
1612 return false;
1613 }
1614 case Op_AddVHF:
1615 case Op_DivVHF:
1616 case Op_FmaVHF:
1617 case Op_MulVHF:
1618 case Op_SubVHF:
1619 case Op_SqrtVHF:
1620 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1621 return false;
1622 }
1623 if (!VM_Version::supports_avx512_fp16()) {
1624 return false;
1625 }
1626 break;
1627 case Op_AbsVF:
1628 case Op_NegVF:
1629 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
1630 return false; // 512bit vandps and vxorps are not available
1631 }
1632 break;
1633 case Op_AbsVD:
1634 case Op_NegVD:
1635 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
1636 return false; // 512bit vpmullq, vandpd and vxorpd are not available
1637 }
1638 break;
1639 case Op_RotateRightV:
1640 case Op_RotateLeftV:
1641 if (bt != T_INT && bt != T_LONG) {
1642 return false;
1643 } // fallthrough
1644 case Op_MacroLogicV:
1645 if (!VM_Version::supports_evex() ||
1646 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
1647 return false;
1648 }
1649 break;
1650 case Op_ClearArray:
1651 case Op_VectorMaskGen:
1652 case Op_VectorCmpMasked:
1653 if (!VM_Version::supports_avx512bw()) {
1654 return false;
1655 }
1656 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
1657 return false;
1658 }
1659 break;
1660 case Op_LoadVectorMasked:
1661 case Op_StoreVectorMasked:
1662 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
1663 return false;
1664 }
1665 break;
1666 case Op_UMinV:
1667 case Op_UMaxV:
1668 if (UseAVX == 0) {
1669 return false;
1670 }
1671 break;
1672 case Op_MaxV:
1673 case Op_MinV:
1674 if (UseSSE < 4 && is_integral_type(bt)) {
1675 return false;
1676 }
1677 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
1678 // Float/Double intrinsics are enabled for AVX family currently.
1679 if (UseAVX == 0) {
1680 return false;
1681 }
1682 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
1683 return false;
1684 }
1685 }
1686 break;
1687 case Op_CallLeafVector:
1688 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
1689 return false;
1690 }
1691 break;
1692 case Op_AddReductionVI:
1693 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
1694 return false;
1695 }
1696 // fallthrough
1697 case Op_AndReductionV:
1698 case Op_OrReductionV:
1699 case Op_XorReductionV:
1700 if (is_subword_type(bt) && (UseSSE < 4)) {
1701 return false;
1702 }
1703 break;
1704 case Op_MinReductionV:
1705 case Op_MaxReductionV:
1706 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
1707 return false;
1708 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
1709 return false;
1710 }
1711 // Float/Double intrinsics enabled for AVX family.
1712 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
1713 return false;
1714 }
1715 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
1716 return false;
1717 }
1718 break;
1719 case Op_VectorTest:
1720 if (UseSSE < 4) {
1721 return false; // Implementation limitation
1722 } else if (size_in_bits < 32) {
1723 return false; // Implementation limitation
1724 }
1725 break;
1726 case Op_VectorLoadShuffle:
1727 case Op_VectorRearrange:
1728 if(vlen == 2) {
1729 return false; // Implementation limitation due to how shuffle is loaded
1730 } else if (size_in_bits == 256 && UseAVX < 2) {
1731 return false; // Implementation limitation
1732 }
1733 break;
1734 case Op_VectorLoadMask:
1735 case Op_VectorMaskCast:
1736 if (size_in_bits == 256 && UseAVX < 2) {
1737 return false; // Implementation limitation
1738 }
1739 // fallthrough
1740 case Op_VectorStoreMask:
1741 if (vlen == 2) {
1742 return false; // Implementation limitation
1743 }
1744 break;
1745 case Op_PopulateIndex:
1746 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
1747 return false;
1748 }
1749 break;
1750 case Op_VectorCastB2X:
1751 case Op_VectorCastS2X:
1752 case Op_VectorCastI2X:
1753 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
1754 return false;
1755 }
1756 break;
1757 case Op_VectorCastL2X:
1758 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
1759 return false;
1760 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
1761 return false;
1762 }
1763 break;
1764 case Op_VectorCastF2X: {
1765 // As per JLS section 5.1.3 narrowing conversion to sub-word types
1766 // happen after intermediate conversion to integer and special handling
1767 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
1768 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
1769 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
1770 return false;
1771 }
1772 }
1773 // fallthrough
1774 case Op_VectorCastD2X:
1775 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
1776 return false;
1777 }
1778 break;
1779 case Op_VectorCastF2HF:
1780 case Op_VectorCastHF2F:
1781 if (!VM_Version::supports_f16c() &&
1782 ((!VM_Version::supports_evex() ||
1783 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
1784 return false;
1785 }
1786 break;
1787 case Op_RoundVD:
1788 if (!VM_Version::supports_avx512dq()) {
1789 return false;
1790 }
1791 break;
1792 case Op_MulReductionVI:
1793 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
1794 return false;
1795 }
1796 break;
1797 case Op_LoadVectorGatherMasked:
1798 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1799 return false;
1800 }
1801 if (is_subword_type(bt) &&
1802 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
1803 (size_in_bits < 64) ||
1804 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
1805 return false;
1806 }
1807 break;
1808 case Op_StoreVectorScatterMasked:
1809 case Op_StoreVectorScatter:
1810 if (is_subword_type(bt)) {
1811 return false;
1812 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1813 return false;
1814 }
1815 // fallthrough
1816 case Op_LoadVectorGather:
1817 if (!is_subword_type(bt) && size_in_bits == 64) {
1818 return false;
1819 }
1820 if (is_subword_type(bt) && size_in_bits < 64) {
1821 return false;
1822 }
1823 break;
1824 case Op_SaturatingAddV:
1825 case Op_SaturatingSubV:
1826 if (UseAVX < 1) {
1827 return false; // Implementation limitation
1828 }
1829 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
1830 return false;
1831 }
1832 break;
1833 case Op_SelectFromTwoVector:
1834 if (size_in_bits < 128) {
1835 return false;
1836 }
1837 if ((size_in_bits < 512 && !VM_Version::supports_avx512vl())) {
1838 return false;
1839 }
1840 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
1841 return false;
1842 }
1843 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
1844 return false;
1845 }
1846 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
1847 return false;
1848 }
1849 break;
1850 case Op_MaskAll:
1851 if (!VM_Version::supports_evex()) {
1852 return false;
1853 }
1854 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
1855 return false;
1856 }
1857 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1858 return false;
1859 }
1860 break;
1861 case Op_VectorMaskCmp:
1862 if (vlen < 2 || size_in_bits < 32) {
1863 return false;
1864 }
1865 break;
1866 case Op_CompressM:
1867 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
1868 return false;
1869 }
1870 break;
1871 case Op_CompressV:
1872 case Op_ExpandV:
1873 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
1874 return false;
1875 }
1876 if (size_in_bits < 128 ) {
1877 return false;
1878 }
1879 case Op_VectorLongToMask:
1880 if (UseAVX < 1) {
1881 return false;
1882 }
1883 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
1884 return false;
1885 }
1886 break;
1887 case Op_SignumVD:
1888 case Op_SignumVF:
1889 if (UseAVX < 1) {
1890 return false;
1891 }
1892 break;
1893 case Op_PopCountVI:
1894 case Op_PopCountVL: {
1895 if (!is_pop_count_instr_target(bt) &&
1896 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
1897 return false;
1898 }
1899 }
1900 break;
1901 case Op_ReverseV:
1902 case Op_ReverseBytesV:
1903 if (UseAVX < 2) {
1904 return false;
1905 }
1906 break;
1907 case Op_CountTrailingZerosV:
1908 case Op_CountLeadingZerosV:
1909 if (UseAVX < 2) {
1910 return false;
1911 }
1912 break;
1913 }
1914 return true; // Per default match rules are supported.
1915 }
1916
1917 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
1918 // ADLC based match_rule_supported routine checks for the existence of pattern based
1919 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
1920 // of their non-masked counterpart with mask edge being the differentiator.
1921 // This routine does a strict check on the existence of masked operation patterns
1922 // by returning a default false value for all the other opcodes apart from the
1923 // ones whose masked instruction patterns are defined in this file.
1924 if (!match_rule_supported_vector(opcode, vlen, bt)) {
1925 return false;
1926 }
1927
1928 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
1929 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
1930 return false;
1931 }
1932 switch(opcode) {
1933 // Unary masked operations
1934 case Op_AbsVB:
1935 case Op_AbsVS:
1936 if(!VM_Version::supports_avx512bw()) {
1937 return false; // Implementation limitation
1938 }
1939 case Op_AbsVI:
1940 case Op_AbsVL:
1941 return true;
1942
1943 // Ternary masked operations
1944 case Op_FmaVF:
1945 case Op_FmaVD:
1946 return true;
1947
1948 case Op_MacroLogicV:
1949 if(bt != T_INT && bt != T_LONG) {
1950 return false;
1951 }
1952 return true;
1953
1954 // Binary masked operations
1955 case Op_AddVB:
1956 case Op_AddVS:
1957 case Op_SubVB:
1958 case Op_SubVS:
1959 case Op_MulVS:
1960 case Op_LShiftVS:
1961 case Op_RShiftVS:
1962 case Op_URShiftVS:
1963 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
1964 if (!VM_Version::supports_avx512bw()) {
1965 return false; // Implementation limitation
1966 }
1967 return true;
1968
1969 case Op_MulVL:
1970 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
1971 if (!VM_Version::supports_avx512dq()) {
1972 return false; // Implementation limitation
1973 }
1974 return true;
1975
1976 case Op_AndV:
1977 case Op_OrV:
1978 case Op_XorV:
1979 case Op_RotateRightV:
1980 case Op_RotateLeftV:
1981 if (bt != T_INT && bt != T_LONG) {
1982 return false; // Implementation limitation
1983 }
1984 return true;
1985
1986 case Op_VectorLoadMask:
1987 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
1988 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
1989 return false;
1990 }
1991 return true;
1992
1993 case Op_AddVI:
1994 case Op_AddVL:
1995 case Op_AddVF:
1996 case Op_AddVD:
1997 case Op_SubVI:
1998 case Op_SubVL:
1999 case Op_SubVF:
2000 case Op_SubVD:
2001 case Op_MulVI:
2002 case Op_MulVF:
2003 case Op_MulVD:
2004 case Op_DivVF:
2005 case Op_DivVD:
2006 case Op_SqrtVF:
2007 case Op_SqrtVD:
2008 case Op_LShiftVI:
2009 case Op_LShiftVL:
2010 case Op_RShiftVI:
2011 case Op_RShiftVL:
2012 case Op_URShiftVI:
2013 case Op_URShiftVL:
2014 case Op_LoadVectorMasked:
2015 case Op_StoreVectorMasked:
2016 case Op_LoadVectorGatherMasked:
2017 case Op_StoreVectorScatterMasked:
2018 return true;
2019
2020 case Op_UMinV:
2021 case Op_UMaxV:
2022 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
2023 return false;
2024 } // fallthrough
2025 case Op_MaxV:
2026 case Op_MinV:
2027 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
2028 return false; // Implementation limitation
2029 }
2030 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
2031 return false; // Implementation limitation
2032 }
2033 return true;
2034 case Op_SaturatingAddV:
2035 case Op_SaturatingSubV:
2036 if (!is_subword_type(bt)) {
2037 return false;
2038 }
2039 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
2040 return false; // Implementation limitation
2041 }
2042 return true;
2043
2044 case Op_VectorMaskCmp:
2045 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
2046 return false; // Implementation limitation
2047 }
2048 return true;
2049
2050 case Op_VectorRearrange:
2051 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
2052 return false; // Implementation limitation
2053 }
2054 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
2055 return false; // Implementation limitation
2056 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
2057 return false; // Implementation limitation
2058 }
2059 return true;
2060
2061 // Binary Logical operations
2062 case Op_AndVMask:
2063 case Op_OrVMask:
2064 case Op_XorVMask:
2065 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
2066 return false; // Implementation limitation
2067 }
2068 return true;
2069
2070 case Op_PopCountVI:
2071 case Op_PopCountVL:
2072 if (!is_pop_count_instr_target(bt)) {
2073 return false;
2074 }
2075 return true;
2076
2077 case Op_MaskAll:
2078 return true;
2079
2080 case Op_CountLeadingZerosV:
2081 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
2082 return true;
2083 }
2084 default:
2085 return false;
2086 }
2087 }
2088
2089 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
2090 return false;
2091 }
2092
2093 // Return true if Vector::rearrange needs preparation of the shuffle argument
2094 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
2095 switch (elem_bt) {
2096 case T_BYTE: return false;
2097 case T_SHORT: return !VM_Version::supports_avx512bw();
2098 case T_INT: return !VM_Version::supports_avx();
2099 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
2100 default:
2101 ShouldNotReachHere();
2102 return false;
2103 }
2104 }
2105
2106 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
2107 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
2108 bool legacy = (generic_opnd->opcode() == LEGVEC);
2109 if (!VM_Version::supports_avx512vlbwdq() && // KNL
2110 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
2111 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
2112 return new legVecZOper();
2113 }
2114 if (legacy) {
2115 switch (ideal_reg) {
2116 case Op_VecS: return new legVecSOper();
2117 case Op_VecD: return new legVecDOper();
2118 case Op_VecX: return new legVecXOper();
2119 case Op_VecY: return new legVecYOper();
2120 case Op_VecZ: return new legVecZOper();
2121 }
2122 } else {
2123 switch (ideal_reg) {
2124 case Op_VecS: return new vecSOper();
2125 case Op_VecD: return new vecDOper();
2126 case Op_VecX: return new vecXOper();
2127 case Op_VecY: return new vecYOper();
2128 case Op_VecZ: return new vecZOper();
2129 }
2130 }
2131 ShouldNotReachHere();
2132 return nullptr;
2133 }
2134
2135 bool Matcher::is_reg2reg_move(MachNode* m) {
2136 switch (m->rule()) {
2137 case MoveVec2Leg_rule:
2138 case MoveLeg2Vec_rule:
2139 case MoveF2VL_rule:
2140 case MoveF2LEG_rule:
2141 case MoveVL2F_rule:
2142 case MoveLEG2F_rule:
2143 case MoveD2VL_rule:
2144 case MoveD2LEG_rule:
2145 case MoveVL2D_rule:
2146 case MoveLEG2D_rule:
2147 return true;
2148 default:
2149 return false;
2150 }
2151 }
2152
2153 bool Matcher::is_generic_vector(MachOper* opnd) {
2154 switch (opnd->opcode()) {
2155 case VEC:
2156 case LEGVEC:
2157 return true;
2158 default:
2159 return false;
2160 }
2161 }
2162
2163 //------------------------------------------------------------------------
2164
2165 const RegMask* Matcher::predicate_reg_mask(void) {
2166 return &_VECTMASK_REG_mask;
2167 }
2168
2169 // Max vector size in bytes. 0 if not supported.
2170 int Matcher::vector_width_in_bytes(BasicType bt) {
2171 assert(is_java_primitive(bt), "only primitive type vectors");
2172 // SSE2 supports 128bit vectors for all types.
2173 // AVX2 supports 256bit vectors for all types.
2174 // AVX2/EVEX supports 512bit vectors for all types.
2175 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
2176 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
2177 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
2178 size = (UseAVX > 2) ? 64 : 32;
2179 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
2180 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
2181 // Use flag to limit vector size.
2182 size = MIN2(size,(int)MaxVectorSize);
2183 // Minimum 2 values in vector (or 4 for bytes).
2184 switch (bt) {
2185 case T_DOUBLE:
2186 case T_LONG:
2187 if (size < 16) return 0;
2188 break;
2189 case T_FLOAT:
2190 case T_INT:
2191 if (size < 8) return 0;
2192 break;
2193 case T_BOOLEAN:
2194 if (size < 4) return 0;
2195 break;
2196 case T_CHAR:
2197 if (size < 4) return 0;
2198 break;
2199 case T_BYTE:
2200 if (size < 4) return 0;
2201 break;
2202 case T_SHORT:
2203 if (size < 4) return 0;
2204 break;
2205 default:
2206 ShouldNotReachHere();
2207 }
2208 return size;
2209 }
2210
2211 // Limits on vector size (number of elements) loaded into vector.
2212 int Matcher::max_vector_size(const BasicType bt) {
2213 return vector_width_in_bytes(bt)/type2aelembytes(bt);
2214 }
2215 int Matcher::min_vector_size(const BasicType bt) {
2216 int max_size = max_vector_size(bt);
2217 // Min size which can be loaded into vector is 4 bytes.
2218 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
2219 // Support for calling svml double64 vectors
2220 if (bt == T_DOUBLE) {
2221 size = 1;
2222 }
2223 return MIN2(size,max_size);
2224 }
2225
2226 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
2227 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
2228 // by default on Cascade Lake
2229 if (VM_Version::is_default_intel_cascade_lake()) {
2230 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
2231 }
2232 return Matcher::max_vector_size(bt);
2233 }
2234
2235 int Matcher::scalable_vector_reg_size(const BasicType bt) {
2236 return -1;
2237 }
2238
2239 // Vector ideal reg corresponding to specified size in bytes
2240 uint Matcher::vector_ideal_reg(int size) {
2241 assert(MaxVectorSize >= size, "");
2242 switch(size) {
2243 case 4: return Op_VecS;
2244 case 8: return Op_VecD;
2245 case 16: return Op_VecX;
2246 case 32: return Op_VecY;
2247 case 64: return Op_VecZ;
2248 }
2249 ShouldNotReachHere();
2250 return 0;
2251 }
2252
2253 // Check for shift by small constant as well
2254 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
2255 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
2256 shift->in(2)->get_int() <= 3 &&
2257 // Are there other uses besides address expressions?
2258 !matcher->is_visited(shift)) {
2259 address_visited.set(shift->_idx); // Flag as address_visited
2260 mstack.push(shift->in(2), Matcher::Visit);
2261 Node *conv = shift->in(1);
2262 // Allow Matcher to match the rule which bypass
2263 // ConvI2L operation for an array index on LP64
2264 // if the index value is positive.
2265 if (conv->Opcode() == Op_ConvI2L &&
2266 conv->as_Type()->type()->is_long()->_lo >= 0 &&
2267 // Are there other uses besides address expressions?
2268 !matcher->is_visited(conv)) {
2269 address_visited.set(conv->_idx); // Flag as address_visited
2270 mstack.push(conv->in(1), Matcher::Pre_Visit);
2271 } else {
2272 mstack.push(conv, Matcher::Pre_Visit);
2273 }
2274 return true;
2275 }
2276 return false;
2277 }
2278
2279 // This function identifies sub-graphs in which a 'load' node is
2280 // input to two different nodes, and such that it can be matched
2281 // with BMI instructions like blsi, blsr, etc.
2282 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
2283 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
2284 // refers to the same node.
2285 //
2286 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
2287 // This is a temporary solution until we make DAGs expressible in ADL.
2288 template<typename ConType>
2289 class FusedPatternMatcher {
2290 Node* _op1_node;
2291 Node* _mop_node;
2292 int _con_op;
2293
2294 static int match_next(Node* n, int next_op, int next_op_idx) {
2295 if (n->in(1) == nullptr || n->in(2) == nullptr) {
2296 return -1;
2297 }
2298
2299 if (next_op_idx == -1) { // n is commutative, try rotations
2300 if (n->in(1)->Opcode() == next_op) {
2301 return 1;
2302 } else if (n->in(2)->Opcode() == next_op) {
2303 return 2;
2304 }
2305 } else {
2306 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
2307 if (n->in(next_op_idx)->Opcode() == next_op) {
2308 return next_op_idx;
2309 }
2310 }
2311 return -1;
2312 }
2313
2314 public:
2315 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
2316 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
2317
2318 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
2319 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
2320 typename ConType::NativeType con_value) {
2321 if (_op1_node->Opcode() != op1) {
2322 return false;
2323 }
2324 if (_mop_node->outcnt() > 2) {
2325 return false;
2326 }
2327 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
2328 if (op1_op2_idx == -1) {
2329 return false;
2330 }
2331 // Memory operation must be the other edge
2332 int op1_mop_idx = (op1_op2_idx & 1) + 1;
2333
2334 // Check that the mop node is really what we want
2335 if (_op1_node->in(op1_mop_idx) == _mop_node) {
2336 Node* op2_node = _op1_node->in(op1_op2_idx);
2337 if (op2_node->outcnt() > 1) {
2338 return false;
2339 }
2340 assert(op2_node->Opcode() == op2, "Should be");
2341 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
2342 if (op2_con_idx == -1) {
2343 return false;
2344 }
2345 // Memory operation must be the other edge
2346 int op2_mop_idx = (op2_con_idx & 1) + 1;
2347 // Check that the memory operation is the same node
2348 if (op2_node->in(op2_mop_idx) == _mop_node) {
2349 // Now check the constant
2350 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
2351 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
2352 return true;
2353 }
2354 }
2355 }
2356 return false;
2357 }
2358 };
2359
2360 static bool is_bmi_pattern(Node* n, Node* m) {
2361 assert(UseBMI1Instructions, "sanity");
2362 if (n != nullptr && m != nullptr) {
2363 if (m->Opcode() == Op_LoadI) {
2364 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
2365 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
2366 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
2367 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
2368 } else if (m->Opcode() == Op_LoadL) {
2369 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
2370 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
2371 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
2372 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
2373 }
2374 }
2375 return false;
2376 }
2377
2378 // Should the matcher clone input 'm' of node 'n'?
2379 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2380 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
2381 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
2382 mstack.push(m, Visit);
2383 return true;
2384 }
2385 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
2386 mstack.push(m, Visit); // m = ShiftCntV
2387 return true;
2388 }
2389 if (is_encode_and_store_pattern(n, m)) {
2390 mstack.push(m, Visit);
2391 return true;
2392 }
2393 return false;
2394 }
2395
2396 // Should the Matcher clone shifts on addressing modes, expecting them
2397 // to be subsumed into complex addressing expressions or compute them
2398 // into registers?
2399 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
2400 Node *off = m->in(AddPNode::Offset);
2401 if (off->is_Con()) {
2402 address_visited.test_set(m->_idx); // Flag as address_visited
2403 Node *adr = m->in(AddPNode::Address);
2404
2405 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
2406 // AtomicAdd is not an addressing expression.
2407 // Cheap to find it by looking for screwy base.
2408 if (adr->is_AddP() &&
2409 !adr->in(AddPNode::Base)->is_top() &&
2410 !adr->in(AddPNode::Offset)->is_Con() &&
2411 off->get_long() == (int) (off->get_long()) && // immL32
2412 // Are there other uses besides address expressions?
2413 !is_visited(adr)) {
2414 address_visited.set(adr->_idx); // Flag as address_visited
2415 Node *shift = adr->in(AddPNode::Offset);
2416 if (!clone_shift(shift, this, mstack, address_visited)) {
2417 mstack.push(shift, Pre_Visit);
2418 }
2419 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
2420 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
2421 } else {
2422 mstack.push(adr, Pre_Visit);
2423 }
2424
2425 // Clone X+offset as it also folds into most addressing expressions
2426 mstack.push(off, Visit);
2427 mstack.push(m->in(AddPNode::Base), Pre_Visit);
2428 return true;
2429 } else if (clone_shift(off, this, mstack, address_visited)) {
2430 address_visited.test_set(m->_idx); // Flag as address_visited
2431 mstack.push(m->in(AddPNode::Address), Pre_Visit);
2432 mstack.push(m->in(AddPNode::Base), Pre_Visit);
2433 return true;
2434 }
2435 return false;
2436 }
2437
2438 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
2439 switch (bt) {
2440 case BoolTest::eq:
2441 return Assembler::eq;
2442 case BoolTest::ne:
2443 return Assembler::neq;
2444 case BoolTest::le:
2445 case BoolTest::ule:
2446 return Assembler::le;
2447 case BoolTest::ge:
2448 case BoolTest::uge:
2449 return Assembler::nlt;
2450 case BoolTest::lt:
2451 case BoolTest::ult:
2452 return Assembler::lt;
2453 case BoolTest::gt:
2454 case BoolTest::ugt:
2455 return Assembler::nle;
2456 default : ShouldNotReachHere(); return Assembler::_false;
2457 }
2458 }
2459
2460 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
2461 switch (bt) {
2462 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
2463 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
2464 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
2465 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
2466 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
2467 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
2468 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
2469 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
2470 }
2471 }
2472
2473 // Helper methods for MachSpillCopyNode::implementation().
2474 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2475 int src_hi, int dst_hi, uint ireg, outputStream* st) {
2476 assert(ireg == Op_VecS || // 32bit vector
2477 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
2478 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
2479 "no non-adjacent vector moves" );
2480 if (masm) {
2481 switch (ireg) {
2482 case Op_VecS: // copy whole register
2483 case Op_VecD:
2484 case Op_VecX:
2485 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2486 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
2487 } else {
2488 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
2489 }
2490 break;
2491 case Op_VecY:
2492 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2493 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
2494 } else {
2495 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
2496 }
2497 break;
2498 case Op_VecZ:
2499 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
2500 break;
2501 default:
2502 ShouldNotReachHere();
2503 }
2504 #ifndef PRODUCT
2505 } else {
2506 switch (ireg) {
2507 case Op_VecS:
2508 case Op_VecD:
2509 case Op_VecX:
2510 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
2511 break;
2512 case Op_VecY:
2513 case Op_VecZ:
2514 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
2515 break;
2516 default:
2517 ShouldNotReachHere();
2518 }
2519 #endif
2520 }
2521 }
2522
2523 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2524 int stack_offset, int reg, uint ireg, outputStream* st) {
2525 if (masm) {
2526 if (is_load) {
2527 switch (ireg) {
2528 case Op_VecS:
2529 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2530 break;
2531 case Op_VecD:
2532 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2533 break;
2534 case Op_VecX:
2535 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2536 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2537 } else {
2538 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2539 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
2540 }
2541 break;
2542 case Op_VecY:
2543 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2544 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
2545 } else {
2546 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2547 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
2548 }
2549 break;
2550 case Op_VecZ:
2551 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
2552 break;
2553 default:
2554 ShouldNotReachHere();
2555 }
2556 } else { // store
2557 switch (ireg) {
2558 case Op_VecS:
2559 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2560 break;
2561 case Op_VecD:
2562 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2563 break;
2564 case Op_VecX:
2565 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2566 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2567 }
2568 else {
2569 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
2570 }
2571 break;
2572 case Op_VecY:
2573 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
2574 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
2575 }
2576 else {
2577 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
2578 }
2579 break;
2580 case Op_VecZ:
2581 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
2582 break;
2583 default:
2584 ShouldNotReachHere();
2585 }
2586 }
2587 #ifndef PRODUCT
2588 } else {
2589 if (is_load) {
2590 switch (ireg) {
2591 case Op_VecS:
2592 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2593 break;
2594 case Op_VecD:
2595 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2596 break;
2597 case Op_VecX:
2598 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2599 break;
2600 case Op_VecY:
2601 case Op_VecZ:
2602 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2603 break;
2604 default:
2605 ShouldNotReachHere();
2606 }
2607 } else { // store
2608 switch (ireg) {
2609 case Op_VecS:
2610 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2611 break;
2612 case Op_VecD:
2613 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2614 break;
2615 case Op_VecX:
2616 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2617 break;
2618 case Op_VecY:
2619 case Op_VecZ:
2620 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2621 break;
2622 default:
2623 ShouldNotReachHere();
2624 }
2625 }
2626 #endif
2627 }
2628 }
2629
2630 template <class T>
2631 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
2632 int size = type2aelembytes(bt) * len;
2633 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
2634 for (int i = 0; i < len; i++) {
2635 int offset = i * type2aelembytes(bt);
2636 switch (bt) {
2637 case T_BYTE: val->at(i) = con; break;
2638 case T_SHORT: {
2639 jshort c = con;
2640 memcpy(val->adr_at(offset), &c, sizeof(jshort));
2641 break;
2642 }
2643 case T_INT: {
2644 jint c = con;
2645 memcpy(val->adr_at(offset), &c, sizeof(jint));
2646 break;
2647 }
2648 case T_LONG: {
2649 jlong c = con;
2650 memcpy(val->adr_at(offset), &c, sizeof(jlong));
2651 break;
2652 }
2653 case T_FLOAT: {
2654 jfloat c = con;
2655 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
2656 break;
2657 }
2658 case T_DOUBLE: {
2659 jdouble c = con;
2660 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
2661 break;
2662 }
2663 default: assert(false, "%s", type2name(bt));
2664 }
2665 }
2666 return val;
2667 }
2668
2669 static inline jlong high_bit_set(BasicType bt) {
2670 switch (bt) {
2671 case T_BYTE: return 0x8080808080808080;
2672 case T_SHORT: return 0x8000800080008000;
2673 case T_INT: return 0x8000000080000000;
2674 case T_LONG: return 0x8000000000000000;
2675 default:
2676 ShouldNotReachHere();
2677 return 0;
2678 }
2679 }
2680
2681 #ifndef PRODUCT
2682 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2683 st->print("nop \t# %d bytes pad for loops and calls", _count);
2684 }
2685 #endif
2686
2687 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
2688 __ nop(_count);
2689 }
2690
2691 uint MachNopNode::size(PhaseRegAlloc*) const {
2692 return _count;
2693 }
2694
2695 #ifndef PRODUCT
2696 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
2697 st->print("# breakpoint");
2698 }
2699 #endif
2700
2701 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
2702 __ int3();
2703 }
2704
2705 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
2706 return MachNode::size(ra_);
2707 }
2708
2709 %}
2710
2711 encode %{
2712
2713 enc_class call_epilog %{
2714 if (VerifyStackAtCalls) {
2715 // Check that stack depth is unchanged: find majik cookie on stack
2716 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2717 Label L;
2718 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2719 __ jccb(Assembler::equal, L);
2720 // Die if stack mismatch
2721 __ int3();
2722 __ bind(L);
2723 }
2724 %}
2725
2726 %}
2727
2728 // Operands for bound floating pointer register arguments
2729 operand rxmm0() %{
2730 constraint(ALLOC_IN_RC(xmm0_reg));
2731 match(VecX);
2732 format%{%}
2733 interface(REG_INTER);
2734 %}
2735
2736 //----------OPERANDS-----------------------------------------------------------
2737 // Operand definitions must precede instruction definitions for correct parsing
2738 // in the ADLC because operands constitute user defined types which are used in
2739 // instruction definitions.
2740
2741 // Vectors
2742
2743 // Dummy generic vector class. Should be used for all vector operands.
2744 // Replaced with vec[SDXYZ] during post-selection pass.
2745 operand vec() %{
2746 constraint(ALLOC_IN_RC(dynamic));
2747 match(VecX);
2748 match(VecY);
2749 match(VecZ);
2750 match(VecS);
2751 match(VecD);
2752
2753 format %{ %}
2754 interface(REG_INTER);
2755 %}
2756
2757 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
2758 // Replaced with legVec[SDXYZ] during post-selection cleanup.
2759 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
2760 // runtime code generation via reg_class_dynamic.
2761 operand legVec() %{
2762 constraint(ALLOC_IN_RC(dynamic));
2763 match(VecX);
2764 match(VecY);
2765 match(VecZ);
2766 match(VecS);
2767 match(VecD);
2768
2769 format %{ %}
2770 interface(REG_INTER);
2771 %}
2772
2773 // Replaces vec during post-selection cleanup. See above.
2774 operand vecS() %{
2775 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
2776 match(VecS);
2777
2778 format %{ %}
2779 interface(REG_INTER);
2780 %}
2781
2782 // Replaces legVec during post-selection cleanup. See above.
2783 operand legVecS() %{
2784 constraint(ALLOC_IN_RC(vectors_reg_legacy));
2785 match(VecS);
2786
2787 format %{ %}
2788 interface(REG_INTER);
2789 %}
2790
2791 // Replaces vec during post-selection cleanup. See above.
2792 operand vecD() %{
2793 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
2794 match(VecD);
2795
2796 format %{ %}
2797 interface(REG_INTER);
2798 %}
2799
2800 // Replaces legVec during post-selection cleanup. See above.
2801 operand legVecD() %{
2802 constraint(ALLOC_IN_RC(vectord_reg_legacy));
2803 match(VecD);
2804
2805 format %{ %}
2806 interface(REG_INTER);
2807 %}
2808
2809 // Replaces vec during post-selection cleanup. See above.
2810 operand vecX() %{
2811 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
2812 match(VecX);
2813
2814 format %{ %}
2815 interface(REG_INTER);
2816 %}
2817
2818 // Replaces legVec during post-selection cleanup. See above.
2819 operand legVecX() %{
2820 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
2821 match(VecX);
2822
2823 format %{ %}
2824 interface(REG_INTER);
2825 %}
2826
2827 // Replaces vec during post-selection cleanup. See above.
2828 operand vecY() %{
2829 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
2830 match(VecY);
2831
2832 format %{ %}
2833 interface(REG_INTER);
2834 %}
2835
2836 // Replaces legVec during post-selection cleanup. See above.
2837 operand legVecY() %{
2838 constraint(ALLOC_IN_RC(vectory_reg_legacy));
2839 match(VecY);
2840
2841 format %{ %}
2842 interface(REG_INTER);
2843 %}
2844
2845 // Replaces vec during post-selection cleanup. See above.
2846 operand vecZ() %{
2847 constraint(ALLOC_IN_RC(vectorz_reg));
2848 match(VecZ);
2849
2850 format %{ %}
2851 interface(REG_INTER);
2852 %}
2853
2854 // Replaces legVec during post-selection cleanup. See above.
2855 operand legVecZ() %{
2856 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
2857 match(VecZ);
2858
2859 format %{ %}
2860 interface(REG_INTER);
2861 %}
2862
2863 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2864
2865 // ============================================================================
2866
2867 instruct ShouldNotReachHere() %{
2868 match(Halt);
2869 format %{ "stop\t# ShouldNotReachHere" %}
2870 ins_encode %{
2871 if (is_reachable()) {
2872 const char* str = __ code_string(_halt_reason);
2873 __ stop(str);
2874 }
2875 %}
2876 ins_pipe(pipe_slow);
2877 %}
2878
2879 // ============================================================================
2880
2881 instruct addF_reg(regF dst, regF src) %{
2882 predicate(UseAVX == 0);
2883 match(Set dst (AddF dst src));
2884
2885 format %{ "addss $dst, $src" %}
2886 ins_cost(150);
2887 ins_encode %{
2888 __ addss($dst$$XMMRegister, $src$$XMMRegister);
2889 %}
2890 ins_pipe(pipe_slow);
2891 %}
2892
2893 instruct addF_mem(regF dst, memory src) %{
2894 predicate(UseAVX == 0);
2895 match(Set dst (AddF dst (LoadF src)));
2896
2897 format %{ "addss $dst, $src" %}
2898 ins_cost(150);
2899 ins_encode %{
2900 __ addss($dst$$XMMRegister, $src$$Address);
2901 %}
2902 ins_pipe(pipe_slow);
2903 %}
2904
2905 instruct addF_imm(regF dst, immF con) %{
2906 predicate(UseAVX == 0);
2907 match(Set dst (AddF dst con));
2908 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2909 ins_cost(150);
2910 ins_encode %{
2911 __ addss($dst$$XMMRegister, $constantaddress($con));
2912 %}
2913 ins_pipe(pipe_slow);
2914 %}
2915
2916 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
2917 predicate(UseAVX > 0);
2918 match(Set dst (AddF src1 src2));
2919
2920 format %{ "vaddss $dst, $src1, $src2" %}
2921 ins_cost(150);
2922 ins_encode %{
2923 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2924 %}
2925 ins_pipe(pipe_slow);
2926 %}
2927
2928 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
2929 predicate(UseAVX > 0);
2930 match(Set dst (AddF src1 (LoadF src2)));
2931
2932 format %{ "vaddss $dst, $src1, $src2" %}
2933 ins_cost(150);
2934 ins_encode %{
2935 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
2936 %}
2937 ins_pipe(pipe_slow);
2938 %}
2939
2940 instruct addF_reg_imm(regF dst, regF src, immF con) %{
2941 predicate(UseAVX > 0);
2942 match(Set dst (AddF src con));
2943
2944 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
2945 ins_cost(150);
2946 ins_encode %{
2947 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
2948 %}
2949 ins_pipe(pipe_slow);
2950 %}
2951
2952 instruct addD_reg(regD dst, regD src) %{
2953 predicate(UseAVX == 0);
2954 match(Set dst (AddD dst src));
2955
2956 format %{ "addsd $dst, $src" %}
2957 ins_cost(150);
2958 ins_encode %{
2959 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
2960 %}
2961 ins_pipe(pipe_slow);
2962 %}
2963
2964 instruct addD_mem(regD dst, memory src) %{
2965 predicate(UseAVX == 0);
2966 match(Set dst (AddD dst (LoadD src)));
2967
2968 format %{ "addsd $dst, $src" %}
2969 ins_cost(150);
2970 ins_encode %{
2971 __ addsd($dst$$XMMRegister, $src$$Address);
2972 %}
2973 ins_pipe(pipe_slow);
2974 %}
2975
2976 instruct addD_imm(regD dst, immD con) %{
2977 predicate(UseAVX == 0);
2978 match(Set dst (AddD dst con));
2979 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
2980 ins_cost(150);
2981 ins_encode %{
2982 __ addsd($dst$$XMMRegister, $constantaddress($con));
2983 %}
2984 ins_pipe(pipe_slow);
2985 %}
2986
2987 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
2988 predicate(UseAVX > 0);
2989 match(Set dst (AddD src1 src2));
2990
2991 format %{ "vaddsd $dst, $src1, $src2" %}
2992 ins_cost(150);
2993 ins_encode %{
2994 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2995 %}
2996 ins_pipe(pipe_slow);
2997 %}
2998
2999 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
3000 predicate(UseAVX > 0);
3001 match(Set dst (AddD src1 (LoadD src2)));
3002
3003 format %{ "vaddsd $dst, $src1, $src2" %}
3004 ins_cost(150);
3005 ins_encode %{
3006 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3007 %}
3008 ins_pipe(pipe_slow);
3009 %}
3010
3011 instruct addD_reg_imm(regD dst, regD src, immD con) %{
3012 predicate(UseAVX > 0);
3013 match(Set dst (AddD src con));
3014
3015 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3016 ins_cost(150);
3017 ins_encode %{
3018 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3019 %}
3020 ins_pipe(pipe_slow);
3021 %}
3022
3023 instruct subF_reg(regF dst, regF src) %{
3024 predicate(UseAVX == 0);
3025 match(Set dst (SubF dst src));
3026
3027 format %{ "subss $dst, $src" %}
3028 ins_cost(150);
3029 ins_encode %{
3030 __ subss($dst$$XMMRegister, $src$$XMMRegister);
3031 %}
3032 ins_pipe(pipe_slow);
3033 %}
3034
3035 instruct subF_mem(regF dst, memory src) %{
3036 predicate(UseAVX == 0);
3037 match(Set dst (SubF dst (LoadF src)));
3038
3039 format %{ "subss $dst, $src" %}
3040 ins_cost(150);
3041 ins_encode %{
3042 __ subss($dst$$XMMRegister, $src$$Address);
3043 %}
3044 ins_pipe(pipe_slow);
3045 %}
3046
3047 instruct subF_imm(regF dst, immF con) %{
3048 predicate(UseAVX == 0);
3049 match(Set dst (SubF dst con));
3050 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3051 ins_cost(150);
3052 ins_encode %{
3053 __ subss($dst$$XMMRegister, $constantaddress($con));
3054 %}
3055 ins_pipe(pipe_slow);
3056 %}
3057
3058 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
3059 predicate(UseAVX > 0);
3060 match(Set dst (SubF src1 src2));
3061
3062 format %{ "vsubss $dst, $src1, $src2" %}
3063 ins_cost(150);
3064 ins_encode %{
3065 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3066 %}
3067 ins_pipe(pipe_slow);
3068 %}
3069
3070 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
3071 predicate(UseAVX > 0);
3072 match(Set dst (SubF src1 (LoadF src2)));
3073
3074 format %{ "vsubss $dst, $src1, $src2" %}
3075 ins_cost(150);
3076 ins_encode %{
3077 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3078 %}
3079 ins_pipe(pipe_slow);
3080 %}
3081
3082 instruct subF_reg_imm(regF dst, regF src, immF con) %{
3083 predicate(UseAVX > 0);
3084 match(Set dst (SubF src con));
3085
3086 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3087 ins_cost(150);
3088 ins_encode %{
3089 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3090 %}
3091 ins_pipe(pipe_slow);
3092 %}
3093
3094 instruct subD_reg(regD dst, regD src) %{
3095 predicate(UseAVX == 0);
3096 match(Set dst (SubD dst src));
3097
3098 format %{ "subsd $dst, $src" %}
3099 ins_cost(150);
3100 ins_encode %{
3101 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
3102 %}
3103 ins_pipe(pipe_slow);
3104 %}
3105
3106 instruct subD_mem(regD dst, memory src) %{
3107 predicate(UseAVX == 0);
3108 match(Set dst (SubD dst (LoadD src)));
3109
3110 format %{ "subsd $dst, $src" %}
3111 ins_cost(150);
3112 ins_encode %{
3113 __ subsd($dst$$XMMRegister, $src$$Address);
3114 %}
3115 ins_pipe(pipe_slow);
3116 %}
3117
3118 instruct subD_imm(regD dst, immD con) %{
3119 predicate(UseAVX == 0);
3120 match(Set dst (SubD dst con));
3121 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3122 ins_cost(150);
3123 ins_encode %{
3124 __ subsd($dst$$XMMRegister, $constantaddress($con));
3125 %}
3126 ins_pipe(pipe_slow);
3127 %}
3128
3129 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
3130 predicate(UseAVX > 0);
3131 match(Set dst (SubD src1 src2));
3132
3133 format %{ "vsubsd $dst, $src1, $src2" %}
3134 ins_cost(150);
3135 ins_encode %{
3136 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3137 %}
3138 ins_pipe(pipe_slow);
3139 %}
3140
3141 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
3142 predicate(UseAVX > 0);
3143 match(Set dst (SubD src1 (LoadD src2)));
3144
3145 format %{ "vsubsd $dst, $src1, $src2" %}
3146 ins_cost(150);
3147 ins_encode %{
3148 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3149 %}
3150 ins_pipe(pipe_slow);
3151 %}
3152
3153 instruct subD_reg_imm(regD dst, regD src, immD con) %{
3154 predicate(UseAVX > 0);
3155 match(Set dst (SubD src con));
3156
3157 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3158 ins_cost(150);
3159 ins_encode %{
3160 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3161 %}
3162 ins_pipe(pipe_slow);
3163 %}
3164
3165 instruct mulF_reg(regF dst, regF src) %{
3166 predicate(UseAVX == 0);
3167 match(Set dst (MulF dst src));
3168
3169 format %{ "mulss $dst, $src" %}
3170 ins_cost(150);
3171 ins_encode %{
3172 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
3173 %}
3174 ins_pipe(pipe_slow);
3175 %}
3176
3177 instruct mulF_mem(regF dst, memory src) %{
3178 predicate(UseAVX == 0);
3179 match(Set dst (MulF dst (LoadF src)));
3180
3181 format %{ "mulss $dst, $src" %}
3182 ins_cost(150);
3183 ins_encode %{
3184 __ mulss($dst$$XMMRegister, $src$$Address);
3185 %}
3186 ins_pipe(pipe_slow);
3187 %}
3188
3189 instruct mulF_imm(regF dst, immF con) %{
3190 predicate(UseAVX == 0);
3191 match(Set dst (MulF dst con));
3192 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3193 ins_cost(150);
3194 ins_encode %{
3195 __ mulss($dst$$XMMRegister, $constantaddress($con));
3196 %}
3197 ins_pipe(pipe_slow);
3198 %}
3199
3200 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
3201 predicate(UseAVX > 0);
3202 match(Set dst (MulF src1 src2));
3203
3204 format %{ "vmulss $dst, $src1, $src2" %}
3205 ins_cost(150);
3206 ins_encode %{
3207 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3208 %}
3209 ins_pipe(pipe_slow);
3210 %}
3211
3212 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
3213 predicate(UseAVX > 0);
3214 match(Set dst (MulF src1 (LoadF src2)));
3215
3216 format %{ "vmulss $dst, $src1, $src2" %}
3217 ins_cost(150);
3218 ins_encode %{
3219 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3220 %}
3221 ins_pipe(pipe_slow);
3222 %}
3223
3224 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
3225 predicate(UseAVX > 0);
3226 match(Set dst (MulF src con));
3227
3228 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3229 ins_cost(150);
3230 ins_encode %{
3231 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3232 %}
3233 ins_pipe(pipe_slow);
3234 %}
3235
3236 instruct mulD_reg(regD dst, regD src) %{
3237 predicate(UseAVX == 0);
3238 match(Set dst (MulD dst src));
3239
3240 format %{ "mulsd $dst, $src" %}
3241 ins_cost(150);
3242 ins_encode %{
3243 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
3244 %}
3245 ins_pipe(pipe_slow);
3246 %}
3247
3248 instruct mulD_mem(regD dst, memory src) %{
3249 predicate(UseAVX == 0);
3250 match(Set dst (MulD dst (LoadD src)));
3251
3252 format %{ "mulsd $dst, $src" %}
3253 ins_cost(150);
3254 ins_encode %{
3255 __ mulsd($dst$$XMMRegister, $src$$Address);
3256 %}
3257 ins_pipe(pipe_slow);
3258 %}
3259
3260 instruct mulD_imm(regD dst, immD con) %{
3261 predicate(UseAVX == 0);
3262 match(Set dst (MulD dst con));
3263 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3264 ins_cost(150);
3265 ins_encode %{
3266 __ mulsd($dst$$XMMRegister, $constantaddress($con));
3267 %}
3268 ins_pipe(pipe_slow);
3269 %}
3270
3271 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
3272 predicate(UseAVX > 0);
3273 match(Set dst (MulD src1 src2));
3274
3275 format %{ "vmulsd $dst, $src1, $src2" %}
3276 ins_cost(150);
3277 ins_encode %{
3278 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3279 %}
3280 ins_pipe(pipe_slow);
3281 %}
3282
3283 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
3284 predicate(UseAVX > 0);
3285 match(Set dst (MulD src1 (LoadD src2)));
3286
3287 format %{ "vmulsd $dst, $src1, $src2" %}
3288 ins_cost(150);
3289 ins_encode %{
3290 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3291 %}
3292 ins_pipe(pipe_slow);
3293 %}
3294
3295 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
3296 predicate(UseAVX > 0);
3297 match(Set dst (MulD src con));
3298
3299 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3300 ins_cost(150);
3301 ins_encode %{
3302 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3303 %}
3304 ins_pipe(pipe_slow);
3305 %}
3306
3307 instruct divF_reg(regF dst, regF src) %{
3308 predicate(UseAVX == 0);
3309 match(Set dst (DivF dst src));
3310
3311 format %{ "divss $dst, $src" %}
3312 ins_cost(150);
3313 ins_encode %{
3314 __ divss($dst$$XMMRegister, $src$$XMMRegister);
3315 %}
3316 ins_pipe(pipe_slow);
3317 %}
3318
3319 instruct divF_mem(regF dst, memory src) %{
3320 predicate(UseAVX == 0);
3321 match(Set dst (DivF dst (LoadF src)));
3322
3323 format %{ "divss $dst, $src" %}
3324 ins_cost(150);
3325 ins_encode %{
3326 __ divss($dst$$XMMRegister, $src$$Address);
3327 %}
3328 ins_pipe(pipe_slow);
3329 %}
3330
3331 instruct divF_imm(regF dst, immF con) %{
3332 predicate(UseAVX == 0);
3333 match(Set dst (DivF dst con));
3334 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3335 ins_cost(150);
3336 ins_encode %{
3337 __ divss($dst$$XMMRegister, $constantaddress($con));
3338 %}
3339 ins_pipe(pipe_slow);
3340 %}
3341
3342 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
3343 predicate(UseAVX > 0);
3344 match(Set dst (DivF src1 src2));
3345
3346 format %{ "vdivss $dst, $src1, $src2" %}
3347 ins_cost(150);
3348 ins_encode %{
3349 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3350 %}
3351 ins_pipe(pipe_slow);
3352 %}
3353
3354 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
3355 predicate(UseAVX > 0);
3356 match(Set dst (DivF src1 (LoadF src2)));
3357
3358 format %{ "vdivss $dst, $src1, $src2" %}
3359 ins_cost(150);
3360 ins_encode %{
3361 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3362 %}
3363 ins_pipe(pipe_slow);
3364 %}
3365
3366 instruct divF_reg_imm(regF dst, regF src, immF con) %{
3367 predicate(UseAVX > 0);
3368 match(Set dst (DivF src con));
3369
3370 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3371 ins_cost(150);
3372 ins_encode %{
3373 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3374 %}
3375 ins_pipe(pipe_slow);
3376 %}
3377
3378 instruct divD_reg(regD dst, regD src) %{
3379 predicate(UseAVX == 0);
3380 match(Set dst (DivD dst src));
3381
3382 format %{ "divsd $dst, $src" %}
3383 ins_cost(150);
3384 ins_encode %{
3385 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
3386 %}
3387 ins_pipe(pipe_slow);
3388 %}
3389
3390 instruct divD_mem(regD dst, memory src) %{
3391 predicate(UseAVX == 0);
3392 match(Set dst (DivD dst (LoadD src)));
3393
3394 format %{ "divsd $dst, $src" %}
3395 ins_cost(150);
3396 ins_encode %{
3397 __ divsd($dst$$XMMRegister, $src$$Address);
3398 %}
3399 ins_pipe(pipe_slow);
3400 %}
3401
3402 instruct divD_imm(regD dst, immD con) %{
3403 predicate(UseAVX == 0);
3404 match(Set dst (DivD dst con));
3405 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3406 ins_cost(150);
3407 ins_encode %{
3408 __ divsd($dst$$XMMRegister, $constantaddress($con));
3409 %}
3410 ins_pipe(pipe_slow);
3411 %}
3412
3413 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
3414 predicate(UseAVX > 0);
3415 match(Set dst (DivD src1 src2));
3416
3417 format %{ "vdivsd $dst, $src1, $src2" %}
3418 ins_cost(150);
3419 ins_encode %{
3420 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3421 %}
3422 ins_pipe(pipe_slow);
3423 %}
3424
3425 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
3426 predicate(UseAVX > 0);
3427 match(Set dst (DivD src1 (LoadD src2)));
3428
3429 format %{ "vdivsd $dst, $src1, $src2" %}
3430 ins_cost(150);
3431 ins_encode %{
3432 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3433 %}
3434 ins_pipe(pipe_slow);
3435 %}
3436
3437 instruct divD_reg_imm(regD dst, regD src, immD con) %{
3438 predicate(UseAVX > 0);
3439 match(Set dst (DivD src con));
3440
3441 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3442 ins_cost(150);
3443 ins_encode %{
3444 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3445 %}
3446 ins_pipe(pipe_slow);
3447 %}
3448
3449 instruct absF_reg(regF dst) %{
3450 predicate(UseAVX == 0);
3451 match(Set dst (AbsF dst));
3452 ins_cost(150);
3453 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
3454 ins_encode %{
3455 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
3456 %}
3457 ins_pipe(pipe_slow);
3458 %}
3459
3460 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
3461 predicate(UseAVX > 0);
3462 match(Set dst (AbsF src));
3463 ins_cost(150);
3464 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
3465 ins_encode %{
3466 int vlen_enc = Assembler::AVX_128bit;
3467 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
3468 ExternalAddress(float_signmask()), vlen_enc);
3469 %}
3470 ins_pipe(pipe_slow);
3471 %}
3472
3473 instruct absD_reg(regD dst) %{
3474 predicate(UseAVX == 0);
3475 match(Set dst (AbsD dst));
3476 ins_cost(150);
3477 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
3478 "# abs double by sign masking" %}
3479 ins_encode %{
3480 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
3481 %}
3482 ins_pipe(pipe_slow);
3483 %}
3484
3485 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
3486 predicate(UseAVX > 0);
3487 match(Set dst (AbsD src));
3488 ins_cost(150);
3489 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
3490 "# abs double by sign masking" %}
3491 ins_encode %{
3492 int vlen_enc = Assembler::AVX_128bit;
3493 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
3494 ExternalAddress(double_signmask()), vlen_enc);
3495 %}
3496 ins_pipe(pipe_slow);
3497 %}
3498
3499 instruct negF_reg(regF dst) %{
3500 predicate(UseAVX == 0);
3501 match(Set dst (NegF dst));
3502 ins_cost(150);
3503 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
3504 ins_encode %{
3505 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
3506 %}
3507 ins_pipe(pipe_slow);
3508 %}
3509
3510 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
3511 predicate(UseAVX > 0);
3512 match(Set dst (NegF src));
3513 ins_cost(150);
3514 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
3515 ins_encode %{
3516 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
3517 ExternalAddress(float_signflip()));
3518 %}
3519 ins_pipe(pipe_slow);
3520 %}
3521
3522 instruct negD_reg(regD dst) %{
3523 predicate(UseAVX == 0);
3524 match(Set dst (NegD dst));
3525 ins_cost(150);
3526 format %{ "xorpd $dst, [0x8000000000000000]\t"
3527 "# neg double by sign flipping" %}
3528 ins_encode %{
3529 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
3530 %}
3531 ins_pipe(pipe_slow);
3532 %}
3533
3534 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
3535 predicate(UseAVX > 0);
3536 match(Set dst (NegD src));
3537 ins_cost(150);
3538 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
3539 "# neg double by sign flipping" %}
3540 ins_encode %{
3541 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
3542 ExternalAddress(double_signflip()));
3543 %}
3544 ins_pipe(pipe_slow);
3545 %}
3546
3547 // sqrtss instruction needs destination register to be pre initialized for best performance
3548 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
3549 instruct sqrtF_reg(regF dst) %{
3550 match(Set dst (SqrtF dst));
3551 format %{ "sqrtss $dst, $dst" %}
3552 ins_encode %{
3553 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
3554 %}
3555 ins_pipe(pipe_slow);
3556 %}
3557
3558 // sqrtsd instruction needs destination register to be pre initialized for best performance
3559 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
3560 instruct sqrtD_reg(regD dst) %{
3561 match(Set dst (SqrtD dst));
3562 format %{ "sqrtsd $dst, $dst" %}
3563 ins_encode %{
3564 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
3565 %}
3566 ins_pipe(pipe_slow);
3567 %}
3568
3569 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
3570 effect(TEMP tmp);
3571 match(Set dst (ConvF2HF src));
3572 ins_cost(125);
3573 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
3574 ins_encode %{
3575 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
3576 %}
3577 ins_pipe( pipe_slow );
3578 %}
3579
3580 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
3581 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
3582 effect(TEMP ktmp, TEMP rtmp);
3583 match(Set mem (StoreC mem (ConvF2HF src)));
3584 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
3585 ins_encode %{
3586 __ movl($rtmp$$Register, 0x1);
3587 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
3588 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
3589 %}
3590 ins_pipe( pipe_slow );
3591 %}
3592
3593 instruct vconvF2HF(vec dst, vec src) %{
3594 match(Set dst (VectorCastF2HF src));
3595 format %{ "vector_conv_F2HF $dst $src" %}
3596 ins_encode %{
3597 int vlen_enc = vector_length_encoding(this, $src);
3598 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
3599 %}
3600 ins_pipe( pipe_slow );
3601 %}
3602
3603 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
3604 predicate(n->as_StoreVector()->memory_size() >= 16);
3605 match(Set mem (StoreVector mem (VectorCastF2HF src)));
3606 format %{ "vcvtps2ph $mem,$src" %}
3607 ins_encode %{
3608 int vlen_enc = vector_length_encoding(this, $src);
3609 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
3610 %}
3611 ins_pipe( pipe_slow );
3612 %}
3613
3614 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
3615 match(Set dst (ConvHF2F src));
3616 format %{ "vcvtph2ps $dst,$src" %}
3617 ins_encode %{
3618 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
3619 %}
3620 ins_pipe( pipe_slow );
3621 %}
3622
3623 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
3624 match(Set dst (VectorCastHF2F (LoadVector mem)));
3625 format %{ "vcvtph2ps $dst,$mem" %}
3626 ins_encode %{
3627 int vlen_enc = vector_length_encoding(this);
3628 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
3629 %}
3630 ins_pipe( pipe_slow );
3631 %}
3632
3633 instruct vconvHF2F(vec dst, vec src) %{
3634 match(Set dst (VectorCastHF2F src));
3635 ins_cost(125);
3636 format %{ "vector_conv_HF2F $dst,$src" %}
3637 ins_encode %{
3638 int vlen_enc = vector_length_encoding(this);
3639 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
3640 %}
3641 ins_pipe( pipe_slow );
3642 %}
3643
3644 // ---------------------------------------- VectorReinterpret ------------------------------------
3645 instruct reinterpret_mask(kReg dst) %{
3646 predicate(n->bottom_type()->isa_vectmask() &&
3647 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
3648 match(Set dst (VectorReinterpret dst));
3649 ins_cost(125);
3650 format %{ "vector_reinterpret $dst\t!" %}
3651 ins_encode %{
3652 // empty
3653 %}
3654 ins_pipe( pipe_slow );
3655 %}
3656
3657 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
3658 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3659 n->bottom_type()->isa_vectmask() &&
3660 n->in(1)->bottom_type()->isa_vectmask() &&
3661 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
3662 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3663 match(Set dst (VectorReinterpret src));
3664 effect(TEMP xtmp);
3665 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
3666 ins_encode %{
3667 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
3668 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3669 assert(src_sz == dst_sz , "src and dst size mismatch");
3670 int vlen_enc = vector_length_encoding(src_sz);
3671 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3672 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3673 %}
3674 ins_pipe( pipe_slow );
3675 %}
3676
3677 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
3678 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3679 n->bottom_type()->isa_vectmask() &&
3680 n->in(1)->bottom_type()->isa_vectmask() &&
3681 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
3682 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
3683 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3684 match(Set dst (VectorReinterpret src));
3685 effect(TEMP xtmp);
3686 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
3687 ins_encode %{
3688 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
3689 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3690 assert(src_sz == dst_sz , "src and dst size mismatch");
3691 int vlen_enc = vector_length_encoding(src_sz);
3692 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3693 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3694 %}
3695 ins_pipe( pipe_slow );
3696 %}
3697
3698 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
3699 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3700 n->bottom_type()->isa_vectmask() &&
3701 n->in(1)->bottom_type()->isa_vectmask() &&
3702 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
3703 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
3704 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3705 match(Set dst (VectorReinterpret src));
3706 effect(TEMP xtmp);
3707 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
3708 ins_encode %{
3709 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
3710 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3711 assert(src_sz == dst_sz , "src and dst size mismatch");
3712 int vlen_enc = vector_length_encoding(src_sz);
3713 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3714 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3715 %}
3716 ins_pipe( pipe_slow );
3717 %}
3718
3719 instruct reinterpret(vec dst) %{
3720 predicate(!n->bottom_type()->isa_vectmask() &&
3721 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
3722 match(Set dst (VectorReinterpret dst));
3723 ins_cost(125);
3724 format %{ "vector_reinterpret $dst\t!" %}
3725 ins_encode %{
3726 // empty
3727 %}
3728 ins_pipe( pipe_slow );
3729 %}
3730
3731 instruct reinterpret_expand(vec dst, vec src) %{
3732 predicate(UseAVX == 0 &&
3733 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3734 match(Set dst (VectorReinterpret src));
3735 ins_cost(125);
3736 effect(TEMP dst);
3737 format %{ "vector_reinterpret_expand $dst,$src" %}
3738 ins_encode %{
3739 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
3740 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
3741
3742 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
3743 if (src_vlen_in_bytes == 4) {
3744 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
3745 } else {
3746 assert(src_vlen_in_bytes == 8, "");
3747 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
3748 }
3749 __ pand($dst$$XMMRegister, $src$$XMMRegister);
3750 %}
3751 ins_pipe( pipe_slow );
3752 %}
3753
3754 instruct vreinterpret_expand4(legVec dst, vec src) %{
3755 predicate(UseAVX > 0 &&
3756 !n->bottom_type()->isa_vectmask() &&
3757 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
3758 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3759 match(Set dst (VectorReinterpret src));
3760 ins_cost(125);
3761 format %{ "vector_reinterpret_expand $dst,$src" %}
3762 ins_encode %{
3763 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
3764 %}
3765 ins_pipe( pipe_slow );
3766 %}
3767
3768
3769 instruct vreinterpret_expand(legVec dst, vec src) %{
3770 predicate(UseAVX > 0 &&
3771 !n->bottom_type()->isa_vectmask() &&
3772 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
3773 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3774 match(Set dst (VectorReinterpret src));
3775 ins_cost(125);
3776 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
3777 ins_encode %{
3778 switch (Matcher::vector_length_in_bytes(this, $src)) {
3779 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
3780 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3781 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3782 default: ShouldNotReachHere();
3783 }
3784 %}
3785 ins_pipe( pipe_slow );
3786 %}
3787
3788 instruct reinterpret_shrink(vec dst, legVec src) %{
3789 predicate(!n->bottom_type()->isa_vectmask() &&
3790 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
3791 match(Set dst (VectorReinterpret src));
3792 ins_cost(125);
3793 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
3794 ins_encode %{
3795 switch (Matcher::vector_length_in_bytes(this)) {
3796 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
3797 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
3798 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3799 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3800 default: ShouldNotReachHere();
3801 }
3802 %}
3803 ins_pipe( pipe_slow );
3804 %}
3805
3806 // ----------------------------------------------------------------------------------------------------
3807
3808 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
3809 match(Set dst (RoundDoubleMode src rmode));
3810 format %{ "roundsd $dst,$src" %}
3811 ins_cost(150);
3812 ins_encode %{
3813 assert(UseSSE >= 4, "required");
3814 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
3815 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3816 }
3817 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
3818 %}
3819 ins_pipe(pipe_slow);
3820 %}
3821
3822 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
3823 match(Set dst (RoundDoubleMode con rmode));
3824 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
3825 ins_cost(150);
3826 ins_encode %{
3827 assert(UseSSE >= 4, "required");
3828 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
3829 %}
3830 ins_pipe(pipe_slow);
3831 %}
3832
3833 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
3834 predicate(Matcher::vector_length(n) < 8);
3835 match(Set dst (RoundDoubleModeV src rmode));
3836 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
3837 ins_encode %{
3838 assert(UseAVX > 0, "required");
3839 int vlen_enc = vector_length_encoding(this);
3840 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
3841 %}
3842 ins_pipe( pipe_slow );
3843 %}
3844
3845 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
3846 predicate(Matcher::vector_length(n) == 8);
3847 match(Set dst (RoundDoubleModeV src rmode));
3848 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
3849 ins_encode %{
3850 assert(UseAVX > 2, "required");
3851 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
3852 %}
3853 ins_pipe( pipe_slow );
3854 %}
3855
3856 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
3857 predicate(Matcher::vector_length(n) < 8);
3858 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3859 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
3860 ins_encode %{
3861 assert(UseAVX > 0, "required");
3862 int vlen_enc = vector_length_encoding(this);
3863 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
3864 %}
3865 ins_pipe( pipe_slow );
3866 %}
3867
3868 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
3869 predicate(Matcher::vector_length(n) == 8);
3870 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
3871 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
3872 ins_encode %{
3873 assert(UseAVX > 2, "required");
3874 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
3875 %}
3876 ins_pipe( pipe_slow );
3877 %}
3878
3879 instruct onspinwait() %{
3880 match(OnSpinWait);
3881 ins_cost(200);
3882
3883 format %{
3884 $$template
3885 $$emit$$"pause\t! membar_onspinwait"
3886 %}
3887 ins_encode %{
3888 __ pause();
3889 %}
3890 ins_pipe(pipe_slow);
3891 %}
3892
3893 // a * b + c
3894 instruct fmaD_reg(regD a, regD b, regD c) %{
3895 match(Set c (FmaD c (Binary a b)));
3896 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
3897 ins_cost(150);
3898 ins_encode %{
3899 assert(UseFMA, "Needs FMA instructions support.");
3900 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3901 %}
3902 ins_pipe( pipe_slow );
3903 %}
3904
3905 // a * b + c
3906 instruct fmaF_reg(regF a, regF b, regF c) %{
3907 match(Set c (FmaF c (Binary a b)));
3908 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
3909 ins_cost(150);
3910 ins_encode %{
3911 assert(UseFMA, "Needs FMA instructions support.");
3912 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
3913 %}
3914 ins_pipe( pipe_slow );
3915 %}
3916
3917 // ====================VECTOR INSTRUCTIONS=====================================
3918
3919 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
3920 instruct MoveVec2Leg(legVec dst, vec src) %{
3921 match(Set dst src);
3922 format %{ "" %}
3923 ins_encode %{
3924 ShouldNotReachHere();
3925 %}
3926 ins_pipe( fpu_reg_reg );
3927 %}
3928
3929 instruct MoveLeg2Vec(vec dst, legVec src) %{
3930 match(Set dst src);
3931 format %{ "" %}
3932 ins_encode %{
3933 ShouldNotReachHere();
3934 %}
3935 ins_pipe( fpu_reg_reg );
3936 %}
3937
3938 // ============================================================================
3939
3940 // Load vectors generic operand pattern
3941 instruct loadV(vec dst, memory mem) %{
3942 match(Set dst (LoadVector mem));
3943 ins_cost(125);
3944 format %{ "load_vector $dst,$mem" %}
3945 ins_encode %{
3946 BasicType bt = Matcher::vector_element_basic_type(this);
3947 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
3948 %}
3949 ins_pipe( pipe_slow );
3950 %}
3951
3952 // Store vectors generic operand pattern.
3953 instruct storeV(memory mem, vec src) %{
3954 match(Set mem (StoreVector mem src));
3955 ins_cost(145);
3956 format %{ "store_vector $mem,$src\n\t" %}
3957 ins_encode %{
3958 switch (Matcher::vector_length_in_bytes(this, $src)) {
3959 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
3960 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
3961 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
3962 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
3963 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
3964 default: ShouldNotReachHere();
3965 }
3966 %}
3967 ins_pipe( pipe_slow );
3968 %}
3969
3970 // ---------------------------------------- Gather ------------------------------------
3971
3972 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
3973
3974 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
3975 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
3976 Matcher::vector_length_in_bytes(n) <= 32);
3977 match(Set dst (LoadVectorGather mem idx));
3978 effect(TEMP dst, TEMP tmp, TEMP mask);
3979 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
3980 ins_encode %{
3981 int vlen_enc = vector_length_encoding(this);
3982 BasicType elem_bt = Matcher::vector_element_basic_type(this);
3983 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
3984 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
3985 __ lea($tmp$$Register, $mem$$Address);
3986 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
3987 %}
3988 ins_pipe( pipe_slow );
3989 %}
3990
3991
3992 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
3993 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
3994 !is_subword_type(Matcher::vector_element_basic_type(n)));
3995 match(Set dst (LoadVectorGather mem idx));
3996 effect(TEMP dst, TEMP tmp, TEMP ktmp);
3997 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
3998 ins_encode %{
3999 int vlen_enc = vector_length_encoding(this);
4000 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4001 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
4002 __ lea($tmp$$Register, $mem$$Address);
4003 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
4004 %}
4005 ins_pipe( pipe_slow );
4006 %}
4007
4008 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
4009 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
4010 !is_subword_type(Matcher::vector_element_basic_type(n)));
4011 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
4012 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
4013 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
4014 ins_encode %{
4015 assert(UseAVX > 2, "sanity");
4016 int vlen_enc = vector_length_encoding(this);
4017 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4018 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4019 // Note: Since gather instruction partially updates the opmask register used
4020 // for predication hense moving mask operand to a temporary.
4021 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
4022 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4023 __ lea($tmp$$Register, $mem$$Address);
4024 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
4025 %}
4026 ins_pipe( pipe_slow );
4027 %}
4028
4029 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
4030 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4031 match(Set dst (LoadVectorGather mem idx_base));
4032 effect(TEMP tmp, TEMP rtmp);
4033 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
4034 ins_encode %{
4035 int vlen_enc = vector_length_encoding(this);
4036 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4037 __ lea($tmp$$Register, $mem$$Address);
4038 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
4039 %}
4040 ins_pipe( pipe_slow );
4041 %}
4042
4043 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
4044 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
4045 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4046 match(Set dst (LoadVectorGather mem idx_base));
4047 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
4048 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
4049 ins_encode %{
4050 int vlen_enc = vector_length_encoding(this);
4051 int vector_len = Matcher::vector_length(this);
4052 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4053 __ lea($tmp$$Register, $mem$$Address);
4054 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
4055 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
4056 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
4057 %}
4058 ins_pipe( pipe_slow );
4059 %}
4060
4061 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
4062 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4063 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
4064 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
4065 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
4066 ins_encode %{
4067 int vlen_enc = vector_length_encoding(this);
4068 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4069 __ xorq($mask_idx$$Register, $mask_idx$$Register);
4070 __ lea($tmp$$Register, $mem$$Address);
4071 __ kmovql($rtmp2$$Register, $mask$$KRegister);
4072 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
4073 %}
4074 ins_pipe( pipe_slow );
4075 %}
4076
4077 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
4078 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
4079 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4080 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
4081 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
4082 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
4083 ins_encode %{
4084 int vlen_enc = vector_length_encoding(this);
4085 int vector_len = Matcher::vector_length(this);
4086 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4087 __ xorq($mask_idx$$Register, $mask_idx$$Register);
4088 __ lea($tmp$$Register, $mem$$Address);
4089 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
4090 __ kmovql($rtmp2$$Register, $mask$$KRegister);
4091 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
4092 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
4093 %}
4094 ins_pipe( pipe_slow );
4095 %}
4096
4097 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
4098 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
4099 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
4100 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
4101 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
4102 ins_encode %{
4103 int vlen_enc = vector_length_encoding(this);
4104 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4105 __ lea($tmp$$Register, $mem$$Address);
4106 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
4107 if (elem_bt == T_SHORT) {
4108 __ movl($mask_idx$$Register, 0x55555555);
4109 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
4110 }
4111 __ xorl($mask_idx$$Register, $mask_idx$$Register);
4112 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
4113 %}
4114 ins_pipe( pipe_slow );
4115 %}
4116
4117 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
4118 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
4119 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
4120 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
4121 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
4122 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
4123 ins_encode %{
4124 int vlen_enc = vector_length_encoding(this);
4125 int vector_len = Matcher::vector_length(this);
4126 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4127 __ lea($tmp$$Register, $mem$$Address);
4128 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
4129 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
4130 if (elem_bt == T_SHORT) {
4131 __ movl($mask_idx$$Register, 0x55555555);
4132 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
4133 }
4134 __ xorl($mask_idx$$Register, $mask_idx$$Register);
4135 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
4136 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
4137 %}
4138 ins_pipe( pipe_slow );
4139 %}
4140
4141 // ====================Scatter=======================================
4142
4143 // Scatter INT, LONG, FLOAT, DOUBLE
4144
4145 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
4146 predicate(UseAVX > 2);
4147 match(Set mem (StoreVectorScatter mem (Binary src idx)));
4148 effect(TEMP tmp, TEMP ktmp);
4149 format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
4150 ins_encode %{
4151 int vlen_enc = vector_length_encoding(this, $src);
4152 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
4153
4154 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
4155 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4156
4157 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
4158 __ lea($tmp$$Register, $mem$$Address);
4159 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
4160 %}
4161 ins_pipe( pipe_slow );
4162 %}
4163
4164 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
4165 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
4166 effect(TEMP tmp, TEMP ktmp);
4167 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
4168 ins_encode %{
4169 int vlen_enc = vector_length_encoding(this, $src);
4170 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
4171 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
4172 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4173 // Note: Since scatter instruction partially updates the opmask register used
4174 // for predication hense moving mask operand to a temporary.
4175 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
4176 __ lea($tmp$$Register, $mem$$Address);
4177 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
4178 %}
4179 ins_pipe( pipe_slow );
4180 %}
4181
4182 // ====================REPLICATE=======================================
4183
4184 // Replicate byte scalar to be vector
4185 instruct vReplB_reg(vec dst, rRegI src) %{
4186 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
4187 match(Set dst (Replicate src));
4188 format %{ "replicateB $dst,$src" %}
4189 ins_encode %{
4190 uint vlen = Matcher::vector_length(this);
4191 if (UseAVX >= 2) {
4192 int vlen_enc = vector_length_encoding(this);
4193 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
4194 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
4195 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
4196 } else {
4197 __ movdl($dst$$XMMRegister, $src$$Register);
4198 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4199 }
4200 } else {
4201 assert(UseAVX < 2, "");
4202 __ movdl($dst$$XMMRegister, $src$$Register);
4203 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
4204 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4205 if (vlen >= 16) {
4206 assert(vlen == 16, "");
4207 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4208 }
4209 }
4210 %}
4211 ins_pipe( pipe_slow );
4212 %}
4213
4214 instruct ReplB_mem(vec dst, memory mem) %{
4215 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
4216 match(Set dst (Replicate (LoadB mem)));
4217 format %{ "replicateB $dst,$mem" %}
4218 ins_encode %{
4219 int vlen_enc = vector_length_encoding(this);
4220 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
4221 %}
4222 ins_pipe( pipe_slow );
4223 %}
4224
4225 // ====================ReplicateS=======================================
4226
4227 instruct vReplS_reg(vec dst, rRegI src) %{
4228 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
4229 match(Set dst (Replicate src));
4230 format %{ "replicateS $dst,$src" %}
4231 ins_encode %{
4232 uint vlen = Matcher::vector_length(this);
4233 int vlen_enc = vector_length_encoding(this);
4234 if (UseAVX >= 2) {
4235 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
4236 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
4237 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
4238 } else {
4239 __ movdl($dst$$XMMRegister, $src$$Register);
4240 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4241 }
4242 } else {
4243 assert(UseAVX < 2, "");
4244 __ movdl($dst$$XMMRegister, $src$$Register);
4245 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4246 if (vlen >= 8) {
4247 assert(vlen == 8, "");
4248 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4249 }
4250 }
4251 %}
4252 ins_pipe( pipe_slow );
4253 %}
4254
4255 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
4256 match(Set dst (Replicate con));
4257 effect(TEMP rtmp);
4258 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
4259 ins_encode %{
4260 int vlen_enc = vector_length_encoding(this);
4261 BasicType bt = Matcher::vector_element_basic_type(this);
4262 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
4263 __ movl($rtmp$$Register, $con$$constant);
4264 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
4265 %}
4266 ins_pipe( pipe_slow );
4267 %}
4268
4269 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
4270 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
4271 match(Set dst (Replicate src));
4272 effect(TEMP rtmp);
4273 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
4274 ins_encode %{
4275 int vlen_enc = vector_length_encoding(this);
4276 __ vmovw($rtmp$$Register, $src$$XMMRegister);
4277 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
4278 %}
4279 ins_pipe( pipe_slow );
4280 %}
4281
4282 instruct ReplS_mem(vec dst, memory mem) %{
4283 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
4284 match(Set dst (Replicate (LoadS mem)));
4285 format %{ "replicateS $dst,$mem" %}
4286 ins_encode %{
4287 int vlen_enc = vector_length_encoding(this);
4288 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
4289 %}
4290 ins_pipe( pipe_slow );
4291 %}
4292
4293 // ====================ReplicateI=======================================
4294
4295 instruct ReplI_reg(vec dst, rRegI src) %{
4296 predicate(Matcher::vector_element_basic_type(n) == T_INT);
4297 match(Set dst (Replicate src));
4298 format %{ "replicateI $dst,$src" %}
4299 ins_encode %{
4300 uint vlen = Matcher::vector_length(this);
4301 int vlen_enc = vector_length_encoding(this);
4302 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
4303 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
4304 } else if (VM_Version::supports_avx2()) {
4305 __ movdl($dst$$XMMRegister, $src$$Register);
4306 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4307 } else {
4308 __ movdl($dst$$XMMRegister, $src$$Register);
4309 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4310 }
4311 %}
4312 ins_pipe( pipe_slow );
4313 %}
4314
4315 instruct ReplI_mem(vec dst, memory mem) %{
4316 predicate(Matcher::vector_element_basic_type(n) == T_INT);
4317 match(Set dst (Replicate (LoadI mem)));
4318 format %{ "replicateI $dst,$mem" %}
4319 ins_encode %{
4320 int vlen_enc = vector_length_encoding(this);
4321 if (VM_Version::supports_avx2()) {
4322 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
4323 } else if (VM_Version::supports_avx()) {
4324 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
4325 } else {
4326 __ movdl($dst$$XMMRegister, $mem$$Address);
4327 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4328 }
4329 %}
4330 ins_pipe( pipe_slow );
4331 %}
4332
4333 instruct ReplI_imm(vec dst, immI con) %{
4334 predicate(Matcher::is_non_long_integral_vector(n));
4335 match(Set dst (Replicate con));
4336 format %{ "replicateI $dst,$con" %}
4337 ins_encode %{
4338 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
4339 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
4340 type2aelembytes(Matcher::vector_element_basic_type(this))));
4341 BasicType bt = Matcher::vector_element_basic_type(this);
4342 int vlen = Matcher::vector_length_in_bytes(this);
4343 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
4344 %}
4345 ins_pipe( pipe_slow );
4346 %}
4347
4348 // Replicate scalar zero to be vector
4349 instruct ReplI_zero(vec dst, immI_0 zero) %{
4350 predicate(Matcher::is_non_long_integral_vector(n));
4351 match(Set dst (Replicate zero));
4352 format %{ "replicateI $dst,$zero" %}
4353 ins_encode %{
4354 int vlen_enc = vector_length_encoding(this);
4355 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
4356 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4357 } else {
4358 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4359 }
4360 %}
4361 ins_pipe( fpu_reg_reg );
4362 %}
4363
4364 instruct ReplI_M1(vec dst, immI_M1 con) %{
4365 predicate(Matcher::is_non_long_integral_vector(n));
4366 match(Set dst (Replicate con));
4367 format %{ "vallones $dst" %}
4368 ins_encode %{
4369 int vector_len = vector_length_encoding(this);
4370 __ vallones($dst$$XMMRegister, vector_len);
4371 %}
4372 ins_pipe( pipe_slow );
4373 %}
4374
4375 // ====================ReplicateL=======================================
4376
4377 // Replicate long (8 byte) scalar to be vector
4378 instruct ReplL_reg(vec dst, rRegL src) %{
4379 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4380 match(Set dst (Replicate src));
4381 format %{ "replicateL $dst,$src" %}
4382 ins_encode %{
4383 int vlen = Matcher::vector_length(this);
4384 int vlen_enc = vector_length_encoding(this);
4385 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
4386 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
4387 } else if (VM_Version::supports_avx2()) {
4388 __ movdq($dst$$XMMRegister, $src$$Register);
4389 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4390 } else {
4391 __ movdq($dst$$XMMRegister, $src$$Register);
4392 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4393 }
4394 %}
4395 ins_pipe( pipe_slow );
4396 %}
4397
4398 instruct ReplL_mem(vec dst, memory mem) %{
4399 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4400 match(Set dst (Replicate (LoadL mem)));
4401 format %{ "replicateL $dst,$mem" %}
4402 ins_encode %{
4403 int vlen_enc = vector_length_encoding(this);
4404 if (VM_Version::supports_avx2()) {
4405 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
4406 } else if (VM_Version::supports_sse3()) {
4407 __ movddup($dst$$XMMRegister, $mem$$Address);
4408 } else {
4409 __ movq($dst$$XMMRegister, $mem$$Address);
4410 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4411 }
4412 %}
4413 ins_pipe( pipe_slow );
4414 %}
4415
4416 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
4417 instruct ReplL_imm(vec dst, immL con) %{
4418 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4419 match(Set dst (Replicate con));
4420 format %{ "replicateL $dst,$con" %}
4421 ins_encode %{
4422 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
4423 int vlen = Matcher::vector_length_in_bytes(this);
4424 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
4425 %}
4426 ins_pipe( pipe_slow );
4427 %}
4428
4429 instruct ReplL_zero(vec dst, immL0 zero) %{
4430 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4431 match(Set dst (Replicate zero));
4432 format %{ "replicateL $dst,$zero" %}
4433 ins_encode %{
4434 int vlen_enc = vector_length_encoding(this);
4435 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
4436 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4437 } else {
4438 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4439 }
4440 %}
4441 ins_pipe( fpu_reg_reg );
4442 %}
4443
4444 instruct ReplL_M1(vec dst, immL_M1 con) %{
4445 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
4446 match(Set dst (Replicate con));
4447 format %{ "vallones $dst" %}
4448 ins_encode %{
4449 int vector_len = vector_length_encoding(this);
4450 __ vallones($dst$$XMMRegister, vector_len);
4451 %}
4452 ins_pipe( pipe_slow );
4453 %}
4454
4455 // ====================ReplicateF=======================================
4456
4457 instruct vReplF_reg(vec dst, vlRegF src) %{
4458 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
4459 match(Set dst (Replicate src));
4460 format %{ "replicateF $dst,$src" %}
4461 ins_encode %{
4462 uint vlen = Matcher::vector_length(this);
4463 int vlen_enc = vector_length_encoding(this);
4464 if (vlen <= 4) {
4465 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
4466 } else if (VM_Version::supports_avx2()) {
4467 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
4468 } else {
4469 assert(vlen == 8, "sanity");
4470 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
4471 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
4472 }
4473 %}
4474 ins_pipe( pipe_slow );
4475 %}
4476
4477 instruct ReplF_reg(vec dst, vlRegF src) %{
4478 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
4479 match(Set dst (Replicate src));
4480 format %{ "replicateF $dst,$src" %}
4481 ins_encode %{
4482 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
4483 %}
4484 ins_pipe( pipe_slow );
4485 %}
4486
4487 instruct ReplF_mem(vec dst, memory mem) %{
4488 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
4489 match(Set dst (Replicate (LoadF mem)));
4490 format %{ "replicateF $dst,$mem" %}
4491 ins_encode %{
4492 int vlen_enc = vector_length_encoding(this);
4493 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
4494 %}
4495 ins_pipe( pipe_slow );
4496 %}
4497
4498 // Replicate float scalar immediate to be vector by loading from const table.
4499 instruct ReplF_imm(vec dst, immF con) %{
4500 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
4501 match(Set dst (Replicate con));
4502 format %{ "replicateF $dst,$con" %}
4503 ins_encode %{
4504 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
4505 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
4506 int vlen = Matcher::vector_length_in_bytes(this);
4507 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
4508 %}
4509 ins_pipe( pipe_slow );
4510 %}
4511
4512 instruct ReplF_zero(vec dst, immF0 zero) %{
4513 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
4514 match(Set dst (Replicate zero));
4515 format %{ "replicateF $dst,$zero" %}
4516 ins_encode %{
4517 int vlen_enc = vector_length_encoding(this);
4518 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
4519 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4520 } else {
4521 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
4522 }
4523 %}
4524 ins_pipe( fpu_reg_reg );
4525 %}
4526
4527 // ====================ReplicateD=======================================
4528
4529 // Replicate double (8 bytes) scalar to be vector
4530 instruct vReplD_reg(vec dst, vlRegD src) %{
4531 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
4532 match(Set dst (Replicate src));
4533 format %{ "replicateD $dst,$src" %}
4534 ins_encode %{
4535 uint vlen = Matcher::vector_length(this);
4536 int vlen_enc = vector_length_encoding(this);
4537 if (vlen <= 2) {
4538 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
4539 } else if (VM_Version::supports_avx2()) {
4540 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
4541 } else {
4542 assert(vlen == 4, "sanity");
4543 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
4544 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
4545 }
4546 %}
4547 ins_pipe( pipe_slow );
4548 %}
4549
4550 instruct ReplD_reg(vec dst, vlRegD src) %{
4551 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
4552 match(Set dst (Replicate src));
4553 format %{ "replicateD $dst,$src" %}
4554 ins_encode %{
4555 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
4556 %}
4557 ins_pipe( pipe_slow );
4558 %}
4559
4560 instruct ReplD_mem(vec dst, memory mem) %{
4561 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
4562 match(Set dst (Replicate (LoadD mem)));
4563 format %{ "replicateD $dst,$mem" %}
4564 ins_encode %{
4565 if (Matcher::vector_length(this) >= 4) {
4566 int vlen_enc = vector_length_encoding(this);
4567 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
4568 } else {
4569 __ movddup($dst$$XMMRegister, $mem$$Address);
4570 }
4571 %}
4572 ins_pipe( pipe_slow );
4573 %}
4574
4575 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
4576 instruct ReplD_imm(vec dst, immD con) %{
4577 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
4578 match(Set dst (Replicate con));
4579 format %{ "replicateD $dst,$con" %}
4580 ins_encode %{
4581 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
4582 int vlen = Matcher::vector_length_in_bytes(this);
4583 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
4584 %}
4585 ins_pipe( pipe_slow );
4586 %}
4587
4588 instruct ReplD_zero(vec dst, immD0 zero) %{
4589 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
4590 match(Set dst (Replicate zero));
4591 format %{ "replicateD $dst,$zero" %}
4592 ins_encode %{
4593 int vlen_enc = vector_length_encoding(this);
4594 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
4595 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4596 } else {
4597 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
4598 }
4599 %}
4600 ins_pipe( fpu_reg_reg );
4601 %}
4602
4603 // ====================VECTOR INSERT=======================================
4604
4605 instruct insert(vec dst, rRegI val, immU8 idx) %{
4606 predicate(Matcher::vector_length_in_bytes(n) < 32);
4607 match(Set dst (VectorInsert (Binary dst val) idx));
4608 format %{ "vector_insert $dst,$val,$idx" %}
4609 ins_encode %{
4610 assert(UseSSE >= 4, "required");
4611 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
4612
4613 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4614
4615 assert(is_integral_type(elem_bt), "");
4616 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4617
4618 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
4619 %}
4620 ins_pipe( pipe_slow );
4621 %}
4622
4623 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
4624 predicate(Matcher::vector_length_in_bytes(n) == 32);
4625 match(Set dst (VectorInsert (Binary src val) idx));
4626 effect(TEMP vtmp);
4627 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4628 ins_encode %{
4629 int vlen_enc = Assembler::AVX_256bit;
4630 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4631 int elem_per_lane = 16/type2aelembytes(elem_bt);
4632 int log2epr = log2(elem_per_lane);
4633
4634 assert(is_integral_type(elem_bt), "sanity");
4635 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4636
4637 uint x_idx = $idx$$constant & right_n_bits(log2epr);
4638 uint y_idx = ($idx$$constant >> log2epr) & 1;
4639 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4640 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4641 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4642 %}
4643 ins_pipe( pipe_slow );
4644 %}
4645
4646 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
4647 predicate(Matcher::vector_length_in_bytes(n) == 64);
4648 match(Set dst (VectorInsert (Binary src val) idx));
4649 effect(TEMP vtmp);
4650 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4651 ins_encode %{
4652 assert(UseAVX > 2, "sanity");
4653
4654 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4655 int elem_per_lane = 16/type2aelembytes(elem_bt);
4656 int log2epr = log2(elem_per_lane);
4657
4658 assert(is_integral_type(elem_bt), "");
4659 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4660
4661 uint x_idx = $idx$$constant & right_n_bits(log2epr);
4662 uint y_idx = ($idx$$constant >> log2epr) & 3;
4663 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4664 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4665 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4666 %}
4667 ins_pipe( pipe_slow );
4668 %}
4669
4670 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
4671 predicate(Matcher::vector_length(n) == 2);
4672 match(Set dst (VectorInsert (Binary dst val) idx));
4673 format %{ "vector_insert $dst,$val,$idx" %}
4674 ins_encode %{
4675 assert(UseSSE >= 4, "required");
4676 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
4677 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4678
4679 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
4680 %}
4681 ins_pipe( pipe_slow );
4682 %}
4683
4684 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
4685 predicate(Matcher::vector_length(n) == 4);
4686 match(Set dst (VectorInsert (Binary src val) idx));
4687 effect(TEMP vtmp);
4688 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4689 ins_encode %{
4690 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
4691 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4692
4693 uint x_idx = $idx$$constant & right_n_bits(1);
4694 uint y_idx = ($idx$$constant >> 1) & 1;
4695 int vlen_enc = Assembler::AVX_256bit;
4696 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4697 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4698 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4699 %}
4700 ins_pipe( pipe_slow );
4701 %}
4702
4703 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
4704 predicate(Matcher::vector_length(n) == 8);
4705 match(Set dst (VectorInsert (Binary src val) idx));
4706 effect(TEMP vtmp);
4707 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4708 ins_encode %{
4709 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
4710 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4711
4712 uint x_idx = $idx$$constant & right_n_bits(1);
4713 uint y_idx = ($idx$$constant >> 1) & 3;
4714 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4715 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
4716 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4717 %}
4718 ins_pipe( pipe_slow );
4719 %}
4720
4721 instruct insertF(vec dst, regF val, immU8 idx) %{
4722 predicate(Matcher::vector_length(n) < 8);
4723 match(Set dst (VectorInsert (Binary dst val) idx));
4724 format %{ "vector_insert $dst,$val,$idx" %}
4725 ins_encode %{
4726 assert(UseSSE >= 4, "sanity");
4727
4728 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
4729 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4730
4731 uint x_idx = $idx$$constant & right_n_bits(2);
4732 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
4733 %}
4734 ins_pipe( pipe_slow );
4735 %}
4736
4737 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
4738 predicate(Matcher::vector_length(n) >= 8);
4739 match(Set dst (VectorInsert (Binary src val) idx));
4740 effect(TEMP vtmp);
4741 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4742 ins_encode %{
4743 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
4744 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4745
4746 int vlen = Matcher::vector_length(this);
4747 uint x_idx = $idx$$constant & right_n_bits(2);
4748 if (vlen == 8) {
4749 uint y_idx = ($idx$$constant >> 2) & 1;
4750 int vlen_enc = Assembler::AVX_256bit;
4751 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4752 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
4753 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4754 } else {
4755 assert(vlen == 16, "sanity");
4756 uint y_idx = ($idx$$constant >> 2) & 3;
4757 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4758 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
4759 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4760 }
4761 %}
4762 ins_pipe( pipe_slow );
4763 %}
4764
4765 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
4766 predicate(Matcher::vector_length(n) == 2);
4767 match(Set dst (VectorInsert (Binary dst val) idx));
4768 effect(TEMP tmp);
4769 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
4770 ins_encode %{
4771 assert(UseSSE >= 4, "sanity");
4772 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
4773 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4774
4775 __ movq($tmp$$Register, $val$$XMMRegister);
4776 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
4777 %}
4778 ins_pipe( pipe_slow );
4779 %}
4780
4781 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
4782 predicate(Matcher::vector_length(n) == 4);
4783 match(Set dst (VectorInsert (Binary src val) idx));
4784 effect(TEMP vtmp, TEMP tmp);
4785 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
4786 ins_encode %{
4787 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
4788 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4789
4790 uint x_idx = $idx$$constant & right_n_bits(1);
4791 uint y_idx = ($idx$$constant >> 1) & 1;
4792 int vlen_enc = Assembler::AVX_256bit;
4793 __ movq($tmp$$Register, $val$$XMMRegister);
4794 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4795 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
4796 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4797 %}
4798 ins_pipe( pipe_slow );
4799 %}
4800
4801 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
4802 predicate(Matcher::vector_length(n) == 8);
4803 match(Set dst (VectorInsert (Binary src val) idx));
4804 effect(TEMP tmp, TEMP vtmp);
4805 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
4806 ins_encode %{
4807 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
4808 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
4809
4810 uint x_idx = $idx$$constant & right_n_bits(1);
4811 uint y_idx = ($idx$$constant >> 1) & 3;
4812 __ movq($tmp$$Register, $val$$XMMRegister);
4813 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
4814 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
4815 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
4816 %}
4817 ins_pipe( pipe_slow );
4818 %}
4819
4820 // ====================REDUCTION ARITHMETIC=======================================
4821
4822 // =======================Int Reduction==========================================
4823
4824 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
4825 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
4826 match(Set dst (AddReductionVI src1 src2));
4827 match(Set dst (MulReductionVI src1 src2));
4828 match(Set dst (AndReductionV src1 src2));
4829 match(Set dst ( OrReductionV src1 src2));
4830 match(Set dst (XorReductionV src1 src2));
4831 match(Set dst (MinReductionV src1 src2));
4832 match(Set dst (MaxReductionV src1 src2));
4833 effect(TEMP vtmp1, TEMP vtmp2);
4834 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
4835 ins_encode %{
4836 int opcode = this->ideal_Opcode();
4837 int vlen = Matcher::vector_length(this, $src2);
4838 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4839 %}
4840 ins_pipe( pipe_slow );
4841 %}
4842
4843 // =======================Long Reduction==========================================
4844
4845 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
4846 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
4847 match(Set dst (AddReductionVL src1 src2));
4848 match(Set dst (MulReductionVL src1 src2));
4849 match(Set dst (AndReductionV src1 src2));
4850 match(Set dst ( OrReductionV src1 src2));
4851 match(Set dst (XorReductionV src1 src2));
4852 match(Set dst (MinReductionV src1 src2));
4853 match(Set dst (MaxReductionV src1 src2));
4854 effect(TEMP vtmp1, TEMP vtmp2);
4855 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
4856 ins_encode %{
4857 int opcode = this->ideal_Opcode();
4858 int vlen = Matcher::vector_length(this, $src2);
4859 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4860 %}
4861 ins_pipe( pipe_slow );
4862 %}
4863
4864 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
4865 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
4866 match(Set dst (AddReductionVL src1 src2));
4867 match(Set dst (MulReductionVL src1 src2));
4868 match(Set dst (AndReductionV src1 src2));
4869 match(Set dst ( OrReductionV src1 src2));
4870 match(Set dst (XorReductionV src1 src2));
4871 match(Set dst (MinReductionV src1 src2));
4872 match(Set dst (MaxReductionV src1 src2));
4873 effect(TEMP vtmp1, TEMP vtmp2);
4874 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
4875 ins_encode %{
4876 int opcode = this->ideal_Opcode();
4877 int vlen = Matcher::vector_length(this, $src2);
4878 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4879 %}
4880 ins_pipe( pipe_slow );
4881 %}
4882
4883 // =======================Float Reduction==========================================
4884
4885 instruct reductionF128(regF dst, vec src, vec vtmp) %{
4886 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
4887 match(Set dst (AddReductionVF dst src));
4888 match(Set dst (MulReductionVF dst src));
4889 effect(TEMP dst, TEMP vtmp);
4890 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
4891 ins_encode %{
4892 int opcode = this->ideal_Opcode();
4893 int vlen = Matcher::vector_length(this, $src);
4894 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
4895 %}
4896 ins_pipe( pipe_slow );
4897 %}
4898
4899 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
4900 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
4901 match(Set dst (AddReductionVF dst src));
4902 match(Set dst (MulReductionVF dst src));
4903 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
4904 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
4905 ins_encode %{
4906 int opcode = this->ideal_Opcode();
4907 int vlen = Matcher::vector_length(this, $src);
4908 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4909 %}
4910 ins_pipe( pipe_slow );
4911 %}
4912
4913 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
4914 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
4915 match(Set dst (AddReductionVF dst src));
4916 match(Set dst (MulReductionVF dst src));
4917 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
4918 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
4919 ins_encode %{
4920 int opcode = this->ideal_Opcode();
4921 int vlen = Matcher::vector_length(this, $src);
4922 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4923 %}
4924 ins_pipe( pipe_slow );
4925 %}
4926
4927
4928 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
4929 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
4930 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
4931 // src1 contains reduction identity
4932 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
4933 match(Set dst (AddReductionVF src1 src2));
4934 match(Set dst (MulReductionVF src1 src2));
4935 effect(TEMP dst);
4936 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
4937 ins_encode %{
4938 int opcode = this->ideal_Opcode();
4939 int vlen = Matcher::vector_length(this, $src2);
4940 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
4941 %}
4942 ins_pipe( pipe_slow );
4943 %}
4944
4945 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
4946 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
4947 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
4948 // src1 contains reduction identity
4949 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
4950 match(Set dst (AddReductionVF src1 src2));
4951 match(Set dst (MulReductionVF src1 src2));
4952 effect(TEMP dst, TEMP vtmp);
4953 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
4954 ins_encode %{
4955 int opcode = this->ideal_Opcode();
4956 int vlen = Matcher::vector_length(this, $src2);
4957 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
4958 %}
4959 ins_pipe( pipe_slow );
4960 %}
4961
4962 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
4963 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
4964 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
4965 // src1 contains reduction identity
4966 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
4967 match(Set dst (AddReductionVF src1 src2));
4968 match(Set dst (MulReductionVF src1 src2));
4969 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
4970 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
4971 ins_encode %{
4972 int opcode = this->ideal_Opcode();
4973 int vlen = Matcher::vector_length(this, $src2);
4974 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4975 %}
4976 ins_pipe( pipe_slow );
4977 %}
4978
4979 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
4980 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
4981 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
4982 // src1 contains reduction identity
4983 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
4984 match(Set dst (AddReductionVF src1 src2));
4985 match(Set dst (MulReductionVF src1 src2));
4986 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
4987 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
4988 ins_encode %{
4989 int opcode = this->ideal_Opcode();
4990 int vlen = Matcher::vector_length(this, $src2);
4991 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
4992 %}
4993 ins_pipe( pipe_slow );
4994 %}
4995
4996 // =======================Double Reduction==========================================
4997
4998 instruct reduction2D(regD dst, vec src, vec vtmp) %{
4999 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
5000 match(Set dst (AddReductionVD dst src));
5001 match(Set dst (MulReductionVD dst src));
5002 effect(TEMP dst, TEMP vtmp);
5003 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
5004 ins_encode %{
5005 int opcode = this->ideal_Opcode();
5006 int vlen = Matcher::vector_length(this, $src);
5007 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
5008 %}
5009 ins_pipe( pipe_slow );
5010 %}
5011
5012 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
5013 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
5014 match(Set dst (AddReductionVD dst src));
5015 match(Set dst (MulReductionVD dst src));
5016 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5017 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
5018 ins_encode %{
5019 int opcode = this->ideal_Opcode();
5020 int vlen = Matcher::vector_length(this, $src);
5021 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5022 %}
5023 ins_pipe( pipe_slow );
5024 %}
5025
5026 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5027 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
5028 match(Set dst (AddReductionVD dst src));
5029 match(Set dst (MulReductionVD dst src));
5030 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5031 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
5032 ins_encode %{
5033 int opcode = this->ideal_Opcode();
5034 int vlen = Matcher::vector_length(this, $src);
5035 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5036 %}
5037 ins_pipe( pipe_slow );
5038 %}
5039
5040 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
5041 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
5042 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
5043 // src1 contains reduction identity
5044 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
5045 match(Set dst (AddReductionVD src1 src2));
5046 match(Set dst (MulReductionVD src1 src2));
5047 effect(TEMP dst);
5048 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
5049 ins_encode %{
5050 int opcode = this->ideal_Opcode();
5051 int vlen = Matcher::vector_length(this, $src2);
5052 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
5053 %}
5054 ins_pipe( pipe_slow );
5055 %}
5056
5057 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
5058 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
5059 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
5060 // src1 contains reduction identity
5061 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
5062 match(Set dst (AddReductionVD src1 src2));
5063 match(Set dst (MulReductionVD src1 src2));
5064 effect(TEMP dst, TEMP vtmp);
5065 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
5066 ins_encode %{
5067 int opcode = this->ideal_Opcode();
5068 int vlen = Matcher::vector_length(this, $src2);
5069 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
5070 %}
5071 ins_pipe( pipe_slow );
5072 %}
5073
5074 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5075 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
5076 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
5077 // src1 contains reduction identity
5078 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
5079 match(Set dst (AddReductionVD src1 src2));
5080 match(Set dst (MulReductionVD src1 src2));
5081 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5082 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5083 ins_encode %{
5084 int opcode = this->ideal_Opcode();
5085 int vlen = Matcher::vector_length(this, $src2);
5086 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5087 %}
5088 ins_pipe( pipe_slow );
5089 %}
5090
5091 // =======================Byte Reduction==========================================
5092
5093 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5094 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
5095 match(Set dst (AddReductionVI src1 src2));
5096 match(Set dst (AndReductionV src1 src2));
5097 match(Set dst ( OrReductionV src1 src2));
5098 match(Set dst (XorReductionV src1 src2));
5099 match(Set dst (MinReductionV src1 src2));
5100 match(Set dst (MaxReductionV src1 src2));
5101 effect(TEMP vtmp1, TEMP vtmp2);
5102 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5103 ins_encode %{
5104 int opcode = this->ideal_Opcode();
5105 int vlen = Matcher::vector_length(this, $src2);
5106 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5107 %}
5108 ins_pipe( pipe_slow );
5109 %}
5110
5111 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
5112 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
5113 match(Set dst (AddReductionVI src1 src2));
5114 match(Set dst (AndReductionV src1 src2));
5115 match(Set dst ( OrReductionV src1 src2));
5116 match(Set dst (XorReductionV src1 src2));
5117 match(Set dst (MinReductionV src1 src2));
5118 match(Set dst (MaxReductionV src1 src2));
5119 effect(TEMP vtmp1, TEMP vtmp2);
5120 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5121 ins_encode %{
5122 int opcode = this->ideal_Opcode();
5123 int vlen = Matcher::vector_length(this, $src2);
5124 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5125 %}
5126 ins_pipe( pipe_slow );
5127 %}
5128
5129 // =======================Short Reduction==========================================
5130
5131 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5132 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
5133 match(Set dst (AddReductionVI src1 src2));
5134 match(Set dst (MulReductionVI src1 src2));
5135 match(Set dst (AndReductionV src1 src2));
5136 match(Set dst ( OrReductionV src1 src2));
5137 match(Set dst (XorReductionV src1 src2));
5138 match(Set dst (MinReductionV src1 src2));
5139 match(Set dst (MaxReductionV src1 src2));
5140 effect(TEMP vtmp1, TEMP vtmp2);
5141 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5142 ins_encode %{
5143 int opcode = this->ideal_Opcode();
5144 int vlen = Matcher::vector_length(this, $src2);
5145 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5146 %}
5147 ins_pipe( pipe_slow );
5148 %}
5149
5150 // =======================Mul Reduction==========================================
5151
5152 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
5153 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
5154 Matcher::vector_length(n->in(2)) <= 32); // src2
5155 match(Set dst (MulReductionVI src1 src2));
5156 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5157 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
5158 ins_encode %{
5159 int opcode = this->ideal_Opcode();
5160 int vlen = Matcher::vector_length(this, $src2);
5161 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5162 %}
5163 ins_pipe( pipe_slow );
5164 %}
5165
5166 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5167 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
5168 Matcher::vector_length(n->in(2)) == 64); // src2
5169 match(Set dst (MulReductionVI src1 src2));
5170 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
5171 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
5172 ins_encode %{
5173 int opcode = this->ideal_Opcode();
5174 int vlen = Matcher::vector_length(this, $src2);
5175 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
5176 %}
5177 ins_pipe( pipe_slow );
5178 %}
5179
5180 //--------------------Min/Max Float Reduction --------------------
5181 // Float Min Reduction
5182 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
5183 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
5184 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5185 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5186 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5187 Matcher::vector_length(n->in(2)) == 2);
5188 match(Set dst (MinReductionV src1 src2));
5189 match(Set dst (MaxReductionV src1 src2));
5190 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
5191 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
5192 ins_encode %{
5193 assert(UseAVX > 0, "sanity");
5194
5195 int opcode = this->ideal_Opcode();
5196 int vlen = Matcher::vector_length(this, $src2);
5197 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
5198 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
5199 %}
5200 ins_pipe( pipe_slow );
5201 %}
5202
5203 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
5204 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
5205 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5206 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5207 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5208 Matcher::vector_length(n->in(2)) >= 4);
5209 match(Set dst (MinReductionV src1 src2));
5210 match(Set dst (MaxReductionV src1 src2));
5211 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
5212 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
5213 ins_encode %{
5214 assert(UseAVX > 0, "sanity");
5215
5216 int opcode = this->ideal_Opcode();
5217 int vlen = Matcher::vector_length(this, $src2);
5218 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
5219 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
5220 %}
5221 ins_pipe( pipe_slow );
5222 %}
5223
5224 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
5225 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
5226 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5227 Matcher::vector_length(n->in(2)) == 2);
5228 match(Set dst (MinReductionV dst src));
5229 match(Set dst (MaxReductionV dst src));
5230 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
5231 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
5232 ins_encode %{
5233 assert(UseAVX > 0, "sanity");
5234
5235 int opcode = this->ideal_Opcode();
5236 int vlen = Matcher::vector_length(this, $src);
5237 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
5238 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
5239 %}
5240 ins_pipe( pipe_slow );
5241 %}
5242
5243
5244 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
5245 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
5246 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5247 Matcher::vector_length(n->in(2)) >= 4);
5248 match(Set dst (MinReductionV dst src));
5249 match(Set dst (MaxReductionV dst src));
5250 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
5251 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
5252 ins_encode %{
5253 assert(UseAVX > 0, "sanity");
5254
5255 int opcode = this->ideal_Opcode();
5256 int vlen = Matcher::vector_length(this, $src);
5257 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
5258 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
5259 %}
5260 ins_pipe( pipe_slow );
5261 %}
5262
5263 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
5264 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5265 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5266 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5267 Matcher::vector_length(n->in(2)) == 2);
5268 match(Set dst (MinReductionV src1 src2));
5269 match(Set dst (MaxReductionV src1 src2));
5270 effect(TEMP dst, TEMP xtmp1);
5271 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
5272 ins_encode %{
5273 int opcode = this->ideal_Opcode();
5274 int vlen = Matcher::vector_length(this, $src2);
5275 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
5276 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
5277 %}
5278 ins_pipe( pipe_slow );
5279 %}
5280
5281 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
5282 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5283 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
5284 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
5285 Matcher::vector_length(n->in(2)) >= 4);
5286 match(Set dst (MinReductionV src1 src2));
5287 match(Set dst (MaxReductionV src1 src2));
5288 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
5289 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
5290 ins_encode %{
5291 int opcode = this->ideal_Opcode();
5292 int vlen = Matcher::vector_length(this, $src2);
5293 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
5294 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
5295 %}
5296 ins_pipe( pipe_slow );
5297 %}
5298
5299 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
5300 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5301 Matcher::vector_length(n->in(2)) == 2);
5302 match(Set dst (MinReductionV dst src));
5303 match(Set dst (MaxReductionV dst src));
5304 effect(TEMP dst, TEMP xtmp1);
5305 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
5306 ins_encode %{
5307 int opcode = this->ideal_Opcode();
5308 int vlen = Matcher::vector_length(this, $src);
5309 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
5310 $xtmp1$$XMMRegister);
5311 %}
5312 ins_pipe( pipe_slow );
5313 %}
5314
5315 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
5316 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
5317 Matcher::vector_length(n->in(2)) >= 4);
5318 match(Set dst (MinReductionV dst src));
5319 match(Set dst (MaxReductionV dst src));
5320 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
5321 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
5322 ins_encode %{
5323 int opcode = this->ideal_Opcode();
5324 int vlen = Matcher::vector_length(this, $src);
5325 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
5326 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
5327 %}
5328 ins_pipe( pipe_slow );
5329 %}
5330
5331 //--------------------Min Double Reduction --------------------
5332 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
5333 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
5334 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5335 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5336 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5337 Matcher::vector_length(n->in(2)) == 2);
5338 match(Set dst (MinReductionV src1 src2));
5339 match(Set dst (MaxReductionV src1 src2));
5340 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
5341 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
5342 ins_encode %{
5343 assert(UseAVX > 0, "sanity");
5344
5345 int opcode = this->ideal_Opcode();
5346 int vlen = Matcher::vector_length(this, $src2);
5347 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
5348 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
5349 %}
5350 ins_pipe( pipe_slow );
5351 %}
5352
5353 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
5354 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
5355 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5356 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5357 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5358 Matcher::vector_length(n->in(2)) >= 4);
5359 match(Set dst (MinReductionV src1 src2));
5360 match(Set dst (MaxReductionV src1 src2));
5361 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
5362 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
5363 ins_encode %{
5364 assert(UseAVX > 0, "sanity");
5365
5366 int opcode = this->ideal_Opcode();
5367 int vlen = Matcher::vector_length(this, $src2);
5368 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
5369 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
5370 %}
5371 ins_pipe( pipe_slow );
5372 %}
5373
5374
5375 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
5376 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
5377 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5378 Matcher::vector_length(n->in(2)) == 2);
5379 match(Set dst (MinReductionV dst src));
5380 match(Set dst (MaxReductionV dst src));
5381 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
5382 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
5383 ins_encode %{
5384 assert(UseAVX > 0, "sanity");
5385
5386 int opcode = this->ideal_Opcode();
5387 int vlen = Matcher::vector_length(this, $src);
5388 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5389 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
5390 %}
5391 ins_pipe( pipe_slow );
5392 %}
5393
5394 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
5395 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
5396 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5397 Matcher::vector_length(n->in(2)) >= 4);
5398 match(Set dst (MinReductionV dst src));
5399 match(Set dst (MaxReductionV dst src));
5400 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
5401 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
5402 ins_encode %{
5403 assert(UseAVX > 0, "sanity");
5404
5405 int opcode = this->ideal_Opcode();
5406 int vlen = Matcher::vector_length(this, $src);
5407 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5408 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
5409 %}
5410 ins_pipe( pipe_slow );
5411 %}
5412
5413 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
5414 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5415 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5416 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5417 Matcher::vector_length(n->in(2)) == 2);
5418 match(Set dst (MinReductionV src1 src2));
5419 match(Set dst (MaxReductionV src1 src2));
5420 effect(TEMP dst, TEMP xtmp1);
5421 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
5422 ins_encode %{
5423 int opcode = this->ideal_Opcode();
5424 int vlen = Matcher::vector_length(this, $src2);
5425 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
5426 xnoreg, xnoreg, $xtmp1$$XMMRegister);
5427 %}
5428 ins_pipe( pipe_slow );
5429 %}
5430
5431 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
5432 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5433 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
5434 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
5435 Matcher::vector_length(n->in(2)) >= 4);
5436 match(Set dst (MinReductionV src1 src2));
5437 match(Set dst (MaxReductionV src1 src2));
5438 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
5439 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
5440 ins_encode %{
5441 int opcode = this->ideal_Opcode();
5442 int vlen = Matcher::vector_length(this, $src2);
5443 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
5444 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
5445 %}
5446 ins_pipe( pipe_slow );
5447 %}
5448
5449
5450 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
5451 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5452 Matcher::vector_length(n->in(2)) == 2);
5453 match(Set dst (MinReductionV dst src));
5454 match(Set dst (MaxReductionV dst src));
5455 effect(TEMP dst, TEMP xtmp1);
5456 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
5457 ins_encode %{
5458 int opcode = this->ideal_Opcode();
5459 int vlen = Matcher::vector_length(this, $src);
5460 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5461 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
5462 %}
5463 ins_pipe( pipe_slow );
5464 %}
5465
5466 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
5467 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
5468 Matcher::vector_length(n->in(2)) >= 4);
5469 match(Set dst (MinReductionV dst src));
5470 match(Set dst (MaxReductionV dst src));
5471 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
5472 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
5473 ins_encode %{
5474 int opcode = this->ideal_Opcode();
5475 int vlen = Matcher::vector_length(this, $src);
5476 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
5477 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
5478 %}
5479 ins_pipe( pipe_slow );
5480 %}
5481
5482 // ====================VECTOR ARITHMETIC=======================================
5483
5484 // --------------------------------- ADD --------------------------------------
5485
5486 // Bytes vector add
5487 instruct vaddB(vec dst, vec src) %{
5488 predicate(UseAVX == 0);
5489 match(Set dst (AddVB dst src));
5490 format %{ "paddb $dst,$src\t! add packedB" %}
5491 ins_encode %{
5492 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5493 %}
5494 ins_pipe( pipe_slow );
5495 %}
5496
5497 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
5498 predicate(UseAVX > 0);
5499 match(Set dst (AddVB src1 src2));
5500 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
5501 ins_encode %{
5502 int vlen_enc = vector_length_encoding(this);
5503 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5504 %}
5505 ins_pipe( pipe_slow );
5506 %}
5507
5508 instruct vaddB_mem(vec dst, vec src, memory mem) %{
5509 predicate((UseAVX > 0) &&
5510 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5511 match(Set dst (AddVB src (LoadVector mem)));
5512 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
5513 ins_encode %{
5514 int vlen_enc = vector_length_encoding(this);
5515 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5516 %}
5517 ins_pipe( pipe_slow );
5518 %}
5519
5520 // Shorts/Chars vector add
5521 instruct vaddS(vec dst, vec src) %{
5522 predicate(UseAVX == 0);
5523 match(Set dst (AddVS dst src));
5524 format %{ "paddw $dst,$src\t! add packedS" %}
5525 ins_encode %{
5526 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5527 %}
5528 ins_pipe( pipe_slow );
5529 %}
5530
5531 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
5532 predicate(UseAVX > 0);
5533 match(Set dst (AddVS src1 src2));
5534 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
5535 ins_encode %{
5536 int vlen_enc = vector_length_encoding(this);
5537 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5538 %}
5539 ins_pipe( pipe_slow );
5540 %}
5541
5542 instruct vaddS_mem(vec dst, vec src, memory mem) %{
5543 predicate((UseAVX > 0) &&
5544 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5545 match(Set dst (AddVS src (LoadVector mem)));
5546 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
5547 ins_encode %{
5548 int vlen_enc = vector_length_encoding(this);
5549 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5550 %}
5551 ins_pipe( pipe_slow );
5552 %}
5553
5554 // Integers vector add
5555 instruct vaddI(vec dst, vec src) %{
5556 predicate(UseAVX == 0);
5557 match(Set dst (AddVI dst src));
5558 format %{ "paddd $dst,$src\t! add packedI" %}
5559 ins_encode %{
5560 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5561 %}
5562 ins_pipe( pipe_slow );
5563 %}
5564
5565 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
5566 predicate(UseAVX > 0);
5567 match(Set dst (AddVI src1 src2));
5568 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
5569 ins_encode %{
5570 int vlen_enc = vector_length_encoding(this);
5571 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5572 %}
5573 ins_pipe( pipe_slow );
5574 %}
5575
5576
5577 instruct vaddI_mem(vec dst, vec src, memory mem) %{
5578 predicate((UseAVX > 0) &&
5579 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5580 match(Set dst (AddVI src (LoadVector mem)));
5581 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
5582 ins_encode %{
5583 int vlen_enc = vector_length_encoding(this);
5584 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5585 %}
5586 ins_pipe( pipe_slow );
5587 %}
5588
5589 // Longs vector add
5590 instruct vaddL(vec dst, vec src) %{
5591 predicate(UseAVX == 0);
5592 match(Set dst (AddVL dst src));
5593 format %{ "paddq $dst,$src\t! add packedL" %}
5594 ins_encode %{
5595 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
5596 %}
5597 ins_pipe( pipe_slow );
5598 %}
5599
5600 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
5601 predicate(UseAVX > 0);
5602 match(Set dst (AddVL src1 src2));
5603 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
5604 ins_encode %{
5605 int vlen_enc = vector_length_encoding(this);
5606 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5607 %}
5608 ins_pipe( pipe_slow );
5609 %}
5610
5611 instruct vaddL_mem(vec dst, vec src, memory mem) %{
5612 predicate((UseAVX > 0) &&
5613 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5614 match(Set dst (AddVL src (LoadVector mem)));
5615 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
5616 ins_encode %{
5617 int vlen_enc = vector_length_encoding(this);
5618 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5619 %}
5620 ins_pipe( pipe_slow );
5621 %}
5622
5623 // Floats vector add
5624 instruct vaddF(vec dst, vec src) %{
5625 predicate(UseAVX == 0);
5626 match(Set dst (AddVF dst src));
5627 format %{ "addps $dst,$src\t! add packedF" %}
5628 ins_encode %{
5629 __ addps($dst$$XMMRegister, $src$$XMMRegister);
5630 %}
5631 ins_pipe( pipe_slow );
5632 %}
5633
5634 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
5635 predicate(UseAVX > 0);
5636 match(Set dst (AddVF src1 src2));
5637 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
5638 ins_encode %{
5639 int vlen_enc = vector_length_encoding(this);
5640 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5641 %}
5642 ins_pipe( pipe_slow );
5643 %}
5644
5645 instruct vaddF_mem(vec dst, vec src, memory mem) %{
5646 predicate((UseAVX > 0) &&
5647 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5648 match(Set dst (AddVF src (LoadVector mem)));
5649 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
5650 ins_encode %{
5651 int vlen_enc = vector_length_encoding(this);
5652 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5653 %}
5654 ins_pipe( pipe_slow );
5655 %}
5656
5657 // Doubles vector add
5658 instruct vaddD(vec dst, vec src) %{
5659 predicate(UseAVX == 0);
5660 match(Set dst (AddVD dst src));
5661 format %{ "addpd $dst,$src\t! add packedD" %}
5662 ins_encode %{
5663 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
5664 %}
5665 ins_pipe( pipe_slow );
5666 %}
5667
5668 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
5669 predicate(UseAVX > 0);
5670 match(Set dst (AddVD src1 src2));
5671 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
5672 ins_encode %{
5673 int vlen_enc = vector_length_encoding(this);
5674 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5675 %}
5676 ins_pipe( pipe_slow );
5677 %}
5678
5679 instruct vaddD_mem(vec dst, vec src, memory mem) %{
5680 predicate((UseAVX > 0) &&
5681 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5682 match(Set dst (AddVD src (LoadVector mem)));
5683 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
5684 ins_encode %{
5685 int vlen_enc = vector_length_encoding(this);
5686 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5687 %}
5688 ins_pipe( pipe_slow );
5689 %}
5690
5691 // --------------------------------- SUB --------------------------------------
5692
5693 // Bytes vector sub
5694 instruct vsubB(vec dst, vec src) %{
5695 predicate(UseAVX == 0);
5696 match(Set dst (SubVB dst src));
5697 format %{ "psubb $dst,$src\t! sub packedB" %}
5698 ins_encode %{
5699 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5700 %}
5701 ins_pipe( pipe_slow );
5702 %}
5703
5704 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
5705 predicate(UseAVX > 0);
5706 match(Set dst (SubVB src1 src2));
5707 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
5708 ins_encode %{
5709 int vlen_enc = vector_length_encoding(this);
5710 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5711 %}
5712 ins_pipe( pipe_slow );
5713 %}
5714
5715 instruct vsubB_mem(vec dst, vec src, memory mem) %{
5716 predicate((UseAVX > 0) &&
5717 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5718 match(Set dst (SubVB src (LoadVector mem)));
5719 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
5720 ins_encode %{
5721 int vlen_enc = vector_length_encoding(this);
5722 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5723 %}
5724 ins_pipe( pipe_slow );
5725 %}
5726
5727 // Shorts/Chars vector sub
5728 instruct vsubS(vec dst, vec src) %{
5729 predicate(UseAVX == 0);
5730 match(Set dst (SubVS dst src));
5731 format %{ "psubw $dst,$src\t! sub packedS" %}
5732 ins_encode %{
5733 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5734 %}
5735 ins_pipe( pipe_slow );
5736 %}
5737
5738
5739 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
5740 predicate(UseAVX > 0);
5741 match(Set dst (SubVS src1 src2));
5742 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
5743 ins_encode %{
5744 int vlen_enc = vector_length_encoding(this);
5745 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5746 %}
5747 ins_pipe( pipe_slow );
5748 %}
5749
5750 instruct vsubS_mem(vec dst, vec src, memory mem) %{
5751 predicate((UseAVX > 0) &&
5752 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5753 match(Set dst (SubVS src (LoadVector mem)));
5754 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
5755 ins_encode %{
5756 int vlen_enc = vector_length_encoding(this);
5757 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5758 %}
5759 ins_pipe( pipe_slow );
5760 %}
5761
5762 // Integers vector sub
5763 instruct vsubI(vec dst, vec src) %{
5764 predicate(UseAVX == 0);
5765 match(Set dst (SubVI dst src));
5766 format %{ "psubd $dst,$src\t! sub packedI" %}
5767 ins_encode %{
5768 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
5769 %}
5770 ins_pipe( pipe_slow );
5771 %}
5772
5773 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
5774 predicate(UseAVX > 0);
5775 match(Set dst (SubVI src1 src2));
5776 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
5777 ins_encode %{
5778 int vlen_enc = vector_length_encoding(this);
5779 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5780 %}
5781 ins_pipe( pipe_slow );
5782 %}
5783
5784 instruct vsubI_mem(vec dst, vec src, memory mem) %{
5785 predicate((UseAVX > 0) &&
5786 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5787 match(Set dst (SubVI src (LoadVector mem)));
5788 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
5789 ins_encode %{
5790 int vlen_enc = vector_length_encoding(this);
5791 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5792 %}
5793 ins_pipe( pipe_slow );
5794 %}
5795
5796 // Longs vector sub
5797 instruct vsubL(vec dst, vec src) %{
5798 predicate(UseAVX == 0);
5799 match(Set dst (SubVL dst src));
5800 format %{ "psubq $dst,$src\t! sub packedL" %}
5801 ins_encode %{
5802 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
5803 %}
5804 ins_pipe( pipe_slow );
5805 %}
5806
5807 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
5808 predicate(UseAVX > 0);
5809 match(Set dst (SubVL src1 src2));
5810 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
5811 ins_encode %{
5812 int vlen_enc = vector_length_encoding(this);
5813 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5814 %}
5815 ins_pipe( pipe_slow );
5816 %}
5817
5818
5819 instruct vsubL_mem(vec dst, vec src, memory mem) %{
5820 predicate((UseAVX > 0) &&
5821 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5822 match(Set dst (SubVL src (LoadVector mem)));
5823 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
5824 ins_encode %{
5825 int vlen_enc = vector_length_encoding(this);
5826 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5827 %}
5828 ins_pipe( pipe_slow );
5829 %}
5830
5831 // Floats vector sub
5832 instruct vsubF(vec dst, vec src) %{
5833 predicate(UseAVX == 0);
5834 match(Set dst (SubVF dst src));
5835 format %{ "subps $dst,$src\t! sub packedF" %}
5836 ins_encode %{
5837 __ subps($dst$$XMMRegister, $src$$XMMRegister);
5838 %}
5839 ins_pipe( pipe_slow );
5840 %}
5841
5842 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
5843 predicate(UseAVX > 0);
5844 match(Set dst (SubVF src1 src2));
5845 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
5846 ins_encode %{
5847 int vlen_enc = vector_length_encoding(this);
5848 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5849 %}
5850 ins_pipe( pipe_slow );
5851 %}
5852
5853 instruct vsubF_mem(vec dst, vec src, memory mem) %{
5854 predicate((UseAVX > 0) &&
5855 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5856 match(Set dst (SubVF src (LoadVector mem)));
5857 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
5858 ins_encode %{
5859 int vlen_enc = vector_length_encoding(this);
5860 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5861 %}
5862 ins_pipe( pipe_slow );
5863 %}
5864
5865 // Doubles vector sub
5866 instruct vsubD(vec dst, vec src) %{
5867 predicate(UseAVX == 0);
5868 match(Set dst (SubVD dst src));
5869 format %{ "subpd $dst,$src\t! sub packedD" %}
5870 ins_encode %{
5871 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
5872 %}
5873 ins_pipe( pipe_slow );
5874 %}
5875
5876 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
5877 predicate(UseAVX > 0);
5878 match(Set dst (SubVD src1 src2));
5879 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
5880 ins_encode %{
5881 int vlen_enc = vector_length_encoding(this);
5882 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5883 %}
5884 ins_pipe( pipe_slow );
5885 %}
5886
5887 instruct vsubD_mem(vec dst, vec src, memory mem) %{
5888 predicate((UseAVX > 0) &&
5889 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5890 match(Set dst (SubVD src (LoadVector mem)));
5891 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
5892 ins_encode %{
5893 int vlen_enc = vector_length_encoding(this);
5894 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5895 %}
5896 ins_pipe( pipe_slow );
5897 %}
5898
5899 // --------------------------------- MUL --------------------------------------
5900
5901 // Byte vector mul
5902 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
5903 predicate(Matcher::vector_length_in_bytes(n) <= 8);
5904 match(Set dst (MulVB src1 src2));
5905 effect(TEMP dst, TEMP xtmp);
5906 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
5907 ins_encode %{
5908 assert(UseSSE > 3, "required");
5909 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
5910 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
5911 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
5912 __ psllw($dst$$XMMRegister, 8);
5913 __ psrlw($dst$$XMMRegister, 8);
5914 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
5915 %}
5916 ins_pipe( pipe_slow );
5917 %}
5918
5919 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
5920 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
5921 match(Set dst (MulVB src1 src2));
5922 effect(TEMP dst, TEMP xtmp);
5923 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
5924 ins_encode %{
5925 assert(UseSSE > 3, "required");
5926 // Odd-index elements
5927 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
5928 __ psrlw($dst$$XMMRegister, 8);
5929 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
5930 __ psrlw($xtmp$$XMMRegister, 8);
5931 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
5932 __ psllw($dst$$XMMRegister, 8);
5933 // Even-index elements
5934 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
5935 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
5936 __ psllw($xtmp$$XMMRegister, 8);
5937 __ psrlw($xtmp$$XMMRegister, 8);
5938 // Combine
5939 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
5940 %}
5941 ins_pipe( pipe_slow );
5942 %}
5943
5944 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
5945 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
5946 match(Set dst (MulVB src1 src2));
5947 effect(TEMP xtmp1, TEMP xtmp2);
5948 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
5949 ins_encode %{
5950 int vlen_enc = vector_length_encoding(this);
5951 // Odd-index elements
5952 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
5953 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
5954 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
5955 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
5956 // Even-index elements
5957 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5958 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
5959 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
5960 // Combine
5961 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
5962 %}
5963 ins_pipe( pipe_slow );
5964 %}
5965
5966 // Shorts/Chars vector mul
5967 instruct vmulS(vec dst, vec src) %{
5968 predicate(UseAVX == 0);
5969 match(Set dst (MulVS dst src));
5970 format %{ "pmullw $dst,$src\t! mul packedS" %}
5971 ins_encode %{
5972 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
5973 %}
5974 ins_pipe( pipe_slow );
5975 %}
5976
5977 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
5978 predicate(UseAVX > 0);
5979 match(Set dst (MulVS src1 src2));
5980 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
5981 ins_encode %{
5982 int vlen_enc = vector_length_encoding(this);
5983 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
5984 %}
5985 ins_pipe( pipe_slow );
5986 %}
5987
5988 instruct vmulS_mem(vec dst, vec src, memory mem) %{
5989 predicate((UseAVX > 0) &&
5990 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
5991 match(Set dst (MulVS src (LoadVector mem)));
5992 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
5993 ins_encode %{
5994 int vlen_enc = vector_length_encoding(this);
5995 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
5996 %}
5997 ins_pipe( pipe_slow );
5998 %}
5999
6000 // Integers vector mul
6001 instruct vmulI(vec dst, vec src) %{
6002 predicate(UseAVX == 0);
6003 match(Set dst (MulVI dst src));
6004 format %{ "pmulld $dst,$src\t! mul packedI" %}
6005 ins_encode %{
6006 assert(UseSSE > 3, "required");
6007 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6008 %}
6009 ins_pipe( pipe_slow );
6010 %}
6011
6012 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
6013 predicate(UseAVX > 0);
6014 match(Set dst (MulVI src1 src2));
6015 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
6016 ins_encode %{
6017 int vlen_enc = vector_length_encoding(this);
6018 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6019 %}
6020 ins_pipe( pipe_slow );
6021 %}
6022
6023 instruct vmulI_mem(vec dst, vec src, memory mem) %{
6024 predicate((UseAVX > 0) &&
6025 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6026 match(Set dst (MulVI src (LoadVector mem)));
6027 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
6028 ins_encode %{
6029 int vlen_enc = vector_length_encoding(this);
6030 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6031 %}
6032 ins_pipe( pipe_slow );
6033 %}
6034
6035 // Longs vector mul
6036 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
6037 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
6038 VM_Version::supports_avx512dq()) ||
6039 VM_Version::supports_avx512vldq());
6040 match(Set dst (MulVL src1 src2));
6041 ins_cost(500);
6042 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
6043 ins_encode %{
6044 assert(UseAVX > 2, "required");
6045 int vlen_enc = vector_length_encoding(this);
6046 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6047 %}
6048 ins_pipe( pipe_slow );
6049 %}
6050
6051 instruct evmulL_mem(vec dst, vec src, memory mem) %{
6052 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
6053 VM_Version::supports_avx512dq()) ||
6054 (Matcher::vector_length_in_bytes(n) > 8 &&
6055 VM_Version::supports_avx512vldq()));
6056 match(Set dst (MulVL src (LoadVector mem)));
6057 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
6058 ins_cost(500);
6059 ins_encode %{
6060 assert(UseAVX > 2, "required");
6061 int vlen_enc = vector_length_encoding(this);
6062 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6063 %}
6064 ins_pipe( pipe_slow );
6065 %}
6066
6067 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
6068 predicate(UseAVX == 0);
6069 match(Set dst (MulVL src1 src2));
6070 ins_cost(500);
6071 effect(TEMP dst, TEMP xtmp);
6072 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
6073 ins_encode %{
6074 assert(VM_Version::supports_sse4_1(), "required");
6075 // Get the lo-hi products, only the lower 32 bits is in concerns
6076 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
6077 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
6078 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
6079 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
6080 __ psllq($dst$$XMMRegister, 32);
6081 // Get the lo-lo products
6082 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
6083 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
6084 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
6085 %}
6086 ins_pipe( pipe_slow );
6087 %}
6088
6089 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
6090 predicate(UseAVX > 0 &&
6091 ((Matcher::vector_length_in_bytes(n) == 64 &&
6092 !VM_Version::supports_avx512dq()) ||
6093 (Matcher::vector_length_in_bytes(n) < 64 &&
6094 !VM_Version::supports_avx512vldq())));
6095 match(Set dst (MulVL src1 src2));
6096 effect(TEMP xtmp1, TEMP xtmp2);
6097 ins_cost(500);
6098 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
6099 ins_encode %{
6100 int vlen_enc = vector_length_encoding(this);
6101 // Get the lo-hi products, only the lower 32 bits is in concerns
6102 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
6103 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6104 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
6105 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
6106 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
6107 // Get the lo-lo products
6108 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6109 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
6110 %}
6111 ins_pipe( pipe_slow );
6112 %}
6113
6114 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
6115 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
6116 match(Set dst (MulVL src1 src2));
6117 ins_cost(100);
6118 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
6119 ins_encode %{
6120 int vlen_enc = vector_length_encoding(this);
6121 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6122 %}
6123 ins_pipe( pipe_slow );
6124 %}
6125
6126 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
6127 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
6128 match(Set dst (MulVL src1 src2));
6129 ins_cost(100);
6130 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
6131 ins_encode %{
6132 int vlen_enc = vector_length_encoding(this);
6133 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6134 %}
6135 ins_pipe( pipe_slow );
6136 %}
6137
6138 // Floats vector mul
6139 instruct vmulF(vec dst, vec src) %{
6140 predicate(UseAVX == 0);
6141 match(Set dst (MulVF dst src));
6142 format %{ "mulps $dst,$src\t! mul packedF" %}
6143 ins_encode %{
6144 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6145 %}
6146 ins_pipe( pipe_slow );
6147 %}
6148
6149 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
6150 predicate(UseAVX > 0);
6151 match(Set dst (MulVF src1 src2));
6152 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
6153 ins_encode %{
6154 int vlen_enc = vector_length_encoding(this);
6155 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6156 %}
6157 ins_pipe( pipe_slow );
6158 %}
6159
6160 instruct vmulF_mem(vec dst, vec src, memory mem) %{
6161 predicate((UseAVX > 0) &&
6162 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6163 match(Set dst (MulVF src (LoadVector mem)));
6164 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
6165 ins_encode %{
6166 int vlen_enc = vector_length_encoding(this);
6167 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6168 %}
6169 ins_pipe( pipe_slow );
6170 %}
6171
6172 // Doubles vector mul
6173 instruct vmulD(vec dst, vec src) %{
6174 predicate(UseAVX == 0);
6175 match(Set dst (MulVD dst src));
6176 format %{ "mulpd $dst,$src\t! mul packedD" %}
6177 ins_encode %{
6178 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
6179 %}
6180 ins_pipe( pipe_slow );
6181 %}
6182
6183 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
6184 predicate(UseAVX > 0);
6185 match(Set dst (MulVD src1 src2));
6186 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
6187 ins_encode %{
6188 int vlen_enc = vector_length_encoding(this);
6189 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6190 %}
6191 ins_pipe( pipe_slow );
6192 %}
6193
6194 instruct vmulD_mem(vec dst, vec src, memory mem) %{
6195 predicate((UseAVX > 0) &&
6196 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6197 match(Set dst (MulVD src (LoadVector mem)));
6198 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
6199 ins_encode %{
6200 int vlen_enc = vector_length_encoding(this);
6201 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6202 %}
6203 ins_pipe( pipe_slow );
6204 %}
6205
6206 // --------------------------------- DIV --------------------------------------
6207
6208 // Floats vector div
6209 instruct vdivF(vec dst, vec src) %{
6210 predicate(UseAVX == 0);
6211 match(Set dst (DivVF dst src));
6212 format %{ "divps $dst,$src\t! div packedF" %}
6213 ins_encode %{
6214 __ divps($dst$$XMMRegister, $src$$XMMRegister);
6215 %}
6216 ins_pipe( pipe_slow );
6217 %}
6218
6219 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
6220 predicate(UseAVX > 0);
6221 match(Set dst (DivVF src1 src2));
6222 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
6223 ins_encode %{
6224 int vlen_enc = vector_length_encoding(this);
6225 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6226 %}
6227 ins_pipe( pipe_slow );
6228 %}
6229
6230 instruct vdivF_mem(vec dst, vec src, memory mem) %{
6231 predicate((UseAVX > 0) &&
6232 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6233 match(Set dst (DivVF src (LoadVector mem)));
6234 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
6235 ins_encode %{
6236 int vlen_enc = vector_length_encoding(this);
6237 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6238 %}
6239 ins_pipe( pipe_slow );
6240 %}
6241
6242 // Doubles vector div
6243 instruct vdivD(vec dst, vec src) %{
6244 predicate(UseAVX == 0);
6245 match(Set dst (DivVD dst src));
6246 format %{ "divpd $dst,$src\t! div packedD" %}
6247 ins_encode %{
6248 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
6249 %}
6250 ins_pipe( pipe_slow );
6251 %}
6252
6253 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
6254 predicate(UseAVX > 0);
6255 match(Set dst (DivVD src1 src2));
6256 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
6257 ins_encode %{
6258 int vlen_enc = vector_length_encoding(this);
6259 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6260 %}
6261 ins_pipe( pipe_slow );
6262 %}
6263
6264 instruct vdivD_mem(vec dst, vec src, memory mem) %{
6265 predicate((UseAVX > 0) &&
6266 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
6267 match(Set dst (DivVD src (LoadVector mem)));
6268 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
6269 ins_encode %{
6270 int vlen_enc = vector_length_encoding(this);
6271 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
6272 %}
6273 ins_pipe( pipe_slow );
6274 %}
6275
6276 // ------------------------------ MinMax ---------------------------------------
6277
6278 // Byte, Short, Int vector Min/Max
6279 instruct minmax_reg_sse(vec dst, vec src) %{
6280 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
6281 UseAVX == 0);
6282 match(Set dst (MinV dst src));
6283 match(Set dst (MaxV dst src));
6284 format %{ "vector_minmax $dst,$src\t! " %}
6285 ins_encode %{
6286 assert(UseSSE >= 4, "required");
6287
6288 int opcode = this->ideal_Opcode();
6289 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6290 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
6291 %}
6292 ins_pipe( pipe_slow );
6293 %}
6294
6295 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
6296 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
6297 UseAVX > 0);
6298 match(Set dst (MinV src1 src2));
6299 match(Set dst (MaxV src1 src2));
6300 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
6301 ins_encode %{
6302 int opcode = this->ideal_Opcode();
6303 int vlen_enc = vector_length_encoding(this);
6304 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6305
6306 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6307 %}
6308 ins_pipe( pipe_slow );
6309 %}
6310
6311 // Long vector Min/Max
6312 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
6313 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
6314 UseAVX == 0);
6315 match(Set dst (MinV dst src));
6316 match(Set dst (MaxV src dst));
6317 effect(TEMP dst, TEMP tmp);
6318 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
6319 ins_encode %{
6320 assert(UseSSE >= 4, "required");
6321
6322 int opcode = this->ideal_Opcode();
6323 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6324 assert(elem_bt == T_LONG, "sanity");
6325
6326 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
6327 %}
6328 ins_pipe( pipe_slow );
6329 %}
6330
6331 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
6332 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
6333 UseAVX > 0 && !VM_Version::supports_avx512vl());
6334 match(Set dst (MinV src1 src2));
6335 match(Set dst (MaxV src1 src2));
6336 effect(TEMP dst);
6337 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
6338 ins_encode %{
6339 int vlen_enc = vector_length_encoding(this);
6340 int opcode = this->ideal_Opcode();
6341 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6342 assert(elem_bt == T_LONG, "sanity");
6343
6344 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6345 %}
6346 ins_pipe( pipe_slow );
6347 %}
6348
6349 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
6350 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
6351 Matcher::vector_element_basic_type(n) == T_LONG);
6352 match(Set dst (MinV src1 src2));
6353 match(Set dst (MaxV src1 src2));
6354 format %{ "vector_minmaxL $dst,$src1,src2\t! " %}
6355 ins_encode %{
6356 assert(UseAVX > 2, "required");
6357
6358 int vlen_enc = vector_length_encoding(this);
6359 int opcode = this->ideal_Opcode();
6360 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6361 assert(elem_bt == T_LONG, "sanity");
6362
6363 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
6364 %}
6365 ins_pipe( pipe_slow );
6366 %}
6367
6368 // Float/Double vector Min/Max
6369 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
6370 predicate(VM_Version::supports_avx10_2() &&
6371 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
6372 match(Set dst (MinV a b));
6373 match(Set dst (MaxV a b));
6374 format %{ "vector_minmaxFP $dst, $a, $b" %}
6375 ins_encode %{
6376 int vlen_enc = vector_length_encoding(this);
6377 int opcode = this->ideal_Opcode();
6378 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6379 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
6380 %}
6381 ins_pipe( pipe_slow );
6382 %}
6383
6384 // Float/Double vector Min/Max
6385 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
6386 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
6387 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
6388 UseAVX > 0);
6389 match(Set dst (MinV a b));
6390 match(Set dst (MaxV a b));
6391 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
6392 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
6393 ins_encode %{
6394 assert(UseAVX > 0, "required");
6395
6396 int opcode = this->ideal_Opcode();
6397 int vlen_enc = vector_length_encoding(this);
6398 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6399
6400 __ vminmax_fp(opcode, elem_bt,
6401 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
6402 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
6403 %}
6404 ins_pipe( pipe_slow );
6405 %}
6406
6407 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
6408 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
6409 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
6410 match(Set dst (MinV a b));
6411 match(Set dst (MaxV a b));
6412 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
6413 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
6414 ins_encode %{
6415 assert(UseAVX > 2, "required");
6416
6417 int opcode = this->ideal_Opcode();
6418 int vlen_enc = vector_length_encoding(this);
6419 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6420
6421 __ evminmax_fp(opcode, elem_bt,
6422 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
6423 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
6424 %}
6425 ins_pipe( pipe_slow );
6426 %}
6427
6428 // ------------------------------ Unsigned vector Min/Max ----------------------
6429
6430 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
6431 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
6432 match(Set dst (UMinV a b));
6433 match(Set dst (UMaxV a b));
6434 format %{ "vector_uminmax $dst,$a,$b\t!" %}
6435 ins_encode %{
6436 int opcode = this->ideal_Opcode();
6437 int vlen_enc = vector_length_encoding(this);
6438 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6439 assert(is_integral_type(elem_bt), "");
6440 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
6441 %}
6442 ins_pipe( pipe_slow );
6443 %}
6444
6445 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
6446 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
6447 match(Set dst (UMinV a (LoadVector b)));
6448 match(Set dst (UMaxV a (LoadVector b)));
6449 format %{ "vector_uminmax $dst,$a,$b\t!" %}
6450 ins_encode %{
6451 int opcode = this->ideal_Opcode();
6452 int vlen_enc = vector_length_encoding(this);
6453 BasicType elem_bt = Matcher::vector_element_basic_type(this);
6454 assert(is_integral_type(elem_bt), "");
6455 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
6456 %}
6457 ins_pipe( pipe_slow );
6458 %}
6459
6460 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
6461 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
6462 match(Set dst (UMinV a b));
6463 match(Set dst (UMaxV a b));
6464 effect(TEMP xtmp1, TEMP xtmp2);
6465 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
6466 ins_encode %{
6467 int opcode = this->ideal_Opcode();
6468 int vlen_enc = vector_length_encoding(this);
6469 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
6470 %}
6471 ins_pipe( pipe_slow );
6472 %}
6473
6474 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
6475 match(Set dst (UMinV (Binary dst src2) mask));
6476 match(Set dst (UMaxV (Binary dst src2) mask));
6477 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
6478 ins_encode %{
6479 int vlen_enc = vector_length_encoding(this);
6480 BasicType bt = Matcher::vector_element_basic_type(this);
6481 int opc = this->ideal_Opcode();
6482 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
6483 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
6484 %}
6485 ins_pipe( pipe_slow );
6486 %}
6487
6488 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
6489 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
6490 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
6491 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
6492 ins_encode %{
6493 int vlen_enc = vector_length_encoding(this);
6494 BasicType bt = Matcher::vector_element_basic_type(this);
6495 int opc = this->ideal_Opcode();
6496 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
6497 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
6498 %}
6499 ins_pipe( pipe_slow );
6500 %}
6501
6502 // --------------------------------- Signum/CopySign ---------------------------
6503
6504 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
6505 match(Set dst (SignumF dst (Binary zero one)));
6506 effect(KILL cr);
6507 format %{ "signumF $dst, $dst" %}
6508 ins_encode %{
6509 int opcode = this->ideal_Opcode();
6510 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6511 %}
6512 ins_pipe( pipe_slow );
6513 %}
6514
6515 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
6516 match(Set dst (SignumD dst (Binary zero one)));
6517 effect(KILL cr);
6518 format %{ "signumD $dst, $dst" %}
6519 ins_encode %{
6520 int opcode = this->ideal_Opcode();
6521 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
6522 %}
6523 ins_pipe( pipe_slow );
6524 %}
6525
6526 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
6527 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
6528 match(Set dst (SignumVF src (Binary zero one)));
6529 match(Set dst (SignumVD src (Binary zero one)));
6530 effect(TEMP dst, TEMP xtmp1);
6531 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
6532 ins_encode %{
6533 int opcode = this->ideal_Opcode();
6534 int vec_enc = vector_length_encoding(this);
6535 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6536 $xtmp1$$XMMRegister, vec_enc);
6537 %}
6538 ins_pipe( pipe_slow );
6539 %}
6540
6541 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
6542 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
6543 match(Set dst (SignumVF src (Binary zero one)));
6544 match(Set dst (SignumVD src (Binary zero one)));
6545 effect(TEMP dst, TEMP ktmp1);
6546 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
6547 ins_encode %{
6548 int opcode = this->ideal_Opcode();
6549 int vec_enc = vector_length_encoding(this);
6550 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
6551 $ktmp1$$KRegister, vec_enc);
6552 %}
6553 ins_pipe( pipe_slow );
6554 %}
6555
6556 // ---------------------------------------
6557 // For copySign use 0xE4 as writemask for vpternlog
6558 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
6559 // C (xmm2) is set to 0x7FFFFFFF
6560 // Wherever xmm2 is 0, we want to pick from B (sign)
6561 // Wherever xmm2 is 1, we want to pick from A (src)
6562 //
6563 // A B C Result
6564 // 0 0 0 0
6565 // 0 0 1 0
6566 // 0 1 0 1
6567 // 0 1 1 0
6568 // 1 0 0 0
6569 // 1 0 1 1
6570 // 1 1 0 1
6571 // 1 1 1 1
6572 //
6573 // Result going from high bit to low bit is 0x11100100 = 0xe4
6574 // ---------------------------------------
6575
6576 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
6577 match(Set dst (CopySignF dst src));
6578 effect(TEMP tmp1, TEMP tmp2);
6579 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
6580 ins_encode %{
6581 __ movl($tmp2$$Register, 0x7FFFFFFF);
6582 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
6583 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
6584 %}
6585 ins_pipe( pipe_slow );
6586 %}
6587
6588 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
6589 match(Set dst (CopySignD dst (Binary src zero)));
6590 ins_cost(100);
6591 effect(TEMP tmp1, TEMP tmp2);
6592 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
6593 ins_encode %{
6594 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
6595 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
6596 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
6597 %}
6598 ins_pipe( pipe_slow );
6599 %}
6600
6601 //----------------------------- CompressBits/ExpandBits ------------------------
6602
6603 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
6604 predicate(n->bottom_type()->isa_int());
6605 match(Set dst (CompressBits src mask));
6606 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
6607 ins_encode %{
6608 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
6609 %}
6610 ins_pipe( pipe_slow );
6611 %}
6612
6613 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
6614 predicate(n->bottom_type()->isa_int());
6615 match(Set dst (ExpandBits src mask));
6616 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
6617 ins_encode %{
6618 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
6619 %}
6620 ins_pipe( pipe_slow );
6621 %}
6622
6623 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
6624 predicate(n->bottom_type()->isa_int());
6625 match(Set dst (CompressBits src (LoadI mask)));
6626 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
6627 ins_encode %{
6628 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
6629 %}
6630 ins_pipe( pipe_slow );
6631 %}
6632
6633 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
6634 predicate(n->bottom_type()->isa_int());
6635 match(Set dst (ExpandBits src (LoadI mask)));
6636 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
6637 ins_encode %{
6638 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
6639 %}
6640 ins_pipe( pipe_slow );
6641 %}
6642
6643 // --------------------------------- Sqrt --------------------------------------
6644
6645 instruct vsqrtF_reg(vec dst, vec src) %{
6646 match(Set dst (SqrtVF src));
6647 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
6648 ins_encode %{
6649 assert(UseAVX > 0, "required");
6650 int vlen_enc = vector_length_encoding(this);
6651 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6652 %}
6653 ins_pipe( pipe_slow );
6654 %}
6655
6656 instruct vsqrtF_mem(vec dst, memory mem) %{
6657 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
6658 match(Set dst (SqrtVF (LoadVector mem)));
6659 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
6660 ins_encode %{
6661 assert(UseAVX > 0, "required");
6662 int vlen_enc = vector_length_encoding(this);
6663 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
6664 %}
6665 ins_pipe( pipe_slow );
6666 %}
6667
6668 // Floating point vector sqrt
6669 instruct vsqrtD_reg(vec dst, vec src) %{
6670 match(Set dst (SqrtVD src));
6671 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
6672 ins_encode %{
6673 assert(UseAVX > 0, "required");
6674 int vlen_enc = vector_length_encoding(this);
6675 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6676 %}
6677 ins_pipe( pipe_slow );
6678 %}
6679
6680 instruct vsqrtD_mem(vec dst, memory mem) %{
6681 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
6682 match(Set dst (SqrtVD (LoadVector mem)));
6683 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
6684 ins_encode %{
6685 assert(UseAVX > 0, "required");
6686 int vlen_enc = vector_length_encoding(this);
6687 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
6688 %}
6689 ins_pipe( pipe_slow );
6690 %}
6691
6692 // ------------------------------ Shift ---------------------------------------
6693
6694 // Left and right shift count vectors are the same on x86
6695 // (only lowest bits of xmm reg are used for count).
6696 instruct vshiftcnt(vec dst, rRegI cnt) %{
6697 match(Set dst (LShiftCntV cnt));
6698 match(Set dst (RShiftCntV cnt));
6699 format %{ "movdl $dst,$cnt\t! load shift count" %}
6700 ins_encode %{
6701 __ movdl($dst$$XMMRegister, $cnt$$Register);
6702 %}
6703 ins_pipe( pipe_slow );
6704 %}
6705
6706 // Byte vector shift
6707 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
6708 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
6709 match(Set dst ( LShiftVB src shift));
6710 match(Set dst ( RShiftVB src shift));
6711 match(Set dst (URShiftVB src shift));
6712 effect(TEMP dst, USE src, USE shift, TEMP tmp);
6713 format %{"vector_byte_shift $dst,$src,$shift" %}
6714 ins_encode %{
6715 assert(UseSSE > 3, "required");
6716 int opcode = this->ideal_Opcode();
6717 bool sign = (opcode != Op_URShiftVB);
6718 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
6719 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
6720 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
6721 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
6722 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
6723 %}
6724 ins_pipe( pipe_slow );
6725 %}
6726
6727 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
6728 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
6729 UseAVX <= 1);
6730 match(Set dst ( LShiftVB src shift));
6731 match(Set dst ( RShiftVB src shift));
6732 match(Set dst (URShiftVB src shift));
6733 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
6734 format %{"vector_byte_shift $dst,$src,$shift" %}
6735 ins_encode %{
6736 assert(UseSSE > 3, "required");
6737 int opcode = this->ideal_Opcode();
6738 bool sign = (opcode != Op_URShiftVB);
6739 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
6740 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
6741 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
6742 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
6743 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
6744 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
6745 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
6746 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
6747 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
6748 %}
6749 ins_pipe( pipe_slow );
6750 %}
6751
6752 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
6753 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
6754 UseAVX > 1);
6755 match(Set dst ( LShiftVB src shift));
6756 match(Set dst ( RShiftVB src shift));
6757 match(Set dst (URShiftVB src shift));
6758 effect(TEMP dst, TEMP tmp);
6759 format %{"vector_byte_shift $dst,$src,$shift" %}
6760 ins_encode %{
6761 int opcode = this->ideal_Opcode();
6762 bool sign = (opcode != Op_URShiftVB);
6763 int vlen_enc = Assembler::AVX_256bit;
6764 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
6765 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6766 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
6767 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
6768 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
6769 %}
6770 ins_pipe( pipe_slow );
6771 %}
6772
6773 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
6774 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
6775 match(Set dst ( LShiftVB src shift));
6776 match(Set dst ( RShiftVB src shift));
6777 match(Set dst (URShiftVB src shift));
6778 effect(TEMP dst, TEMP tmp);
6779 format %{"vector_byte_shift $dst,$src,$shift" %}
6780 ins_encode %{
6781 assert(UseAVX > 1, "required");
6782 int opcode = this->ideal_Opcode();
6783 bool sign = (opcode != Op_URShiftVB);
6784 int vlen_enc = Assembler::AVX_256bit;
6785 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
6786 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6787 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6788 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6789 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6790 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
6791 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
6792 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6793 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
6794 %}
6795 ins_pipe( pipe_slow );
6796 %}
6797
6798 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
6799 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
6800 match(Set dst ( LShiftVB src shift));
6801 match(Set dst (RShiftVB src shift));
6802 match(Set dst (URShiftVB src shift));
6803 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
6804 format %{"vector_byte_shift $dst,$src,$shift" %}
6805 ins_encode %{
6806 assert(UseAVX > 2, "required");
6807 int opcode = this->ideal_Opcode();
6808 bool sign = (opcode != Op_URShiftVB);
6809 int vlen_enc = Assembler::AVX_512bit;
6810 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
6811 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
6812 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
6813 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6814 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6815 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
6816 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6817 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6818 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6819 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
6820 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
6821 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
6822 %}
6823 ins_pipe( pipe_slow );
6824 %}
6825
6826 // Shorts vector logical right shift produces incorrect Java result
6827 // for negative data because java code convert short value into int with
6828 // sign extension before a shift. But char vectors are fine since chars are
6829 // unsigned values.
6830 // Shorts/Chars vector left shift
6831 instruct vshiftS(vec dst, vec src, vec shift) %{
6832 predicate(!n->as_ShiftV()->is_var_shift());
6833 match(Set dst ( LShiftVS src shift));
6834 match(Set dst ( RShiftVS src shift));
6835 match(Set dst (URShiftVS src shift));
6836 effect(TEMP dst, USE src, USE shift);
6837 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
6838 ins_encode %{
6839 int opcode = this->ideal_Opcode();
6840 if (UseAVX > 0) {
6841 int vlen_enc = vector_length_encoding(this);
6842 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6843 } else {
6844 int vlen = Matcher::vector_length(this);
6845 if (vlen == 2) {
6846 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6847 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6848 } else if (vlen == 4) {
6849 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6850 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6851 } else {
6852 assert (vlen == 8, "sanity");
6853 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6854 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6855 }
6856 }
6857 %}
6858 ins_pipe( pipe_slow );
6859 %}
6860
6861 // Integers vector left shift
6862 instruct vshiftI(vec dst, vec src, vec shift) %{
6863 predicate(!n->as_ShiftV()->is_var_shift());
6864 match(Set dst ( LShiftVI src shift));
6865 match(Set dst ( RShiftVI src shift));
6866 match(Set dst (URShiftVI src shift));
6867 effect(TEMP dst, USE src, USE shift);
6868 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
6869 ins_encode %{
6870 int opcode = this->ideal_Opcode();
6871 if (UseAVX > 0) {
6872 int vlen_enc = vector_length_encoding(this);
6873 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6874 } else {
6875 int vlen = Matcher::vector_length(this);
6876 if (vlen == 2) {
6877 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6878 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6879 } else {
6880 assert(vlen == 4, "sanity");
6881 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6882 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6883 }
6884 }
6885 %}
6886 ins_pipe( pipe_slow );
6887 %}
6888
6889 // Integers vector left constant shift
6890 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
6891 match(Set dst (LShiftVI src (LShiftCntV shift)));
6892 match(Set dst (RShiftVI src (RShiftCntV shift)));
6893 match(Set dst (URShiftVI src (RShiftCntV shift)));
6894 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
6895 ins_encode %{
6896 int opcode = this->ideal_Opcode();
6897 if (UseAVX > 0) {
6898 int vector_len = vector_length_encoding(this);
6899 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
6900 } else {
6901 int vlen = Matcher::vector_length(this);
6902 if (vlen == 2) {
6903 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6904 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
6905 } else {
6906 assert(vlen == 4, "sanity");
6907 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6908 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
6909 }
6910 }
6911 %}
6912 ins_pipe( pipe_slow );
6913 %}
6914
6915 // Longs vector shift
6916 instruct vshiftL(vec dst, vec src, vec shift) %{
6917 predicate(!n->as_ShiftV()->is_var_shift());
6918 match(Set dst ( LShiftVL src shift));
6919 match(Set dst (URShiftVL src shift));
6920 effect(TEMP dst, USE src, USE shift);
6921 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
6922 ins_encode %{
6923 int opcode = this->ideal_Opcode();
6924 if (UseAVX > 0) {
6925 int vlen_enc = vector_length_encoding(this);
6926 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6927 } else {
6928 assert(Matcher::vector_length(this) == 2, "");
6929 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6930 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
6931 }
6932 %}
6933 ins_pipe( pipe_slow );
6934 %}
6935
6936 // Longs vector constant shift
6937 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
6938 match(Set dst (LShiftVL src (LShiftCntV shift)));
6939 match(Set dst (URShiftVL src (RShiftCntV shift)));
6940 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
6941 ins_encode %{
6942 int opcode = this->ideal_Opcode();
6943 if (UseAVX > 0) {
6944 int vector_len = vector_length_encoding(this);
6945 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
6946 } else {
6947 assert(Matcher::vector_length(this) == 2, "");
6948 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6949 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
6950 }
6951 %}
6952 ins_pipe( pipe_slow );
6953 %}
6954
6955 // -------------------ArithmeticRightShift -----------------------------------
6956 // Long vector arithmetic right shift
6957 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
6958 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
6959 match(Set dst (RShiftVL src shift));
6960 effect(TEMP dst, TEMP tmp);
6961 format %{ "vshiftq $dst,$src,$shift" %}
6962 ins_encode %{
6963 uint vlen = Matcher::vector_length(this);
6964 if (vlen == 2) {
6965 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
6966 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
6967 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
6968 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
6969 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
6970 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
6971 } else {
6972 assert(vlen == 4, "sanity");
6973 assert(UseAVX > 1, "required");
6974 int vlen_enc = Assembler::AVX_256bit;
6975 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6976 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
6977 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6978 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6979 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
6980 }
6981 %}
6982 ins_pipe( pipe_slow );
6983 %}
6984
6985 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
6986 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
6987 match(Set dst (RShiftVL src shift));
6988 format %{ "vshiftq $dst,$src,$shift" %}
6989 ins_encode %{
6990 int vlen_enc = vector_length_encoding(this);
6991 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
6992 %}
6993 ins_pipe( pipe_slow );
6994 %}
6995
6996 // ------------------- Variable Shift -----------------------------
6997 // Byte variable shift
6998 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
6999 predicate(Matcher::vector_length(n) <= 8 &&
7000 n->as_ShiftV()->is_var_shift() &&
7001 !VM_Version::supports_avx512bw());
7002 match(Set dst ( LShiftVB src shift));
7003 match(Set dst ( RShiftVB src shift));
7004 match(Set dst (URShiftVB src shift));
7005 effect(TEMP dst, TEMP vtmp);
7006 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
7007 ins_encode %{
7008 assert(UseAVX >= 2, "required");
7009
7010 int opcode = this->ideal_Opcode();
7011 int vlen_enc = Assembler::AVX_128bit;
7012 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
7013 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
7014 %}
7015 ins_pipe( pipe_slow );
7016 %}
7017
7018 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
7019 predicate(Matcher::vector_length(n) == 16 &&
7020 n->as_ShiftV()->is_var_shift() &&
7021 !VM_Version::supports_avx512bw());
7022 match(Set dst ( LShiftVB src shift));
7023 match(Set dst ( RShiftVB src shift));
7024 match(Set dst (URShiftVB src shift));
7025 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
7026 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
7027 ins_encode %{
7028 assert(UseAVX >= 2, "required");
7029
7030 int opcode = this->ideal_Opcode();
7031 int vlen_enc = Assembler::AVX_128bit;
7032 // Shift lower half and get word result in dst
7033 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
7034
7035 // Shift upper half and get word result in vtmp1
7036 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
7037 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
7038 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
7039
7040 // Merge and down convert the two word results to byte in dst
7041 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
7042 %}
7043 ins_pipe( pipe_slow );
7044 %}
7045
7046 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
7047 predicate(Matcher::vector_length(n) == 32 &&
7048 n->as_ShiftV()->is_var_shift() &&
7049 !VM_Version::supports_avx512bw());
7050 match(Set dst ( LShiftVB src shift));
7051 match(Set dst ( RShiftVB src shift));
7052 match(Set dst (URShiftVB src shift));
7053 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
7054 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
7055 ins_encode %{
7056 assert(UseAVX >= 2, "required");
7057
7058 int opcode = this->ideal_Opcode();
7059 int vlen_enc = Assembler::AVX_128bit;
7060 // Process lower 128 bits and get result in dst
7061 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
7062 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
7063 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
7064 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
7065 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
7066
7067 // Process higher 128 bits and get result in vtmp3
7068 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
7069 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
7070 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
7071 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
7072 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
7073 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
7074 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
7075
7076 // Merge the two results in dst
7077 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
7078 %}
7079 ins_pipe( pipe_slow );
7080 %}
7081
7082 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
7083 predicate(Matcher::vector_length(n) <= 32 &&
7084 n->as_ShiftV()->is_var_shift() &&
7085 VM_Version::supports_avx512bw());
7086 match(Set dst ( LShiftVB src shift));
7087 match(Set dst ( RShiftVB src shift));
7088 match(Set dst (URShiftVB src shift));
7089 effect(TEMP dst, TEMP vtmp);
7090 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
7091 ins_encode %{
7092 assert(UseAVX > 2, "required");
7093
7094 int opcode = this->ideal_Opcode();
7095 int vlen_enc = vector_length_encoding(this);
7096 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
7097 %}
7098 ins_pipe( pipe_slow );
7099 %}
7100
7101 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
7102 predicate(Matcher::vector_length(n) == 64 &&
7103 n->as_ShiftV()->is_var_shift() &&
7104 VM_Version::supports_avx512bw());
7105 match(Set dst ( LShiftVB src shift));
7106 match(Set dst ( RShiftVB src shift));
7107 match(Set dst (URShiftVB src shift));
7108 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
7109 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
7110 ins_encode %{
7111 assert(UseAVX > 2, "required");
7112
7113 int opcode = this->ideal_Opcode();
7114 int vlen_enc = Assembler::AVX_256bit;
7115 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
7116 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
7117 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
7118 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
7119 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
7120 %}
7121 ins_pipe( pipe_slow );
7122 %}
7123
7124 // Short variable shift
7125 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
7126 predicate(Matcher::vector_length(n) <= 8 &&
7127 n->as_ShiftV()->is_var_shift() &&
7128 !VM_Version::supports_avx512bw());
7129 match(Set dst ( LShiftVS src shift));
7130 match(Set dst ( RShiftVS src shift));
7131 match(Set dst (URShiftVS src shift));
7132 effect(TEMP dst, TEMP vtmp);
7133 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
7134 ins_encode %{
7135 assert(UseAVX >= 2, "required");
7136
7137 int opcode = this->ideal_Opcode();
7138 bool sign = (opcode != Op_URShiftVS);
7139 int vlen_enc = Assembler::AVX_256bit;
7140 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
7141 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
7142 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
7143 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7144 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
7145 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
7146 %}
7147 ins_pipe( pipe_slow );
7148 %}
7149
7150 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
7151 predicate(Matcher::vector_length(n) == 16 &&
7152 n->as_ShiftV()->is_var_shift() &&
7153 !VM_Version::supports_avx512bw());
7154 match(Set dst ( LShiftVS src shift));
7155 match(Set dst ( RShiftVS src shift));
7156 match(Set dst (URShiftVS src shift));
7157 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
7158 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
7159 ins_encode %{
7160 assert(UseAVX >= 2, "required");
7161
7162 int opcode = this->ideal_Opcode();
7163 bool sign = (opcode != Op_URShiftVS);
7164 int vlen_enc = Assembler::AVX_256bit;
7165 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
7166 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
7167 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7168 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
7169 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7170
7171 // Shift upper half, with result in dst using vtmp1 as TEMP
7172 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
7173 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
7174 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7175 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
7176 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
7177 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7178
7179 // Merge lower and upper half result into dst
7180 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7181 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
7182 %}
7183 ins_pipe( pipe_slow );
7184 %}
7185
7186 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
7187 predicate(n->as_ShiftV()->is_var_shift() &&
7188 VM_Version::supports_avx512bw());
7189 match(Set dst ( LShiftVS src shift));
7190 match(Set dst ( RShiftVS src shift));
7191 match(Set dst (URShiftVS src shift));
7192 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
7193 ins_encode %{
7194 assert(UseAVX > 2, "required");
7195
7196 int opcode = this->ideal_Opcode();
7197 int vlen_enc = vector_length_encoding(this);
7198 if (!VM_Version::supports_avx512vl()) {
7199 vlen_enc = Assembler::AVX_512bit;
7200 }
7201 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7202 %}
7203 ins_pipe( pipe_slow );
7204 %}
7205
7206 //Integer variable shift
7207 instruct vshiftI_var(vec dst, vec src, vec shift) %{
7208 predicate(n->as_ShiftV()->is_var_shift());
7209 match(Set dst ( LShiftVI src shift));
7210 match(Set dst ( RShiftVI src shift));
7211 match(Set dst (URShiftVI src shift));
7212 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
7213 ins_encode %{
7214 assert(UseAVX >= 2, "required");
7215
7216 int opcode = this->ideal_Opcode();
7217 int vlen_enc = vector_length_encoding(this);
7218 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7219 %}
7220 ins_pipe( pipe_slow );
7221 %}
7222
7223 //Long variable shift
7224 instruct vshiftL_var(vec dst, vec src, vec shift) %{
7225 predicate(n->as_ShiftV()->is_var_shift());
7226 match(Set dst ( LShiftVL src shift));
7227 match(Set dst (URShiftVL src shift));
7228 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
7229 ins_encode %{
7230 assert(UseAVX >= 2, "required");
7231
7232 int opcode = this->ideal_Opcode();
7233 int vlen_enc = vector_length_encoding(this);
7234 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7235 %}
7236 ins_pipe( pipe_slow );
7237 %}
7238
7239 //Long variable right shift arithmetic
7240 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
7241 predicate(Matcher::vector_length(n) <= 4 &&
7242 n->as_ShiftV()->is_var_shift() &&
7243 UseAVX == 2);
7244 match(Set dst (RShiftVL src shift));
7245 effect(TEMP dst, TEMP vtmp);
7246 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
7247 ins_encode %{
7248 int opcode = this->ideal_Opcode();
7249 int vlen_enc = vector_length_encoding(this);
7250 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
7251 $vtmp$$XMMRegister);
7252 %}
7253 ins_pipe( pipe_slow );
7254 %}
7255
7256 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
7257 predicate(n->as_ShiftV()->is_var_shift() &&
7258 UseAVX > 2);
7259 match(Set dst (RShiftVL src shift));
7260 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
7261 ins_encode %{
7262 int opcode = this->ideal_Opcode();
7263 int vlen_enc = vector_length_encoding(this);
7264 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
7265 %}
7266 ins_pipe( pipe_slow );
7267 %}
7268
7269 // --------------------------------- AND --------------------------------------
7270
7271 instruct vand(vec dst, vec src) %{
7272 predicate(UseAVX == 0);
7273 match(Set dst (AndV dst src));
7274 format %{ "pand $dst,$src\t! and vectors" %}
7275 ins_encode %{
7276 __ pand($dst$$XMMRegister, $src$$XMMRegister);
7277 %}
7278 ins_pipe( pipe_slow );
7279 %}
7280
7281 instruct vand_reg(vec dst, vec src1, vec src2) %{
7282 predicate(UseAVX > 0);
7283 match(Set dst (AndV src1 src2));
7284 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
7285 ins_encode %{
7286 int vlen_enc = vector_length_encoding(this);
7287 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
7288 %}
7289 ins_pipe( pipe_slow );
7290 %}
7291
7292 instruct vand_mem(vec dst, vec src, memory mem) %{
7293 predicate((UseAVX > 0) &&
7294 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
7295 match(Set dst (AndV src (LoadVector mem)));
7296 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
7297 ins_encode %{
7298 int vlen_enc = vector_length_encoding(this);
7299 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
7300 %}
7301 ins_pipe( pipe_slow );
7302 %}
7303
7304 // --------------------------------- OR ---------------------------------------
7305
7306 instruct vor(vec dst, vec src) %{
7307 predicate(UseAVX == 0);
7308 match(Set dst (OrV dst src));
7309 format %{ "por $dst,$src\t! or vectors" %}
7310 ins_encode %{
7311 __ por($dst$$XMMRegister, $src$$XMMRegister);
7312 %}
7313 ins_pipe( pipe_slow );
7314 %}
7315
7316 instruct vor_reg(vec dst, vec src1, vec src2) %{
7317 predicate(UseAVX > 0);
7318 match(Set dst (OrV src1 src2));
7319 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
7320 ins_encode %{
7321 int vlen_enc = vector_length_encoding(this);
7322 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
7323 %}
7324 ins_pipe( pipe_slow );
7325 %}
7326
7327 instruct vor_mem(vec dst, vec src, memory mem) %{
7328 predicate((UseAVX > 0) &&
7329 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
7330 match(Set dst (OrV src (LoadVector mem)));
7331 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
7332 ins_encode %{
7333 int vlen_enc = vector_length_encoding(this);
7334 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
7335 %}
7336 ins_pipe( pipe_slow );
7337 %}
7338
7339 // --------------------------------- XOR --------------------------------------
7340
7341 instruct vxor(vec dst, vec src) %{
7342 predicate(UseAVX == 0);
7343 match(Set dst (XorV dst src));
7344 format %{ "pxor $dst,$src\t! xor vectors" %}
7345 ins_encode %{
7346 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
7347 %}
7348 ins_pipe( pipe_slow );
7349 %}
7350
7351 instruct vxor_reg(vec dst, vec src1, vec src2) %{
7352 predicate(UseAVX > 0);
7353 match(Set dst (XorV src1 src2));
7354 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
7355 ins_encode %{
7356 int vlen_enc = vector_length_encoding(this);
7357 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
7358 %}
7359 ins_pipe( pipe_slow );
7360 %}
7361
7362 instruct vxor_mem(vec dst, vec src, memory mem) %{
7363 predicate((UseAVX > 0) &&
7364 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
7365 match(Set dst (XorV src (LoadVector mem)));
7366 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
7367 ins_encode %{
7368 int vlen_enc = vector_length_encoding(this);
7369 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
7370 %}
7371 ins_pipe( pipe_slow );
7372 %}
7373
7374 // --------------------------------- VectorCast --------------------------------------
7375
7376 instruct vcastBtoX(vec dst, vec src) %{
7377 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
7378 match(Set dst (VectorCastB2X src));
7379 format %{ "vector_cast_b2x $dst,$src\t!" %}
7380 ins_encode %{
7381 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7382 int vlen_enc = vector_length_encoding(this);
7383 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7384 %}
7385 ins_pipe( pipe_slow );
7386 %}
7387
7388 instruct vcastBtoD(legVec dst, legVec src) %{
7389 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
7390 match(Set dst (VectorCastB2X src));
7391 format %{ "vector_cast_b2x $dst,$src\t!" %}
7392 ins_encode %{
7393 int vlen_enc = vector_length_encoding(this);
7394 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7395 %}
7396 ins_pipe( pipe_slow );
7397 %}
7398
7399 instruct castStoX(vec dst, vec src) %{
7400 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
7401 Matcher::vector_length(n->in(1)) <= 8 && // src
7402 Matcher::vector_element_basic_type(n) == T_BYTE);
7403 match(Set dst (VectorCastS2X src));
7404 format %{ "vector_cast_s2x $dst,$src" %}
7405 ins_encode %{
7406 assert(UseAVX > 0, "required");
7407
7408 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
7409 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
7410 %}
7411 ins_pipe( pipe_slow );
7412 %}
7413
7414 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
7415 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
7416 Matcher::vector_length(n->in(1)) == 16 && // src
7417 Matcher::vector_element_basic_type(n) == T_BYTE);
7418 effect(TEMP dst, TEMP vtmp);
7419 match(Set dst (VectorCastS2X src));
7420 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
7421 ins_encode %{
7422 assert(UseAVX > 0, "required");
7423
7424 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
7425 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
7426 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
7427 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
7428 %}
7429 ins_pipe( pipe_slow );
7430 %}
7431
7432 instruct vcastStoX_evex(vec dst, vec src) %{
7433 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
7434 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
7435 match(Set dst (VectorCastS2X src));
7436 format %{ "vector_cast_s2x $dst,$src\t!" %}
7437 ins_encode %{
7438 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7439 int src_vlen_enc = vector_length_encoding(this, $src);
7440 int vlen_enc = vector_length_encoding(this);
7441 switch (to_elem_bt) {
7442 case T_BYTE:
7443 if (!VM_Version::supports_avx512vl()) {
7444 vlen_enc = Assembler::AVX_512bit;
7445 }
7446 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7447 break;
7448 case T_INT:
7449 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7450 break;
7451 case T_FLOAT:
7452 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7453 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7454 break;
7455 case T_LONG:
7456 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7457 break;
7458 case T_DOUBLE: {
7459 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
7460 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
7461 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7462 break;
7463 }
7464 default:
7465 ShouldNotReachHere();
7466 }
7467 %}
7468 ins_pipe( pipe_slow );
7469 %}
7470
7471 instruct castItoX(vec dst, vec src) %{
7472 predicate(UseAVX <= 2 &&
7473 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
7474 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
7475 match(Set dst (VectorCastI2X src));
7476 format %{ "vector_cast_i2x $dst,$src" %}
7477 ins_encode %{
7478 assert(UseAVX > 0, "required");
7479
7480 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7481 int vlen_enc = vector_length_encoding(this, $src);
7482
7483 if (to_elem_bt == T_BYTE) {
7484 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
7485 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7486 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7487 } else {
7488 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
7489 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7490 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7491 }
7492 %}
7493 ins_pipe( pipe_slow );
7494 %}
7495
7496 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
7497 predicate(UseAVX <= 2 &&
7498 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
7499 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
7500 match(Set dst (VectorCastI2X src));
7501 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
7502 effect(TEMP dst, TEMP vtmp);
7503 ins_encode %{
7504 assert(UseAVX > 0, "required");
7505
7506 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7507 int vlen_enc = vector_length_encoding(this, $src);
7508
7509 if (to_elem_bt == T_BYTE) {
7510 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
7511 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
7512 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7513 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7514 } else {
7515 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
7516 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
7517 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
7518 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7519 }
7520 %}
7521 ins_pipe( pipe_slow );
7522 %}
7523
7524 instruct vcastItoX_evex(vec dst, vec src) %{
7525 predicate(UseAVX > 2 ||
7526 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
7527 match(Set dst (VectorCastI2X src));
7528 format %{ "vector_cast_i2x $dst,$src\t!" %}
7529 ins_encode %{
7530 assert(UseAVX > 0, "required");
7531
7532 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
7533 int src_vlen_enc = vector_length_encoding(this, $src);
7534 int dst_vlen_enc = vector_length_encoding(this);
7535 switch (dst_elem_bt) {
7536 case T_BYTE:
7537 if (!VM_Version::supports_avx512vl()) {
7538 src_vlen_enc = Assembler::AVX_512bit;
7539 }
7540 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7541 break;
7542 case T_SHORT:
7543 if (!VM_Version::supports_avx512vl()) {
7544 src_vlen_enc = Assembler::AVX_512bit;
7545 }
7546 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7547 break;
7548 case T_FLOAT:
7549 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
7550 break;
7551 case T_LONG:
7552 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
7553 break;
7554 case T_DOUBLE:
7555 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
7556 break;
7557 default:
7558 ShouldNotReachHere();
7559 }
7560 %}
7561 ins_pipe( pipe_slow );
7562 %}
7563
7564 instruct vcastLtoBS(vec dst, vec src) %{
7565 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
7566 UseAVX <= 2);
7567 match(Set dst (VectorCastL2X src));
7568 format %{ "vector_cast_l2x $dst,$src" %}
7569 ins_encode %{
7570 assert(UseAVX > 0, "required");
7571
7572 int vlen = Matcher::vector_length_in_bytes(this, $src);
7573 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7574 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
7575 : ExternalAddress(vector_int_to_short_mask());
7576 if (vlen <= 16) {
7577 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
7578 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
7579 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7580 } else {
7581 assert(vlen <= 32, "required");
7582 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
7583 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
7584 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
7585 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7586 }
7587 if (to_elem_bt == T_BYTE) {
7588 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
7589 }
7590 %}
7591 ins_pipe( pipe_slow );
7592 %}
7593
7594 instruct vcastLtoX_evex(vec dst, vec src) %{
7595 predicate(UseAVX > 2 ||
7596 (Matcher::vector_element_basic_type(n) == T_INT ||
7597 Matcher::vector_element_basic_type(n) == T_FLOAT ||
7598 Matcher::vector_element_basic_type(n) == T_DOUBLE));
7599 match(Set dst (VectorCastL2X src));
7600 format %{ "vector_cast_l2x $dst,$src\t!" %}
7601 ins_encode %{
7602 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7603 int vlen = Matcher::vector_length_in_bytes(this, $src);
7604 int vlen_enc = vector_length_encoding(this, $src);
7605 switch (to_elem_bt) {
7606 case T_BYTE:
7607 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
7608 vlen_enc = Assembler::AVX_512bit;
7609 }
7610 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7611 break;
7612 case T_SHORT:
7613 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
7614 vlen_enc = Assembler::AVX_512bit;
7615 }
7616 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7617 break;
7618 case T_INT:
7619 if (vlen == 8) {
7620 if ($dst$$XMMRegister != $src$$XMMRegister) {
7621 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7622 }
7623 } else if (vlen == 16) {
7624 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
7625 } else if (vlen == 32) {
7626 if (UseAVX > 2) {
7627 if (!VM_Version::supports_avx512vl()) {
7628 vlen_enc = Assembler::AVX_512bit;
7629 }
7630 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7631 } else {
7632 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
7633 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
7634 }
7635 } else { // vlen == 64
7636 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7637 }
7638 break;
7639 case T_FLOAT:
7640 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
7641 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7642 break;
7643 case T_DOUBLE:
7644 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
7645 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7646 break;
7647
7648 default: assert(false, "%s", type2name(to_elem_bt));
7649 }
7650 %}
7651 ins_pipe( pipe_slow );
7652 %}
7653
7654 instruct vcastFtoD_reg(vec dst, vec src) %{
7655 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
7656 match(Set dst (VectorCastF2X src));
7657 format %{ "vector_cast_f2d $dst,$src\t!" %}
7658 ins_encode %{
7659 int vlen_enc = vector_length_encoding(this);
7660 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7661 %}
7662 ins_pipe( pipe_slow );
7663 %}
7664
7665
7666 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
7667 predicate(!VM_Version::supports_avx10_2() &&
7668 !VM_Version::supports_avx512vl() &&
7669 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
7670 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
7671 is_integral_type(Matcher::vector_element_basic_type(n)));
7672 match(Set dst (VectorCastF2X src));
7673 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
7674 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
7675 ins_encode %{
7676 int vlen_enc = vector_length_encoding(this, $src);
7677 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7678 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than
7679 // 32 bit addresses for register indirect addressing mode since stub constants
7680 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
7681 // However, targets are free to increase this limit, but having a large code cache size
7682 // greater than 2G looks unreasonable in practical scenario, on the hind side with given
7683 // cap we save a temporary register allocation which in limiting case can prevent
7684 // spilling in high register pressure blocks.
7685 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7686 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
7687 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7688 %}
7689 ins_pipe( pipe_slow );
7690 %}
7691
7692 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7693 predicate(!VM_Version::supports_avx10_2() &&
7694 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
7695 is_integral_type(Matcher::vector_element_basic_type(n)));
7696 match(Set dst (VectorCastF2X src));
7697 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7698 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
7699 ins_encode %{
7700 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7701 if (to_elem_bt == T_LONG) {
7702 int vlen_enc = vector_length_encoding(this);
7703 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7704 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7705 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
7706 } else {
7707 int vlen_enc = vector_length_encoding(this, $src);
7708 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7709 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
7710 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7711 }
7712 %}
7713 ins_pipe( pipe_slow );
7714 %}
7715
7716 instruct castFtoX_reg_avx10(vec dst, vec src) %{
7717 predicate(VM_Version::supports_avx10_2() &&
7718 is_integral_type(Matcher::vector_element_basic_type(n)));
7719 match(Set dst (VectorCastF2X src));
7720 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
7721 ins_encode %{
7722 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7723 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
7724 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7725 %}
7726 ins_pipe( pipe_slow );
7727 %}
7728
7729 instruct castFtoX_mem_avx10(vec dst, memory src) %{
7730 predicate(VM_Version::supports_avx10_2() &&
7731 is_integral_type(Matcher::vector_element_basic_type(n)));
7732 match(Set dst (VectorCastF2X (LoadVector src)));
7733 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
7734 ins_encode %{
7735 int vlen = Matcher::vector_length(this);
7736 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7737 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
7738 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
7739 %}
7740 ins_pipe( pipe_slow );
7741 %}
7742
7743 instruct vcastDtoF_reg(vec dst, vec src) %{
7744 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
7745 match(Set dst (VectorCastD2X src));
7746 format %{ "vector_cast_d2x $dst,$src\t!" %}
7747 ins_encode %{
7748 int vlen_enc = vector_length_encoding(this, $src);
7749 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7750 %}
7751 ins_pipe( pipe_slow );
7752 %}
7753
7754 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
7755 predicate(!VM_Version::supports_avx10_2() &&
7756 !VM_Version::supports_avx512vl() &&
7757 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
7758 is_integral_type(Matcher::vector_element_basic_type(n)));
7759 match(Set dst (VectorCastD2X src));
7760 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
7761 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
7762 ins_encode %{
7763 int vlen_enc = vector_length_encoding(this, $src);
7764 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7765 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7766 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
7767 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
7768 %}
7769 ins_pipe( pipe_slow );
7770 %}
7771
7772 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7773 predicate(!VM_Version::supports_avx10_2() &&
7774 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
7775 is_integral_type(Matcher::vector_element_basic_type(n)));
7776 match(Set dst (VectorCastD2X src));
7777 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7778 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
7779 ins_encode %{
7780 int vlen_enc = vector_length_encoding(this, $src);
7781 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7782 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
7783 ExternalAddress(vector_float_signflip());
7784 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
7785 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
7786 %}
7787 ins_pipe( pipe_slow );
7788 %}
7789
7790 instruct castDtoX_reg_avx10(vec dst, vec src) %{
7791 predicate(VM_Version::supports_avx10_2() &&
7792 is_integral_type(Matcher::vector_element_basic_type(n)));
7793 match(Set dst (VectorCastD2X src));
7794 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
7795 ins_encode %{
7796 int vlen_enc = vector_length_encoding(this, $src);
7797 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7798 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7799 %}
7800 ins_pipe( pipe_slow );
7801 %}
7802
7803 instruct castDtoX_mem_avx10(vec dst, memory src) %{
7804 predicate(VM_Version::supports_avx10_2() &&
7805 is_integral_type(Matcher::vector_element_basic_type(n)));
7806 match(Set dst (VectorCastD2X (LoadVector src)));
7807 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
7808 ins_encode %{
7809 int vlen = Matcher::vector_length(this);
7810 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
7811 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7812 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
7813 %}
7814 ins_pipe( pipe_slow );
7815 %}
7816
7817 instruct vucast(vec dst, vec src) %{
7818 match(Set dst (VectorUCastB2X src));
7819 match(Set dst (VectorUCastS2X src));
7820 match(Set dst (VectorUCastI2X src));
7821 format %{ "vector_ucast $dst,$src\t!" %}
7822 ins_encode %{
7823 assert(UseAVX > 0, "required");
7824
7825 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
7826 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
7827 int vlen_enc = vector_length_encoding(this);
7828 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
7829 %}
7830 ins_pipe( pipe_slow );
7831 %}
7832
7833 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
7834 predicate(!VM_Version::supports_avx512vl() &&
7835 Matcher::vector_length_in_bytes(n) < 64 &&
7836 Matcher::vector_element_basic_type(n) == T_INT);
7837 match(Set dst (RoundVF src));
7838 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
7839 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
7840 ins_encode %{
7841 int vlen_enc = vector_length_encoding(this);
7842 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
7843 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
7844 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
7845 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
7846 %}
7847 ins_pipe( pipe_slow );
7848 %}
7849
7850 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7851 predicate((VM_Version::supports_avx512vl() ||
7852 Matcher::vector_length_in_bytes(n) == 64) &&
7853 Matcher::vector_element_basic_type(n) == T_INT);
7854 match(Set dst (RoundVF src));
7855 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7856 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
7857 ins_encode %{
7858 int vlen_enc = vector_length_encoding(this);
7859 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
7860 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
7861 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
7862 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
7863 %}
7864 ins_pipe( pipe_slow );
7865 %}
7866
7867 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
7868 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
7869 match(Set dst (RoundVD src));
7870 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
7871 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
7872 ins_encode %{
7873 int vlen_enc = vector_length_encoding(this);
7874 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
7875 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
7876 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
7877 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
7878 %}
7879 ins_pipe( pipe_slow );
7880 %}
7881
7882 // --------------------------------- VectorMaskCmp --------------------------------------
7883
7884 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
7885 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
7886 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
7887 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
7888 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7889 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7890 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
7891 ins_encode %{
7892 int vlen_enc = vector_length_encoding(this, $src1);
7893 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7894 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7895 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7896 } else {
7897 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7898 }
7899 %}
7900 ins_pipe( pipe_slow );
7901 %}
7902
7903 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
7904 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
7905 n->bottom_type()->isa_vectmask() == nullptr &&
7906 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7907 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7908 effect(TEMP ktmp);
7909 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
7910 ins_encode %{
7911 int vlen_enc = Assembler::AVX_512bit;
7912 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7913 KRegister mask = k0; // The comparison itself is not being masked.
7914 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7915 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7916 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
7917 } else {
7918 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7919 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
7920 }
7921 %}
7922 ins_pipe( pipe_slow );
7923 %}
7924
7925 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
7926 predicate(n->bottom_type()->isa_vectmask() &&
7927 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7928 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7929 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
7930 ins_encode %{
7931 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
7932 int vlen_enc = vector_length_encoding(this, $src1);
7933 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7934 KRegister mask = k0; // The comparison itself is not being masked.
7935 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7936 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7937 } else {
7938 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7939 }
7940 %}
7941 ins_pipe( pipe_slow );
7942 %}
7943
7944 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
7945 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
7946 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
7947 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
7948 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
7949 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
7950 (n->in(2)->get_int() == BoolTest::eq ||
7951 n->in(2)->get_int() == BoolTest::lt ||
7952 n->in(2)->get_int() == BoolTest::gt)); // cond
7953 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7954 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
7955 ins_encode %{
7956 int vlen_enc = vector_length_encoding(this, $src1);
7957 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7958 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
7959 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
7960 %}
7961 ins_pipe( pipe_slow );
7962 %}
7963
7964 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
7965 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
7966 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
7967 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
7968 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
7969 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
7970 (n->in(2)->get_int() == BoolTest::ne ||
7971 n->in(2)->get_int() == BoolTest::le ||
7972 n->in(2)->get_int() == BoolTest::ge)); // cond
7973 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7974 effect(TEMP dst, TEMP xtmp);
7975 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
7976 ins_encode %{
7977 int vlen_enc = vector_length_encoding(this, $src1);
7978 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7979 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
7980 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
7981 %}
7982 ins_pipe( pipe_slow );
7983 %}
7984
7985 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
7986 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
7987 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
7988 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
7989 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
7990 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
7991 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7992 effect(TEMP dst, TEMP xtmp);
7993 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
7994 ins_encode %{
7995 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
7996 int vlen_enc = vector_length_encoding(this, $src1);
7997 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7998 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
7999
8000 if (vlen_enc == Assembler::AVX_128bit) {
8001 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
8002 } else {
8003 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
8004 }
8005 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
8006 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
8007 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
8008 %}
8009 ins_pipe( pipe_slow );
8010 %}
8011
8012 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
8013 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
8014 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
8015 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
8016 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
8017 effect(TEMP ktmp);
8018 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
8019 ins_encode %{
8020 assert(UseAVX > 2, "required");
8021
8022 int vlen_enc = vector_length_encoding(this, $src1);
8023 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
8024 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
8025 KRegister mask = k0; // The comparison itself is not being masked.
8026 bool merge = false;
8027 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
8028
8029 switch (src1_elem_bt) {
8030 case T_INT: {
8031 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8032 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
8033 break;
8034 }
8035 case T_LONG: {
8036 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8037 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
8038 break;
8039 }
8040 default: assert(false, "%s", type2name(src1_elem_bt));
8041 }
8042 %}
8043 ins_pipe( pipe_slow );
8044 %}
8045
8046
8047 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
8048 predicate(n->bottom_type()->isa_vectmask() &&
8049 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
8050 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
8051 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
8052 ins_encode %{
8053 assert(UseAVX > 2, "required");
8054 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
8055
8056 int vlen_enc = vector_length_encoding(this, $src1);
8057 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
8058 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
8059 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
8060
8061 // Comparison i
8062 switch (src1_elem_bt) {
8063 case T_BYTE: {
8064 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8065 break;
8066 }
8067 case T_SHORT: {
8068 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8069 break;
8070 }
8071 case T_INT: {
8072 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8073 break;
8074 }
8075 case T_LONG: {
8076 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
8077 break;
8078 }
8079 default: assert(false, "%s", type2name(src1_elem_bt));
8080 }
8081 %}
8082 ins_pipe( pipe_slow );
8083 %}
8084
8085 // Extract
8086
8087 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
8088 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
8089 match(Set dst (ExtractI src idx));
8090 match(Set dst (ExtractS src idx));
8091 match(Set dst (ExtractB src idx));
8092 format %{ "extractI $dst,$src,$idx\t!" %}
8093 ins_encode %{
8094 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8095
8096 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
8097 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
8098 %}
8099 ins_pipe( pipe_slow );
8100 %}
8101
8102 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
8103 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
8104 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
8105 match(Set dst (ExtractI src idx));
8106 match(Set dst (ExtractS src idx));
8107 match(Set dst (ExtractB src idx));
8108 effect(TEMP vtmp);
8109 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
8110 ins_encode %{
8111 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8112
8113 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
8114 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8115 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
8116 %}
8117 ins_pipe( pipe_slow );
8118 %}
8119
8120 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
8121 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
8122 match(Set dst (ExtractL src idx));
8123 format %{ "extractL $dst,$src,$idx\t!" %}
8124 ins_encode %{
8125 assert(UseSSE >= 4, "required");
8126 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8127
8128 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
8129 %}
8130 ins_pipe( pipe_slow );
8131 %}
8132
8133 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
8134 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
8135 Matcher::vector_length(n->in(1)) == 8); // src
8136 match(Set dst (ExtractL src idx));
8137 effect(TEMP vtmp);
8138 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
8139 ins_encode %{
8140 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8141
8142 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8143 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
8144 %}
8145 ins_pipe( pipe_slow );
8146 %}
8147
8148 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
8149 predicate(Matcher::vector_length(n->in(1)) <= 4);
8150 match(Set dst (ExtractF src idx));
8151 effect(TEMP dst, TEMP vtmp);
8152 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
8153 ins_encode %{
8154 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8155
8156 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
8157 %}
8158 ins_pipe( pipe_slow );
8159 %}
8160
8161 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
8162 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
8163 Matcher::vector_length(n->in(1)/*src*/) == 16);
8164 match(Set dst (ExtractF src idx));
8165 effect(TEMP vtmp);
8166 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
8167 ins_encode %{
8168 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8169
8170 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8171 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
8172 %}
8173 ins_pipe( pipe_slow );
8174 %}
8175
8176 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
8177 predicate(Matcher::vector_length(n->in(1)) == 2); // src
8178 match(Set dst (ExtractD src idx));
8179 format %{ "extractD $dst,$src,$idx\t!" %}
8180 ins_encode %{
8181 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8182
8183 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8184 %}
8185 ins_pipe( pipe_slow );
8186 %}
8187
8188 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
8189 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
8190 Matcher::vector_length(n->in(1)) == 8); // src
8191 match(Set dst (ExtractD src idx));
8192 effect(TEMP vtmp);
8193 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
8194 ins_encode %{
8195 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
8196
8197 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
8198 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
8199 %}
8200 ins_pipe( pipe_slow );
8201 %}
8202
8203 // --------------------------------- Vector Blend --------------------------------------
8204
8205 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
8206 predicate(UseAVX == 0);
8207 match(Set dst (VectorBlend (Binary dst src) mask));
8208 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
8209 effect(TEMP tmp);
8210 ins_encode %{
8211 assert(UseSSE >= 4, "required");
8212
8213 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
8214 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
8215 }
8216 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
8217 %}
8218 ins_pipe( pipe_slow );
8219 %}
8220
8221 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
8222 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
8223 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
8224 Matcher::vector_length_in_bytes(n) <= 32 &&
8225 is_integral_type(Matcher::vector_element_basic_type(n)));
8226 match(Set dst (VectorBlend (Binary src1 src2) mask));
8227 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
8228 ins_encode %{
8229 int vlen_enc = vector_length_encoding(this);
8230 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
8231 %}
8232 ins_pipe( pipe_slow );
8233 %}
8234
8235 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
8236 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
8237 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
8238 Matcher::vector_length_in_bytes(n) <= 32 &&
8239 !is_integral_type(Matcher::vector_element_basic_type(n)));
8240 match(Set dst (VectorBlend (Binary src1 src2) mask));
8241 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
8242 ins_encode %{
8243 int vlen_enc = vector_length_encoding(this);
8244 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
8245 %}
8246 ins_pipe( pipe_slow );
8247 %}
8248
8249 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
8250 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
8251 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
8252 Matcher::vector_length_in_bytes(n) <= 32);
8253 match(Set dst (VectorBlend (Binary src1 src2) mask));
8254 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
8255 effect(TEMP vtmp, TEMP dst);
8256 ins_encode %{
8257 int vlen_enc = vector_length_encoding(this);
8258 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
8259 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
8260 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8261 %}
8262 ins_pipe( pipe_slow );
8263 %}
8264
8265 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
8266 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
8267 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
8268 match(Set dst (VectorBlend (Binary src1 src2) mask));
8269 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
8270 effect(TEMP ktmp);
8271 ins_encode %{
8272 int vlen_enc = Assembler::AVX_512bit;
8273 BasicType elem_bt = Matcher::vector_element_basic_type(this);
8274 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
8275 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8276 %}
8277 ins_pipe( pipe_slow );
8278 %}
8279
8280
8281 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
8282 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
8283 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
8284 VM_Version::supports_avx512bw()));
8285 match(Set dst (VectorBlend (Binary src1 src2) mask));
8286 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
8287 ins_encode %{
8288 int vlen_enc = vector_length_encoding(this);
8289 BasicType elem_bt = Matcher::vector_element_basic_type(this);
8290 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8291 %}
8292 ins_pipe( pipe_slow );
8293 %}
8294
8295 // --------------------------------- ABS --------------------------------------
8296 // a = |a|
8297 instruct vabsB_reg(vec dst, vec src) %{
8298 match(Set dst (AbsVB src));
8299 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
8300 ins_encode %{
8301 uint vlen = Matcher::vector_length(this);
8302 if (vlen <= 16) {
8303 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
8304 } else {
8305 int vlen_enc = vector_length_encoding(this);
8306 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8307 }
8308 %}
8309 ins_pipe( pipe_slow );
8310 %}
8311
8312 instruct vabsS_reg(vec dst, vec src) %{
8313 match(Set dst (AbsVS src));
8314 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
8315 ins_encode %{
8316 uint vlen = Matcher::vector_length(this);
8317 if (vlen <= 8) {
8318 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
8319 } else {
8320 int vlen_enc = vector_length_encoding(this);
8321 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8322 }
8323 %}
8324 ins_pipe( pipe_slow );
8325 %}
8326
8327 instruct vabsI_reg(vec dst, vec src) %{
8328 match(Set dst (AbsVI src));
8329 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
8330 ins_encode %{
8331 uint vlen = Matcher::vector_length(this);
8332 if (vlen <= 4) {
8333 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
8334 } else {
8335 int vlen_enc = vector_length_encoding(this);
8336 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8337 }
8338 %}
8339 ins_pipe( pipe_slow );
8340 %}
8341
8342 instruct vabsL_reg(vec dst, vec src) %{
8343 match(Set dst (AbsVL src));
8344 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
8345 ins_encode %{
8346 assert(UseAVX > 2, "required");
8347 int vlen_enc = vector_length_encoding(this);
8348 if (!VM_Version::supports_avx512vl()) {
8349 vlen_enc = Assembler::AVX_512bit;
8350 }
8351 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8352 %}
8353 ins_pipe( pipe_slow );
8354 %}
8355
8356 // --------------------------------- ABSNEG --------------------------------------
8357
8358 instruct vabsnegF(vec dst, vec src) %{
8359 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
8360 match(Set dst (AbsVF src));
8361 match(Set dst (NegVF src));
8362 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
8363 ins_cost(150);
8364 ins_encode %{
8365 int opcode = this->ideal_Opcode();
8366 int vlen = Matcher::vector_length(this);
8367 if (vlen == 2) {
8368 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
8369 } else {
8370 assert(vlen == 8 || vlen == 16, "required");
8371 int vlen_enc = vector_length_encoding(this);
8372 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8373 }
8374 %}
8375 ins_pipe( pipe_slow );
8376 %}
8377
8378 instruct vabsneg4F(vec dst) %{
8379 predicate(Matcher::vector_length(n) == 4);
8380 match(Set dst (AbsVF dst));
8381 match(Set dst (NegVF dst));
8382 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
8383 ins_cost(150);
8384 ins_encode %{
8385 int opcode = this->ideal_Opcode();
8386 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
8387 %}
8388 ins_pipe( pipe_slow );
8389 %}
8390
8391 instruct vabsnegD(vec dst, vec src) %{
8392 match(Set dst (AbsVD src));
8393 match(Set dst (NegVD src));
8394 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
8395 ins_encode %{
8396 int opcode = this->ideal_Opcode();
8397 uint vlen = Matcher::vector_length(this);
8398 if (vlen == 2) {
8399 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
8400 } else {
8401 int vlen_enc = vector_length_encoding(this);
8402 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
8403 }
8404 %}
8405 ins_pipe( pipe_slow );
8406 %}
8407
8408 //------------------------------------- VectorTest --------------------------------------------
8409
8410 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
8411 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
8412 match(Set cr (VectorTest src1 src2));
8413 effect(TEMP vtmp);
8414 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
8415 ins_encode %{
8416 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
8417 int vlen = Matcher::vector_length_in_bytes(this, $src1);
8418 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
8419 %}
8420 ins_pipe( pipe_slow );
8421 %}
8422
8423 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
8424 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
8425 match(Set cr (VectorTest src1 src2));
8426 format %{ "vptest_ge16 $src1, $src2\n\t" %}
8427 ins_encode %{
8428 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
8429 int vlen = Matcher::vector_length_in_bytes(this, $src1);
8430 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
8431 %}
8432 ins_pipe( pipe_slow );
8433 %}
8434
8435 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
8436 predicate((Matcher::vector_length(n->in(1)) < 8 ||
8437 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
8438 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
8439 match(Set cr (VectorTest src1 src2));
8440 effect(TEMP tmp);
8441 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
8442 ins_encode %{
8443 uint masklen = Matcher::vector_length(this, $src1);
8444 __ kmovwl($tmp$$Register, $src1$$KRegister);
8445 __ andl($tmp$$Register, (1 << masklen) - 1);
8446 __ cmpl($tmp$$Register, (1 << masklen) - 1);
8447 %}
8448 ins_pipe( pipe_slow );
8449 %}
8450
8451 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
8452 predicate((Matcher::vector_length(n->in(1)) < 8 ||
8453 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
8454 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
8455 match(Set cr (VectorTest src1 src2));
8456 effect(TEMP tmp);
8457 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
8458 ins_encode %{
8459 uint masklen = Matcher::vector_length(this, $src1);
8460 __ kmovwl($tmp$$Register, $src1$$KRegister);
8461 __ andl($tmp$$Register, (1 << masklen) - 1);
8462 %}
8463 ins_pipe( pipe_slow );
8464 %}
8465
8466 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
8467 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
8468 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
8469 match(Set cr (VectorTest src1 src2));
8470 format %{ "ktest_ge8 $src1, $src2\n\t" %}
8471 ins_encode %{
8472 uint masklen = Matcher::vector_length(this, $src1);
8473 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
8474 %}
8475 ins_pipe( pipe_slow );
8476 %}
8477
8478 //------------------------------------- LoadMask --------------------------------------------
8479
8480 instruct loadMask(legVec dst, legVec src) %{
8481 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
8482 match(Set dst (VectorLoadMask src));
8483 effect(TEMP dst);
8484 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
8485 ins_encode %{
8486 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8487 BasicType elem_bt = Matcher::vector_element_basic_type(this);
8488 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
8489 %}
8490 ins_pipe( pipe_slow );
8491 %}
8492
8493 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
8494 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
8495 match(Set dst (VectorLoadMask src));
8496 effect(TEMP xtmp);
8497 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
8498 ins_encode %{
8499 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
8500 true, Assembler::AVX_512bit);
8501 %}
8502 ins_pipe( pipe_slow );
8503 %}
8504
8505 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
8506 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
8507 match(Set dst (VectorLoadMask src));
8508 effect(TEMP xtmp);
8509 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
8510 ins_encode %{
8511 int vlen_enc = vector_length_encoding(in(1));
8512 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
8513 false, vlen_enc);
8514 %}
8515 ins_pipe( pipe_slow );
8516 %}
8517
8518 //------------------------------------- StoreMask --------------------------------------------
8519
8520 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
8521 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8522 match(Set dst (VectorStoreMask src size));
8523 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8524 ins_encode %{
8525 int vlen = Matcher::vector_length(this);
8526 if (vlen <= 16 && UseAVX <= 2) {
8527 assert(UseSSE >= 3, "required");
8528 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
8529 } else {
8530 assert(UseAVX > 0, "required");
8531 int src_vlen_enc = vector_length_encoding(this, $src);
8532 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8533 }
8534 %}
8535 ins_pipe( pipe_slow );
8536 %}
8537
8538 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
8539 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8540 match(Set dst (VectorStoreMask src size));
8541 effect(TEMP_DEF dst, TEMP xtmp);
8542 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8543 ins_encode %{
8544 int vlen_enc = Assembler::AVX_128bit;
8545 int vlen = Matcher::vector_length(this);
8546 if (vlen <= 8) {
8547 assert(UseSSE >= 3, "required");
8548 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8549 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
8550 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8551 } else {
8552 assert(UseAVX > 0, "required");
8553 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
8554 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8555 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8556 }
8557 %}
8558 ins_pipe( pipe_slow );
8559 %}
8560
8561 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
8562 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8563 match(Set dst (VectorStoreMask src size));
8564 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8565 effect(TEMP_DEF dst, TEMP xtmp);
8566 ins_encode %{
8567 int vlen_enc = Assembler::AVX_128bit;
8568 int vlen = Matcher::vector_length(this);
8569 if (vlen <= 4) {
8570 assert(UseSSE >= 3, "required");
8571 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8572 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
8573 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
8574 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8575 } else {
8576 assert(UseAVX > 0, "required");
8577 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8578 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
8579 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8580 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8581 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8582 }
8583 %}
8584 ins_pipe( pipe_slow );
8585 %}
8586
8587 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
8588 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
8589 match(Set dst (VectorStoreMask src size));
8590 effect(TEMP_DEF dst, TEMP xtmp);
8591 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8592 ins_encode %{
8593 assert(UseSSE >= 3, "required");
8594 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8595 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
8596 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
8597 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
8598 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8599 %}
8600 ins_pipe( pipe_slow );
8601 %}
8602
8603 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
8604 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
8605 match(Set dst (VectorStoreMask src size));
8606 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
8607 effect(TEMP_DEF dst, TEMP vtmp);
8608 ins_encode %{
8609 int vlen_enc = Assembler::AVX_128bit;
8610 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
8611 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
8612 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
8613 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8614 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8615 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8616 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8617 %}
8618 ins_pipe( pipe_slow );
8619 %}
8620
8621 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
8622 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8623 match(Set dst (VectorStoreMask src size));
8624 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8625 ins_encode %{
8626 int src_vlen_enc = vector_length_encoding(this, $src);
8627 int dst_vlen_enc = vector_length_encoding(this);
8628 if (!VM_Version::supports_avx512vl()) {
8629 src_vlen_enc = Assembler::AVX_512bit;
8630 }
8631 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8632 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8633 %}
8634 ins_pipe( pipe_slow );
8635 %}
8636
8637 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
8638 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8639 match(Set dst (VectorStoreMask src size));
8640 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8641 ins_encode %{
8642 int src_vlen_enc = vector_length_encoding(this, $src);
8643 int dst_vlen_enc = vector_length_encoding(this);
8644 if (!VM_Version::supports_avx512vl()) {
8645 src_vlen_enc = Assembler::AVX_512bit;
8646 }
8647 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8648 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8649 %}
8650 ins_pipe( pipe_slow );
8651 %}
8652
8653 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
8654 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
8655 match(Set dst (VectorStoreMask mask size));
8656 effect(TEMP_DEF dst);
8657 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8658 ins_encode %{
8659 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
8660 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
8661 false, Assembler::AVX_512bit, noreg);
8662 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
8663 %}
8664 ins_pipe( pipe_slow );
8665 %}
8666
8667 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
8668 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
8669 match(Set dst (VectorStoreMask mask size));
8670 effect(TEMP_DEF dst);
8671 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8672 ins_encode %{
8673 int dst_vlen_enc = vector_length_encoding(this);
8674 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
8675 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8676 %}
8677 ins_pipe( pipe_slow );
8678 %}
8679
8680 instruct vmaskcast_evex(kReg dst) %{
8681 match(Set dst (VectorMaskCast dst));
8682 ins_cost(0);
8683 format %{ "vector_mask_cast $dst" %}
8684 ins_encode %{
8685 // empty
8686 %}
8687 ins_pipe(empty);
8688 %}
8689
8690 instruct vmaskcast(vec dst) %{
8691 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
8692 match(Set dst (VectorMaskCast dst));
8693 ins_cost(0);
8694 format %{ "vector_mask_cast $dst" %}
8695 ins_encode %{
8696 // empty
8697 %}
8698 ins_pipe(empty);
8699 %}
8700
8701 instruct vmaskcast_avx(vec dst, vec src) %{
8702 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
8703 match(Set dst (VectorMaskCast src));
8704 format %{ "vector_mask_cast $dst, $src" %}
8705 ins_encode %{
8706 int vlen = Matcher::vector_length(this);
8707 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
8708 BasicType dst_bt = Matcher::vector_element_basic_type(this);
8709 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
8710 %}
8711 ins_pipe(pipe_slow);
8712 %}
8713
8714 //-------------------------------- Load Iota Indices ----------------------------------
8715
8716 instruct loadIotaIndices(vec dst, immI_0 src) %{
8717 match(Set dst (VectorLoadConst src));
8718 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
8719 ins_encode %{
8720 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8721 BasicType bt = Matcher::vector_element_basic_type(this);
8722 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
8723 %}
8724 ins_pipe( pipe_slow );
8725 %}
8726
8727 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
8728 match(Set dst (PopulateIndex src1 src2));
8729 effect(TEMP dst, TEMP vtmp);
8730 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
8731 ins_encode %{
8732 assert($src2$$constant == 1, "required");
8733 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8734 int vlen_enc = vector_length_encoding(this);
8735 BasicType elem_bt = Matcher::vector_element_basic_type(this);
8736 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
8737 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
8738 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8739 %}
8740 ins_pipe( pipe_slow );
8741 %}
8742
8743 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
8744 match(Set dst (PopulateIndex src1 src2));
8745 effect(TEMP dst, TEMP vtmp);
8746 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
8747 ins_encode %{
8748 assert($src2$$constant == 1, "required");
8749 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8750 int vlen_enc = vector_length_encoding(this);
8751 BasicType elem_bt = Matcher::vector_element_basic_type(this);
8752 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
8753 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
8754 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8755 %}
8756 ins_pipe( pipe_slow );
8757 %}
8758
8759 //-------------------------------- Rearrange ----------------------------------
8760
8761 // LoadShuffle/Rearrange for Byte
8762 instruct rearrangeB(vec dst, vec shuffle) %{
8763 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8764 Matcher::vector_length(n) < 32);
8765 match(Set dst (VectorRearrange dst shuffle));
8766 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
8767 ins_encode %{
8768 assert(UseSSE >= 4, "required");
8769 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
8770 %}
8771 ins_pipe( pipe_slow );
8772 %}
8773
8774 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
8775 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8776 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
8777 match(Set dst (VectorRearrange src shuffle));
8778 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
8779 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
8780 ins_encode %{
8781 assert(UseAVX >= 2, "required");
8782 // Swap src into vtmp1
8783 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
8784 // Shuffle swapped src to get entries from other 128 bit lane
8785 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8786 // Shuffle original src to get entries from self 128 bit lane
8787 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8788 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
8789 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
8790 // Perform the blend
8791 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
8792 %}
8793 ins_pipe( pipe_slow );
8794 %}
8795
8796
8797 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
8798 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8799 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
8800 match(Set dst (VectorRearrange src shuffle));
8801 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
8802 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
8803 ins_encode %{
8804 int vlen_enc = vector_length_encoding(this);
8805 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
8806 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
8807 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
8808 %}
8809 ins_pipe( pipe_slow );
8810 %}
8811
8812 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
8813 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
8814 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
8815 match(Set dst (VectorRearrange src shuffle));
8816 format %{ "vector_rearrange $dst, $shuffle, $src" %}
8817 ins_encode %{
8818 int vlen_enc = vector_length_encoding(this);
8819 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8820 %}
8821 ins_pipe( pipe_slow );
8822 %}
8823
8824 // LoadShuffle/Rearrange for Short
8825
8826 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
8827 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8828 !VM_Version::supports_avx512bw());
8829 match(Set dst (VectorLoadShuffle src));
8830 effect(TEMP dst, TEMP vtmp);
8831 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
8832 ins_encode %{
8833 // Create a byte shuffle mask from short shuffle mask
8834 // only byte shuffle instruction available on these platforms
8835 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
8836 if (UseAVX == 0) {
8837 assert(vlen_in_bytes <= 16, "required");
8838 // Multiply each shuffle by two to get byte index
8839 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
8840 __ psllw($vtmp$$XMMRegister, 1);
8841
8842 // Duplicate to create 2 copies of byte index
8843 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
8844 __ psllw($dst$$XMMRegister, 8);
8845 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
8846
8847 // Add one to get alternate byte index
8848 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
8849 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
8850 } else {
8851 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
8852 int vlen_enc = vector_length_encoding(this);
8853 // Multiply each shuffle by two to get byte index
8854 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
8855
8856 // Duplicate to create 2 copies of byte index
8857 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
8858 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8859
8860 // Add one to get alternate byte index
8861 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
8862 }
8863 %}
8864 ins_pipe( pipe_slow );
8865 %}
8866
8867 instruct rearrangeS(vec dst, vec shuffle) %{
8868 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8869 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
8870 match(Set dst (VectorRearrange dst shuffle));
8871 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
8872 ins_encode %{
8873 assert(UseSSE >= 4, "required");
8874 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
8875 %}
8876 ins_pipe( pipe_slow );
8877 %}
8878
8879 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
8880 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8881 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
8882 match(Set dst (VectorRearrange src shuffle));
8883 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
8884 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
8885 ins_encode %{
8886 assert(UseAVX >= 2, "required");
8887 // Swap src into vtmp1
8888 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
8889 // Shuffle swapped src to get entries from other 128 bit lane
8890 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8891 // Shuffle original src to get entries from self 128 bit lane
8892 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
8893 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
8894 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
8895 // Perform the blend
8896 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
8897 %}
8898 ins_pipe( pipe_slow );
8899 %}
8900
8901 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
8902 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
8903 VM_Version::supports_avx512bw());
8904 match(Set dst (VectorRearrange src shuffle));
8905 format %{ "vector_rearrange $dst, $shuffle, $src" %}
8906 ins_encode %{
8907 int vlen_enc = vector_length_encoding(this);
8908 if (!VM_Version::supports_avx512vl()) {
8909 vlen_enc = Assembler::AVX_512bit;
8910 }
8911 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8912 %}
8913 ins_pipe( pipe_slow );
8914 %}
8915
8916 // LoadShuffle/Rearrange for Integer and Float
8917
8918 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
8919 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8920 Matcher::vector_length(n) == 4 && UseAVX == 0);
8921 match(Set dst (VectorLoadShuffle src));
8922 effect(TEMP dst, TEMP vtmp);
8923 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
8924 ins_encode %{
8925 assert(UseSSE >= 4, "required");
8926
8927 // Create a byte shuffle mask from int shuffle mask
8928 // only byte shuffle instruction available on these platforms
8929
8930 // Duplicate and multiply each shuffle by 4
8931 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
8932 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
8933 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
8934 __ psllw($vtmp$$XMMRegister, 2);
8935
8936 // Duplicate again to create 4 copies of byte index
8937 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
8938 __ psllw($dst$$XMMRegister, 8);
8939 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
8940
8941 // Add 3,2,1,0 to get alternate byte index
8942 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
8943 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
8944 %}
8945 ins_pipe( pipe_slow );
8946 %}
8947
8948 instruct rearrangeI(vec dst, vec shuffle) %{
8949 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8950 UseAVX == 0);
8951 match(Set dst (VectorRearrange dst shuffle));
8952 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
8953 ins_encode %{
8954 assert(UseSSE >= 4, "required");
8955 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
8956 %}
8957 ins_pipe( pipe_slow );
8958 %}
8959
8960 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
8961 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
8962 UseAVX > 0);
8963 match(Set dst (VectorRearrange src shuffle));
8964 format %{ "vector_rearrange $dst, $shuffle, $src" %}
8965 ins_encode %{
8966 int vlen_enc = vector_length_encoding(this);
8967 BasicType bt = Matcher::vector_element_basic_type(this);
8968 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
8969 %}
8970 ins_pipe( pipe_slow );
8971 %}
8972
8973 // LoadShuffle/Rearrange for Long and Double
8974
8975 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
8976 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
8977 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
8978 match(Set dst (VectorLoadShuffle src));
8979 effect(TEMP dst, TEMP vtmp);
8980 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
8981 ins_encode %{
8982 assert(UseAVX >= 2, "required");
8983
8984 int vlen_enc = vector_length_encoding(this);
8985 // Create a double word shuffle mask from long shuffle mask
8986 // only double word shuffle instruction available on these platforms
8987
8988 // Multiply each shuffle by two to get double word index
8989 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
8990
8991 // Duplicate each double word shuffle
8992 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
8993 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8994
8995 // Add one to get alternate double word index
8996 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
8997 %}
8998 ins_pipe( pipe_slow );
8999 %}
9000
9001 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
9002 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
9003 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
9004 match(Set dst (VectorRearrange src shuffle));
9005 format %{ "vector_rearrange $dst, $shuffle, $src" %}
9006 ins_encode %{
9007 assert(UseAVX >= 2, "required");
9008
9009 int vlen_enc = vector_length_encoding(this);
9010 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
9011 %}
9012 ins_pipe( pipe_slow );
9013 %}
9014
9015 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
9016 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
9017 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
9018 match(Set dst (VectorRearrange src shuffle));
9019 format %{ "vector_rearrange $dst, $shuffle, $src" %}
9020 ins_encode %{
9021 assert(UseAVX > 2, "required");
9022
9023 int vlen_enc = vector_length_encoding(this);
9024 if (vlen_enc == Assembler::AVX_128bit) {
9025 vlen_enc = Assembler::AVX_256bit;
9026 }
9027 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
9028 %}
9029 ins_pipe( pipe_slow );
9030 %}
9031
9032 // --------------------------------- FMA --------------------------------------
9033 // a * b + c
9034
9035 instruct vfmaF_reg(vec a, vec b, vec c) %{
9036 match(Set c (FmaVF c (Binary a b)));
9037 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
9038 ins_cost(150);
9039 ins_encode %{
9040 assert(UseFMA, "not enabled");
9041 int vlen_enc = vector_length_encoding(this);
9042 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
9043 %}
9044 ins_pipe( pipe_slow );
9045 %}
9046
9047 instruct vfmaF_mem(vec a, memory b, vec c) %{
9048 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
9049 match(Set c (FmaVF c (Binary a (LoadVector b))));
9050 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
9051 ins_cost(150);
9052 ins_encode %{
9053 assert(UseFMA, "not enabled");
9054 int vlen_enc = vector_length_encoding(this);
9055 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
9056 %}
9057 ins_pipe( pipe_slow );
9058 %}
9059
9060 instruct vfmaD_reg(vec a, vec b, vec c) %{
9061 match(Set c (FmaVD c (Binary a b)));
9062 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
9063 ins_cost(150);
9064 ins_encode %{
9065 assert(UseFMA, "not enabled");
9066 int vlen_enc = vector_length_encoding(this);
9067 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
9068 %}
9069 ins_pipe( pipe_slow );
9070 %}
9071
9072 instruct vfmaD_mem(vec a, memory b, vec c) %{
9073 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
9074 match(Set c (FmaVD c (Binary a (LoadVector b))));
9075 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
9076 ins_cost(150);
9077 ins_encode %{
9078 assert(UseFMA, "not enabled");
9079 int vlen_enc = vector_length_encoding(this);
9080 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
9081 %}
9082 ins_pipe( pipe_slow );
9083 %}
9084
9085 // --------------------------------- Vector Multiply Add --------------------------------------
9086
9087 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
9088 predicate(UseAVX == 0);
9089 match(Set dst (MulAddVS2VI dst src1));
9090 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
9091 ins_encode %{
9092 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
9093 %}
9094 ins_pipe( pipe_slow );
9095 %}
9096
9097 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
9098 predicate(UseAVX > 0);
9099 match(Set dst (MulAddVS2VI src1 src2));
9100 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
9101 ins_encode %{
9102 int vlen_enc = vector_length_encoding(this);
9103 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
9104 %}
9105 ins_pipe( pipe_slow );
9106 %}
9107
9108 // --------------------------------- Vector Multiply Add Add ----------------------------------
9109
9110 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
9111 predicate(VM_Version::supports_avx512_vnni());
9112 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
9113 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
9114 ins_encode %{
9115 assert(UseAVX > 2, "required");
9116 int vlen_enc = vector_length_encoding(this);
9117 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
9118 %}
9119 ins_pipe( pipe_slow );
9120 ins_cost(10);
9121 %}
9122
9123 // --------------------------------- PopCount --------------------------------------
9124
9125 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
9126 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
9127 match(Set dst (PopCountVI src));
9128 match(Set dst (PopCountVL src));
9129 format %{ "vector_popcount_integral $dst, $src" %}
9130 ins_encode %{
9131 int opcode = this->ideal_Opcode();
9132 int vlen_enc = vector_length_encoding(this, $src);
9133 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9134 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
9135 %}
9136 ins_pipe( pipe_slow );
9137 %}
9138
9139 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
9140 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
9141 match(Set dst (PopCountVI src mask));
9142 match(Set dst (PopCountVL src mask));
9143 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
9144 ins_encode %{
9145 int vlen_enc = vector_length_encoding(this, $src);
9146 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9147 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
9148 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
9149 %}
9150 ins_pipe( pipe_slow );
9151 %}
9152
9153 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
9154 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
9155 match(Set dst (PopCountVI src));
9156 match(Set dst (PopCountVL src));
9157 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
9158 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
9159 ins_encode %{
9160 int opcode = this->ideal_Opcode();
9161 int vlen_enc = vector_length_encoding(this, $src);
9162 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9163 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9164 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
9165 %}
9166 ins_pipe( pipe_slow );
9167 %}
9168
9169 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
9170
9171 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
9172 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
9173 Matcher::vector_length_in_bytes(n->in(1))));
9174 match(Set dst (CountTrailingZerosV src));
9175 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
9176 ins_cost(400);
9177 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
9178 ins_encode %{
9179 int vlen_enc = vector_length_encoding(this, $src);
9180 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9181 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
9182 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
9183 %}
9184 ins_pipe( pipe_slow );
9185 %}
9186
9187 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
9188 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
9189 VM_Version::supports_avx512cd() &&
9190 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
9191 match(Set dst (CountTrailingZerosV src));
9192 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
9193 ins_cost(400);
9194 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
9195 ins_encode %{
9196 int vlen_enc = vector_length_encoding(this, $src);
9197 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9198 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9199 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
9200 %}
9201 ins_pipe( pipe_slow );
9202 %}
9203
9204 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
9205 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
9206 match(Set dst (CountTrailingZerosV src));
9207 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
9208 ins_cost(400);
9209 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
9210 ins_encode %{
9211 int vlen_enc = vector_length_encoding(this, $src);
9212 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9213 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9214 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
9215 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
9216 %}
9217 ins_pipe( pipe_slow );
9218 %}
9219
9220 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
9221 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
9222 match(Set dst (CountTrailingZerosV src));
9223 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
9224 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
9225 ins_encode %{
9226 int vlen_enc = vector_length_encoding(this, $src);
9227 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9228 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9229 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
9230 %}
9231 ins_pipe( pipe_slow );
9232 %}
9233
9234
9235 // --------------------------------- Bitwise Ternary Logic ----------------------------------
9236
9237 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
9238 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
9239 effect(TEMP dst);
9240 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
9241 ins_encode %{
9242 int vector_len = vector_length_encoding(this);
9243 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
9244 %}
9245 ins_pipe( pipe_slow );
9246 %}
9247
9248 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
9249 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
9250 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
9251 effect(TEMP dst);
9252 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
9253 ins_encode %{
9254 int vector_len = vector_length_encoding(this);
9255 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
9256 %}
9257 ins_pipe( pipe_slow );
9258 %}
9259
9260 // --------------------------------- Rotation Operations ----------------------------------
9261 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
9262 match(Set dst (RotateLeftV src shift));
9263 match(Set dst (RotateRightV src shift));
9264 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
9265 ins_encode %{
9266 int opcode = this->ideal_Opcode();
9267 int vector_len = vector_length_encoding(this);
9268 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
9269 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
9270 %}
9271 ins_pipe( pipe_slow );
9272 %}
9273
9274 instruct vprorate(vec dst, vec src, vec shift) %{
9275 match(Set dst (RotateLeftV src shift));
9276 match(Set dst (RotateRightV src shift));
9277 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
9278 ins_encode %{
9279 int opcode = this->ideal_Opcode();
9280 int vector_len = vector_length_encoding(this);
9281 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
9282 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
9283 %}
9284 ins_pipe( pipe_slow );
9285 %}
9286
9287 // ---------------------------------- Masked Operations ------------------------------------
9288 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
9289 predicate(!n->in(3)->bottom_type()->isa_vectmask());
9290 match(Set dst (LoadVectorMasked mem mask));
9291 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
9292 ins_encode %{
9293 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
9294 int vlen_enc = vector_length_encoding(this);
9295 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
9296 %}
9297 ins_pipe( pipe_slow );
9298 %}
9299
9300
9301 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
9302 predicate(n->in(3)->bottom_type()->isa_vectmask());
9303 match(Set dst (LoadVectorMasked mem mask));
9304 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
9305 ins_encode %{
9306 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
9307 int vector_len = vector_length_encoding(this);
9308 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
9309 %}
9310 ins_pipe( pipe_slow );
9311 %}
9312
9313 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
9314 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
9315 match(Set mem (StoreVectorMasked mem (Binary src mask)));
9316 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
9317 ins_encode %{
9318 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
9319 int vlen_enc = vector_length_encoding(src_node);
9320 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
9321 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
9322 %}
9323 ins_pipe( pipe_slow );
9324 %}
9325
9326 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
9327 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
9328 match(Set mem (StoreVectorMasked mem (Binary src mask)));
9329 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
9330 ins_encode %{
9331 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
9332 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
9333 int vlen_enc = vector_length_encoding(src_node);
9334 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
9335 %}
9336 ins_pipe( pipe_slow );
9337 %}
9338
9339 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
9340 match(Set addr (VerifyVectorAlignment addr mask));
9341 effect(KILL cr);
9342 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
9343 ins_encode %{
9344 Label Lskip;
9345 // check if masked bits of addr are zero
9346 __ testq($addr$$Register, $mask$$constant);
9347 __ jccb(Assembler::equal, Lskip);
9348 __ stop("verify_vector_alignment found a misaligned vector memory access");
9349 __ bind(Lskip);
9350 %}
9351 ins_pipe(pipe_slow);
9352 %}
9353
9354 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
9355 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
9356 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
9357 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
9358 ins_encode %{
9359 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
9360 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
9361
9362 Label DONE;
9363 int vlen_enc = vector_length_encoding(this, $src1);
9364 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
9365
9366 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
9367 __ mov64($dst$$Register, -1L);
9368 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
9369 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
9370 __ jccb(Assembler::carrySet, DONE);
9371 __ kmovql($dst$$Register, $ktmp1$$KRegister);
9372 __ notq($dst$$Register);
9373 __ tzcntq($dst$$Register, $dst$$Register);
9374 __ bind(DONE);
9375 %}
9376 ins_pipe( pipe_slow );
9377 %}
9378
9379
9380 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
9381 match(Set dst (VectorMaskGen len));
9382 effect(TEMP temp, KILL cr);
9383 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
9384 ins_encode %{
9385 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
9386 %}
9387 ins_pipe( pipe_slow );
9388 %}
9389
9390 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
9391 match(Set dst (VectorMaskGen len));
9392 format %{ "vector_mask_gen $len \t! vector mask generator" %}
9393 effect(TEMP temp);
9394 ins_encode %{
9395 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
9396 __ kmovql($dst$$KRegister, $temp$$Register);
9397 %}
9398 ins_pipe( pipe_slow );
9399 %}
9400
9401 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
9402 predicate(n->in(1)->bottom_type()->isa_vectmask());
9403 match(Set dst (VectorMaskToLong mask));
9404 effect(TEMP dst, KILL cr);
9405 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
9406 ins_encode %{
9407 int opcode = this->ideal_Opcode();
9408 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9409 int mask_len = Matcher::vector_length(this, $mask);
9410 int mask_size = mask_len * type2aelembytes(mbt);
9411 int vlen_enc = vector_length_encoding(this, $mask);
9412 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
9413 $dst$$Register, mask_len, mask_size, vlen_enc);
9414 %}
9415 ins_pipe( pipe_slow );
9416 %}
9417
9418 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
9419 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
9420 match(Set dst (VectorMaskToLong mask));
9421 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
9422 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
9423 ins_encode %{
9424 int opcode = this->ideal_Opcode();
9425 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9426 int mask_len = Matcher::vector_length(this, $mask);
9427 int vlen_enc = vector_length_encoding(this, $mask);
9428 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9429 $dst$$Register, mask_len, mbt, vlen_enc);
9430 %}
9431 ins_pipe( pipe_slow );
9432 %}
9433
9434 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
9435 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
9436 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
9437 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
9438 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
9439 ins_encode %{
9440 int opcode = this->ideal_Opcode();
9441 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9442 int mask_len = Matcher::vector_length(this, $mask);
9443 int vlen_enc = vector_length_encoding(this, $mask);
9444 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9445 $dst$$Register, mask_len, mbt, vlen_enc);
9446 %}
9447 ins_pipe( pipe_slow );
9448 %}
9449
9450 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
9451 predicate(n->in(1)->bottom_type()->isa_vectmask());
9452 match(Set dst (VectorMaskTrueCount mask));
9453 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
9454 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
9455 ins_encode %{
9456 int opcode = this->ideal_Opcode();
9457 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9458 int mask_len = Matcher::vector_length(this, $mask);
9459 int mask_size = mask_len * type2aelembytes(mbt);
9460 int vlen_enc = vector_length_encoding(this, $mask);
9461 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
9462 $tmp$$Register, mask_len, mask_size, vlen_enc);
9463 %}
9464 ins_pipe( pipe_slow );
9465 %}
9466
9467 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9468 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
9469 match(Set dst (VectorMaskTrueCount mask));
9470 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9471 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9472 ins_encode %{
9473 int opcode = this->ideal_Opcode();
9474 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9475 int mask_len = Matcher::vector_length(this, $mask);
9476 int vlen_enc = vector_length_encoding(this, $mask);
9477 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9478 $tmp$$Register, mask_len, mbt, vlen_enc);
9479 %}
9480 ins_pipe( pipe_slow );
9481 %}
9482
9483 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9484 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
9485 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
9486 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9487 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9488 ins_encode %{
9489 int opcode = this->ideal_Opcode();
9490 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9491 int mask_len = Matcher::vector_length(this, $mask);
9492 int vlen_enc = vector_length_encoding(this, $mask);
9493 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9494 $tmp$$Register, mask_len, mbt, vlen_enc);
9495 %}
9496 ins_pipe( pipe_slow );
9497 %}
9498
9499 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
9500 predicate(n->in(1)->bottom_type()->isa_vectmask());
9501 match(Set dst (VectorMaskFirstTrue mask));
9502 match(Set dst (VectorMaskLastTrue mask));
9503 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
9504 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
9505 ins_encode %{
9506 int opcode = this->ideal_Opcode();
9507 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9508 int mask_len = Matcher::vector_length(this, $mask);
9509 int mask_size = mask_len * type2aelembytes(mbt);
9510 int vlen_enc = vector_length_encoding(this, $mask);
9511 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
9512 $tmp$$Register, mask_len, mask_size, vlen_enc);
9513 %}
9514 ins_pipe( pipe_slow );
9515 %}
9516
9517 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9518 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
9519 match(Set dst (VectorMaskFirstTrue mask));
9520 match(Set dst (VectorMaskLastTrue mask));
9521 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9522 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9523 ins_encode %{
9524 int opcode = this->ideal_Opcode();
9525 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9526 int mask_len = Matcher::vector_length(this, $mask);
9527 int vlen_enc = vector_length_encoding(this, $mask);
9528 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9529 $tmp$$Register, mask_len, mbt, vlen_enc);
9530 %}
9531 ins_pipe( pipe_slow );
9532 %}
9533
9534 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9535 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
9536 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
9537 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
9538 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9539 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9540 ins_encode %{
9541 int opcode = this->ideal_Opcode();
9542 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9543 int mask_len = Matcher::vector_length(this, $mask);
9544 int vlen_enc = vector_length_encoding(this, $mask);
9545 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9546 $tmp$$Register, mask_len, mbt, vlen_enc);
9547 %}
9548 ins_pipe( pipe_slow );
9549 %}
9550
9551 // --------------------------------- Compress/Expand Operations ---------------------------
9552 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
9553 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
9554 match(Set dst (CompressV src mask));
9555 match(Set dst (ExpandV src mask));
9556 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
9557 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
9558 ins_encode %{
9559 int opcode = this->ideal_Opcode();
9560 int vlen_enc = vector_length_encoding(this);
9561 BasicType bt = Matcher::vector_element_basic_type(this);
9562 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
9563 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
9564 %}
9565 ins_pipe( pipe_slow );
9566 %}
9567
9568 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
9569 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
9570 match(Set dst (CompressV src mask));
9571 match(Set dst (ExpandV src mask));
9572 format %{ "vector_compress_expand $dst, $src, $mask" %}
9573 ins_encode %{
9574 int opcode = this->ideal_Opcode();
9575 int vector_len = vector_length_encoding(this);
9576 BasicType bt = Matcher::vector_element_basic_type(this);
9577 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
9578 %}
9579 ins_pipe( pipe_slow );
9580 %}
9581
9582 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
9583 match(Set dst (CompressM mask));
9584 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
9585 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
9586 ins_encode %{
9587 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
9588 int mask_len = Matcher::vector_length(this);
9589 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
9590 %}
9591 ins_pipe( pipe_slow );
9592 %}
9593
9594 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
9595
9596 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
9597 predicate(!VM_Version::supports_gfni());
9598 match(Set dst (ReverseV src));
9599 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
9600 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
9601 ins_encode %{
9602 int vec_enc = vector_length_encoding(this);
9603 BasicType bt = Matcher::vector_element_basic_type(this);
9604 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9605 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
9606 %}
9607 ins_pipe( pipe_slow );
9608 %}
9609
9610 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
9611 predicate(VM_Version::supports_gfni());
9612 match(Set dst (ReverseV src));
9613 effect(TEMP dst, TEMP xtmp);
9614 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
9615 ins_encode %{
9616 int vec_enc = vector_length_encoding(this);
9617 BasicType bt = Matcher::vector_element_basic_type(this);
9618 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
9619 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
9620 $xtmp$$XMMRegister);
9621 %}
9622 ins_pipe( pipe_slow );
9623 %}
9624
9625 instruct vreverse_byte_reg(vec dst, vec src) %{
9626 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
9627 match(Set dst (ReverseBytesV src));
9628 effect(TEMP dst);
9629 format %{ "vector_reverse_byte $dst, $src" %}
9630 ins_encode %{
9631 int vec_enc = vector_length_encoding(this);
9632 BasicType bt = Matcher::vector_element_basic_type(this);
9633 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
9634 %}
9635 ins_pipe( pipe_slow );
9636 %}
9637
9638 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
9639 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
9640 match(Set dst (ReverseBytesV src));
9641 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
9642 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
9643 ins_encode %{
9644 int vec_enc = vector_length_encoding(this);
9645 BasicType bt = Matcher::vector_element_basic_type(this);
9646 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9647 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
9648 %}
9649 ins_pipe( pipe_slow );
9650 %}
9651
9652 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
9653
9654 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
9655 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
9656 Matcher::vector_length_in_bytes(n->in(1))));
9657 match(Set dst (CountLeadingZerosV src));
9658 format %{ "vector_count_leading_zeros $dst, $src" %}
9659 ins_encode %{
9660 int vlen_enc = vector_length_encoding(this, $src);
9661 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9662 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
9663 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
9664 %}
9665 ins_pipe( pipe_slow );
9666 %}
9667
9668 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
9669 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
9670 Matcher::vector_length_in_bytes(n->in(1))));
9671 match(Set dst (CountLeadingZerosV src mask));
9672 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
9673 ins_encode %{
9674 int vlen_enc = vector_length_encoding(this, $src);
9675 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9676 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
9677 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
9678 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
9679 %}
9680 ins_pipe( pipe_slow );
9681 %}
9682
9683 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
9684 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
9685 VM_Version::supports_avx512cd() &&
9686 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
9687 match(Set dst (CountLeadingZerosV src));
9688 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
9689 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
9690 ins_encode %{
9691 int vlen_enc = vector_length_encoding(this, $src);
9692 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9693 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9694 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
9695 %}
9696 ins_pipe( pipe_slow );
9697 %}
9698
9699 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
9700 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
9701 match(Set dst (CountLeadingZerosV src));
9702 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
9703 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
9704 ins_encode %{
9705 int vlen_enc = vector_length_encoding(this, $src);
9706 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9707 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9708 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
9709 $rtmp$$Register, true, vlen_enc);
9710 %}
9711 ins_pipe( pipe_slow );
9712 %}
9713
9714 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
9715 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
9716 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
9717 match(Set dst (CountLeadingZerosV src));
9718 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
9719 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
9720 ins_encode %{
9721 int vlen_enc = vector_length_encoding(this, $src);
9722 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9723 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9724 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
9725 %}
9726 ins_pipe( pipe_slow );
9727 %}
9728
9729 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
9730 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
9731 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
9732 match(Set dst (CountLeadingZerosV src));
9733 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
9734 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
9735 ins_encode %{
9736 int vlen_enc = vector_length_encoding(this, $src);
9737 BasicType bt = Matcher::vector_element_basic_type(this, $src);
9738 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
9739 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
9740 %}
9741 ins_pipe( pipe_slow );
9742 %}
9743
9744 // ---------------------------------- Vector Masked Operations ------------------------------------
9745
9746 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
9747 match(Set dst (AddVB (Binary dst src2) mask));
9748 match(Set dst (AddVS (Binary dst src2) mask));
9749 match(Set dst (AddVI (Binary dst src2) mask));
9750 match(Set dst (AddVL (Binary dst src2) mask));
9751 match(Set dst (AddVF (Binary dst src2) mask));
9752 match(Set dst (AddVD (Binary dst src2) mask));
9753 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
9754 ins_encode %{
9755 int vlen_enc = vector_length_encoding(this);
9756 BasicType bt = Matcher::vector_element_basic_type(this);
9757 int opc = this->ideal_Opcode();
9758 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9759 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9760 %}
9761 ins_pipe( pipe_slow );
9762 %}
9763
9764 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
9765 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
9766 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
9767 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
9768 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
9769 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
9770 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
9771 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
9772 ins_encode %{
9773 int vlen_enc = vector_length_encoding(this);
9774 BasicType bt = Matcher::vector_element_basic_type(this);
9775 int opc = this->ideal_Opcode();
9776 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9777 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9778 %}
9779 ins_pipe( pipe_slow );
9780 %}
9781
9782 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
9783 match(Set dst (XorV (Binary dst src2) mask));
9784 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
9785 ins_encode %{
9786 int vlen_enc = vector_length_encoding(this);
9787 BasicType bt = Matcher::vector_element_basic_type(this);
9788 int opc = this->ideal_Opcode();
9789 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9790 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9791 %}
9792 ins_pipe( pipe_slow );
9793 %}
9794
9795 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
9796 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
9797 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
9798 ins_encode %{
9799 int vlen_enc = vector_length_encoding(this);
9800 BasicType bt = Matcher::vector_element_basic_type(this);
9801 int opc = this->ideal_Opcode();
9802 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9803 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9804 %}
9805 ins_pipe( pipe_slow );
9806 %}
9807
9808 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
9809 match(Set dst (OrV (Binary dst src2) mask));
9810 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
9811 ins_encode %{
9812 int vlen_enc = vector_length_encoding(this);
9813 BasicType bt = Matcher::vector_element_basic_type(this);
9814 int opc = this->ideal_Opcode();
9815 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9816 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9817 %}
9818 ins_pipe( pipe_slow );
9819 %}
9820
9821 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
9822 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
9823 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
9824 ins_encode %{
9825 int vlen_enc = vector_length_encoding(this);
9826 BasicType bt = Matcher::vector_element_basic_type(this);
9827 int opc = this->ideal_Opcode();
9828 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9829 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9830 %}
9831 ins_pipe( pipe_slow );
9832 %}
9833
9834 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
9835 match(Set dst (AndV (Binary dst src2) mask));
9836 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
9837 ins_encode %{
9838 int vlen_enc = vector_length_encoding(this);
9839 BasicType bt = Matcher::vector_element_basic_type(this);
9840 int opc = this->ideal_Opcode();
9841 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9842 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9843 %}
9844 ins_pipe( pipe_slow );
9845 %}
9846
9847 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
9848 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
9849 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
9850 ins_encode %{
9851 int vlen_enc = vector_length_encoding(this);
9852 BasicType bt = Matcher::vector_element_basic_type(this);
9853 int opc = this->ideal_Opcode();
9854 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9855 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9856 %}
9857 ins_pipe( pipe_slow );
9858 %}
9859
9860 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
9861 match(Set dst (SubVB (Binary dst src2) mask));
9862 match(Set dst (SubVS (Binary dst src2) mask));
9863 match(Set dst (SubVI (Binary dst src2) mask));
9864 match(Set dst (SubVL (Binary dst src2) mask));
9865 match(Set dst (SubVF (Binary dst src2) mask));
9866 match(Set dst (SubVD (Binary dst src2) mask));
9867 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
9868 ins_encode %{
9869 int vlen_enc = vector_length_encoding(this);
9870 BasicType bt = Matcher::vector_element_basic_type(this);
9871 int opc = this->ideal_Opcode();
9872 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9873 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9874 %}
9875 ins_pipe( pipe_slow );
9876 %}
9877
9878 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
9879 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
9880 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
9881 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
9882 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
9883 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
9884 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
9885 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
9886 ins_encode %{
9887 int vlen_enc = vector_length_encoding(this);
9888 BasicType bt = Matcher::vector_element_basic_type(this);
9889 int opc = this->ideal_Opcode();
9890 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9891 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9892 %}
9893 ins_pipe( pipe_slow );
9894 %}
9895
9896 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
9897 match(Set dst (MulVS (Binary dst src2) mask));
9898 match(Set dst (MulVI (Binary dst src2) mask));
9899 match(Set dst (MulVL (Binary dst src2) mask));
9900 match(Set dst (MulVF (Binary dst src2) mask));
9901 match(Set dst (MulVD (Binary dst src2) mask));
9902 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
9903 ins_encode %{
9904 int vlen_enc = vector_length_encoding(this);
9905 BasicType bt = Matcher::vector_element_basic_type(this);
9906 int opc = this->ideal_Opcode();
9907 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9908 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9909 %}
9910 ins_pipe( pipe_slow );
9911 %}
9912
9913 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
9914 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
9915 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
9916 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
9917 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
9918 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
9919 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
9920 ins_encode %{
9921 int vlen_enc = vector_length_encoding(this);
9922 BasicType bt = Matcher::vector_element_basic_type(this);
9923 int opc = this->ideal_Opcode();
9924 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9925 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9926 %}
9927 ins_pipe( pipe_slow );
9928 %}
9929
9930 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
9931 match(Set dst (SqrtVF dst mask));
9932 match(Set dst (SqrtVD dst mask));
9933 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
9934 ins_encode %{
9935 int vlen_enc = vector_length_encoding(this);
9936 BasicType bt = Matcher::vector_element_basic_type(this);
9937 int opc = this->ideal_Opcode();
9938 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9939 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
9940 %}
9941 ins_pipe( pipe_slow );
9942 %}
9943
9944 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
9945 match(Set dst (DivVF (Binary dst src2) mask));
9946 match(Set dst (DivVD (Binary dst src2) mask));
9947 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
9948 ins_encode %{
9949 int vlen_enc = vector_length_encoding(this);
9950 BasicType bt = Matcher::vector_element_basic_type(this);
9951 int opc = this->ideal_Opcode();
9952 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9953 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9954 %}
9955 ins_pipe( pipe_slow );
9956 %}
9957
9958 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
9959 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
9960 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
9961 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
9962 ins_encode %{
9963 int vlen_enc = vector_length_encoding(this);
9964 BasicType bt = Matcher::vector_element_basic_type(this);
9965 int opc = this->ideal_Opcode();
9966 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9967 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9968 %}
9969 ins_pipe( pipe_slow );
9970 %}
9971
9972
9973 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
9974 match(Set dst (RotateLeftV (Binary dst shift) mask));
9975 match(Set dst (RotateRightV (Binary dst shift) mask));
9976 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
9977 ins_encode %{
9978 int vlen_enc = vector_length_encoding(this);
9979 BasicType bt = Matcher::vector_element_basic_type(this);
9980 int opc = this->ideal_Opcode();
9981 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9982 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
9983 %}
9984 ins_pipe( pipe_slow );
9985 %}
9986
9987 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
9988 match(Set dst (RotateLeftV (Binary dst src2) mask));
9989 match(Set dst (RotateRightV (Binary dst src2) mask));
9990 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
9991 ins_encode %{
9992 int vlen_enc = vector_length_encoding(this);
9993 BasicType bt = Matcher::vector_element_basic_type(this);
9994 int opc = this->ideal_Opcode();
9995 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9996 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9997 %}
9998 ins_pipe( pipe_slow );
9999 %}
10000
10001 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
10002 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
10003 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
10004 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
10005 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
10006 ins_encode %{
10007 int vlen_enc = vector_length_encoding(this);
10008 BasicType bt = Matcher::vector_element_basic_type(this);
10009 int opc = this->ideal_Opcode();
10010 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10011 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
10012 %}
10013 ins_pipe( pipe_slow );
10014 %}
10015
10016 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
10017 predicate(!n->as_ShiftV()->is_var_shift());
10018 match(Set dst (LShiftVS (Binary dst src2) mask));
10019 match(Set dst (LShiftVI (Binary dst src2) mask));
10020 match(Set dst (LShiftVL (Binary dst src2) mask));
10021 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
10022 ins_encode %{
10023 int vlen_enc = vector_length_encoding(this);
10024 BasicType bt = Matcher::vector_element_basic_type(this);
10025 int opc = this->ideal_Opcode();
10026 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10027 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
10028 %}
10029 ins_pipe( pipe_slow );
10030 %}
10031
10032 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
10033 predicate(n->as_ShiftV()->is_var_shift());
10034 match(Set dst (LShiftVS (Binary dst src2) mask));
10035 match(Set dst (LShiftVI (Binary dst src2) mask));
10036 match(Set dst (LShiftVL (Binary dst src2) mask));
10037 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
10038 ins_encode %{
10039 int vlen_enc = vector_length_encoding(this);
10040 BasicType bt = Matcher::vector_element_basic_type(this);
10041 int opc = this->ideal_Opcode();
10042 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10043 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
10044 %}
10045 ins_pipe( pipe_slow );
10046 %}
10047
10048 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
10049 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
10050 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
10051 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
10052 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
10053 ins_encode %{
10054 int vlen_enc = vector_length_encoding(this);
10055 BasicType bt = Matcher::vector_element_basic_type(this);
10056 int opc = this->ideal_Opcode();
10057 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10058 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
10059 %}
10060 ins_pipe( pipe_slow );
10061 %}
10062
10063 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
10064 predicate(!n->as_ShiftV()->is_var_shift());
10065 match(Set dst (RShiftVS (Binary dst src2) mask));
10066 match(Set dst (RShiftVI (Binary dst src2) mask));
10067 match(Set dst (RShiftVL (Binary dst src2) mask));
10068 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
10069 ins_encode %{
10070 int vlen_enc = vector_length_encoding(this);
10071 BasicType bt = Matcher::vector_element_basic_type(this);
10072 int opc = this->ideal_Opcode();
10073 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10074 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
10075 %}
10076 ins_pipe( pipe_slow );
10077 %}
10078
10079 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
10080 predicate(n->as_ShiftV()->is_var_shift());
10081 match(Set dst (RShiftVS (Binary dst src2) mask));
10082 match(Set dst (RShiftVI (Binary dst src2) mask));
10083 match(Set dst (RShiftVL (Binary dst src2) mask));
10084 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
10085 ins_encode %{
10086 int vlen_enc = vector_length_encoding(this);
10087 BasicType bt = Matcher::vector_element_basic_type(this);
10088 int opc = this->ideal_Opcode();
10089 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10090 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
10091 %}
10092 ins_pipe( pipe_slow );
10093 %}
10094
10095 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
10096 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
10097 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
10098 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
10099 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
10100 ins_encode %{
10101 int vlen_enc = vector_length_encoding(this);
10102 BasicType bt = Matcher::vector_element_basic_type(this);
10103 int opc = this->ideal_Opcode();
10104 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10105 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
10106 %}
10107 ins_pipe( pipe_slow );
10108 %}
10109
10110 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
10111 predicate(!n->as_ShiftV()->is_var_shift());
10112 match(Set dst (URShiftVS (Binary dst src2) mask));
10113 match(Set dst (URShiftVI (Binary dst src2) mask));
10114 match(Set dst (URShiftVL (Binary dst src2) mask));
10115 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
10116 ins_encode %{
10117 int vlen_enc = vector_length_encoding(this);
10118 BasicType bt = Matcher::vector_element_basic_type(this);
10119 int opc = this->ideal_Opcode();
10120 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10121 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
10122 %}
10123 ins_pipe( pipe_slow );
10124 %}
10125
10126 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
10127 predicate(n->as_ShiftV()->is_var_shift());
10128 match(Set dst (URShiftVS (Binary dst src2) mask));
10129 match(Set dst (URShiftVI (Binary dst src2) mask));
10130 match(Set dst (URShiftVL (Binary dst src2) mask));
10131 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
10132 ins_encode %{
10133 int vlen_enc = vector_length_encoding(this);
10134 BasicType bt = Matcher::vector_element_basic_type(this);
10135 int opc = this->ideal_Opcode();
10136 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10137 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
10138 %}
10139 ins_pipe( pipe_slow );
10140 %}
10141
10142 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
10143 match(Set dst (MaxV (Binary dst src2) mask));
10144 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
10145 ins_encode %{
10146 int vlen_enc = vector_length_encoding(this);
10147 BasicType bt = Matcher::vector_element_basic_type(this);
10148 int opc = this->ideal_Opcode();
10149 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10150 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
10151 %}
10152 ins_pipe( pipe_slow );
10153 %}
10154
10155 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
10156 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
10157 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
10158 ins_encode %{
10159 int vlen_enc = vector_length_encoding(this);
10160 BasicType bt = Matcher::vector_element_basic_type(this);
10161 int opc = this->ideal_Opcode();
10162 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10163 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
10164 %}
10165 ins_pipe( pipe_slow );
10166 %}
10167
10168 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
10169 match(Set dst (MinV (Binary dst src2) mask));
10170 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
10171 ins_encode %{
10172 int vlen_enc = vector_length_encoding(this);
10173 BasicType bt = Matcher::vector_element_basic_type(this);
10174 int opc = this->ideal_Opcode();
10175 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10176 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
10177 %}
10178 ins_pipe( pipe_slow );
10179 %}
10180
10181 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
10182 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
10183 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
10184 ins_encode %{
10185 int vlen_enc = vector_length_encoding(this);
10186 BasicType bt = Matcher::vector_element_basic_type(this);
10187 int opc = this->ideal_Opcode();
10188 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10189 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
10190 %}
10191 ins_pipe( pipe_slow );
10192 %}
10193
10194 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
10195 match(Set dst (VectorRearrange (Binary dst src2) mask));
10196 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
10197 ins_encode %{
10198 int vlen_enc = vector_length_encoding(this);
10199 BasicType bt = Matcher::vector_element_basic_type(this);
10200 int opc = this->ideal_Opcode();
10201 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10202 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
10203 %}
10204 ins_pipe( pipe_slow );
10205 %}
10206
10207 instruct vabs_masked(vec dst, kReg mask) %{
10208 match(Set dst (AbsVB dst mask));
10209 match(Set dst (AbsVS dst mask));
10210 match(Set dst (AbsVI dst mask));
10211 match(Set dst (AbsVL dst mask));
10212 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
10213 ins_encode %{
10214 int vlen_enc = vector_length_encoding(this);
10215 BasicType bt = Matcher::vector_element_basic_type(this);
10216 int opc = this->ideal_Opcode();
10217 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10218 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
10219 %}
10220 ins_pipe( pipe_slow );
10221 %}
10222
10223 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
10224 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
10225 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
10226 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
10227 ins_encode %{
10228 assert(UseFMA, "Needs FMA instructions support.");
10229 int vlen_enc = vector_length_encoding(this);
10230 BasicType bt = Matcher::vector_element_basic_type(this);
10231 int opc = this->ideal_Opcode();
10232 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10233 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
10234 %}
10235 ins_pipe( pipe_slow );
10236 %}
10237
10238 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
10239 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
10240 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
10241 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
10242 ins_encode %{
10243 assert(UseFMA, "Needs FMA instructions support.");
10244 int vlen_enc = vector_length_encoding(this);
10245 BasicType bt = Matcher::vector_element_basic_type(this);
10246 int opc = this->ideal_Opcode();
10247 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
10248 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
10249 %}
10250 ins_pipe( pipe_slow );
10251 %}
10252
10253 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
10254 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
10255 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
10256 ins_encode %{
10257 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
10258 int vlen_enc = vector_length_encoding(this, $src1);
10259 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
10260
10261 // Comparison i
10262 switch (src1_elem_bt) {
10263 case T_BYTE: {
10264 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
10265 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
10266 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
10267 break;
10268 }
10269 case T_SHORT: {
10270 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
10271 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
10272 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
10273 break;
10274 }
10275 case T_INT: {
10276 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
10277 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
10278 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
10279 break;
10280 }
10281 case T_LONG: {
10282 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
10283 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
10284 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
10285 break;
10286 }
10287 case T_FLOAT: {
10288 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
10289 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
10290 break;
10291 }
10292 case T_DOUBLE: {
10293 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
10294 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
10295 break;
10296 }
10297 default: assert(false, "%s", type2name(src1_elem_bt)); break;
10298 }
10299 %}
10300 ins_pipe( pipe_slow );
10301 %}
10302
10303 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
10304 predicate(Matcher::vector_length(n) <= 32);
10305 match(Set dst (MaskAll src));
10306 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
10307 ins_encode %{
10308 int mask_len = Matcher::vector_length(this);
10309 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
10310 %}
10311 ins_pipe( pipe_slow );
10312 %}
10313
10314 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
10315 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
10316 match(Set dst (XorVMask src (MaskAll cnt)));
10317 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
10318 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
10319 ins_encode %{
10320 uint masklen = Matcher::vector_length(this);
10321 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
10322 %}
10323 ins_pipe( pipe_slow );
10324 %}
10325
10326 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
10327 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
10328 (Matcher::vector_length(n) == 16) ||
10329 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
10330 match(Set dst (XorVMask src (MaskAll cnt)));
10331 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
10332 ins_encode %{
10333 uint masklen = Matcher::vector_length(this);
10334 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
10335 %}
10336 ins_pipe( pipe_slow );
10337 %}
10338
10339 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
10340 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
10341 match(Set dst (VectorLongToMask src));
10342 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
10343 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
10344 ins_encode %{
10345 int mask_len = Matcher::vector_length(this);
10346 int vec_enc = vector_length_encoding(mask_len);
10347 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
10348 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
10349 %}
10350 ins_pipe( pipe_slow );
10351 %}
10352
10353
10354 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
10355 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
10356 match(Set dst (VectorLongToMask src));
10357 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
10358 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
10359 ins_encode %{
10360 int mask_len = Matcher::vector_length(this);
10361 assert(mask_len <= 32, "invalid mask length");
10362 int vec_enc = vector_length_encoding(mask_len);
10363 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
10364 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
10365 %}
10366 ins_pipe( pipe_slow );
10367 %}
10368
10369 instruct long_to_mask_evex(kReg dst, rRegL src) %{
10370 predicate(n->bottom_type()->isa_vectmask());
10371 match(Set dst (VectorLongToMask src));
10372 format %{ "long_to_mask_evex $dst, $src\t!" %}
10373 ins_encode %{
10374 __ kmov($dst$$KRegister, $src$$Register);
10375 %}
10376 ins_pipe( pipe_slow );
10377 %}
10378
10379 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
10380 match(Set dst (AndVMask src1 src2));
10381 match(Set dst (OrVMask src1 src2));
10382 match(Set dst (XorVMask src1 src2));
10383 effect(TEMP kscratch);
10384 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
10385 ins_encode %{
10386 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
10387 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
10388 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
10389 uint masklen = Matcher::vector_length(this);
10390 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
10391 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
10392 %}
10393 ins_pipe( pipe_slow );
10394 %}
10395
10396 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
10397 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
10398 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
10399 ins_encode %{
10400 int vlen_enc = vector_length_encoding(this);
10401 BasicType bt = Matcher::vector_element_basic_type(this);
10402 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
10403 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
10404 %}
10405 ins_pipe( pipe_slow );
10406 %}
10407
10408 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
10409 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
10410 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
10411 ins_encode %{
10412 int vlen_enc = vector_length_encoding(this);
10413 BasicType bt = Matcher::vector_element_basic_type(this);
10414 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
10415 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
10416 %}
10417 ins_pipe( pipe_slow );
10418 %}
10419
10420 instruct castMM(kReg dst)
10421 %{
10422 match(Set dst (CastVV dst));
10423
10424 size(0);
10425 format %{ "# castVV of $dst" %}
10426 ins_encode(/* empty encoding */);
10427 ins_cost(0);
10428 ins_pipe(empty);
10429 %}
10430
10431 instruct castVV(vec dst)
10432 %{
10433 match(Set dst (CastVV dst));
10434
10435 size(0);
10436 format %{ "# castVV of $dst" %}
10437 ins_encode(/* empty encoding */);
10438 ins_cost(0);
10439 ins_pipe(empty);
10440 %}
10441
10442 instruct castVVLeg(legVec dst)
10443 %{
10444 match(Set dst (CastVV dst));
10445
10446 size(0);
10447 format %{ "# castVV of $dst" %}
10448 ins_encode(/* empty encoding */);
10449 ins_cost(0);
10450 ins_pipe(empty);
10451 %}
10452
10453 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
10454 %{
10455 match(Set dst (IsInfiniteF src));
10456 effect(TEMP ktmp, KILL cr);
10457 format %{ "float_class_check $dst, $src" %}
10458 ins_encode %{
10459 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
10460 __ kmovbl($dst$$Register, $ktmp$$KRegister);
10461 %}
10462 ins_pipe(pipe_slow);
10463 %}
10464
10465 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
10466 %{
10467 match(Set dst (IsInfiniteD src));
10468 effect(TEMP ktmp, KILL cr);
10469 format %{ "double_class_check $dst, $src" %}
10470 ins_encode %{
10471 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
10472 __ kmovbl($dst$$Register, $ktmp$$KRegister);
10473 %}
10474 ins_pipe(pipe_slow);
10475 %}
10476
10477 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
10478 %{
10479 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10480 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
10481 match(Set dst (SaturatingAddV src1 src2));
10482 match(Set dst (SaturatingSubV src1 src2));
10483 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
10484 ins_encode %{
10485 int vlen_enc = vector_length_encoding(this);
10486 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10487 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10488 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
10489 %}
10490 ins_pipe(pipe_slow);
10491 %}
10492
10493 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
10494 %{
10495 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10496 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
10497 match(Set dst (SaturatingAddV src1 src2));
10498 match(Set dst (SaturatingSubV src1 src2));
10499 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
10500 ins_encode %{
10501 int vlen_enc = vector_length_encoding(this);
10502 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10503 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10504 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
10505 %}
10506 ins_pipe(pipe_slow);
10507 %}
10508
10509 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
10510 %{
10511 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10512 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
10513 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
10514 match(Set dst (SaturatingAddV src1 src2));
10515 match(Set dst (SaturatingSubV src1 src2));
10516 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
10517 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
10518 ins_encode %{
10519 int vlen_enc = vector_length_encoding(this);
10520 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10521 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10522 $src1$$XMMRegister, $src2$$XMMRegister,
10523 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
10524 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
10525 %}
10526 ins_pipe(pipe_slow);
10527 %}
10528
10529 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
10530 %{
10531 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10532 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
10533 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
10534 match(Set dst (SaturatingAddV src1 src2));
10535 match(Set dst (SaturatingSubV src1 src2));
10536 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
10537 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
10538 ins_encode %{
10539 int vlen_enc = vector_length_encoding(this);
10540 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10541 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
10542 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
10543 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
10544 %}
10545 ins_pipe(pipe_slow);
10546 %}
10547
10548 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
10549 %{
10550 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10551 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
10552 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
10553 match(Set dst (SaturatingAddV src1 src2));
10554 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
10555 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
10556 ins_encode %{
10557 int vlen_enc = vector_length_encoding(this);
10558 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10559 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
10560 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
10561 %}
10562 ins_pipe(pipe_slow);
10563 %}
10564
10565 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
10566 %{
10567 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10568 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
10569 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
10570 match(Set dst (SaturatingAddV src1 src2));
10571 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
10572 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
10573 ins_encode %{
10574 int vlen_enc = vector_length_encoding(this);
10575 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10576 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
10577 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
10578 %}
10579 ins_pipe(pipe_slow);
10580 %}
10581
10582 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
10583 %{
10584 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10585 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
10586 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
10587 match(Set dst (SaturatingSubV src1 src2));
10588 effect(TEMP ktmp);
10589 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
10590 ins_encode %{
10591 int vlen_enc = vector_length_encoding(this);
10592 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10593 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
10594 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
10595 %}
10596 ins_pipe(pipe_slow);
10597 %}
10598
10599 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
10600 %{
10601 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
10602 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
10603 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
10604 match(Set dst (SaturatingSubV src1 src2));
10605 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
10606 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
10607 ins_encode %{
10608 int vlen_enc = vector_length_encoding(this);
10609 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10610 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
10611 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
10612 %}
10613 ins_pipe(pipe_slow);
10614 %}
10615
10616 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
10617 %{
10618 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10619 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
10620 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
10621 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
10622 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
10623 ins_encode %{
10624 int vlen_enc = vector_length_encoding(this);
10625 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10626 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10627 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
10628 %}
10629 ins_pipe(pipe_slow);
10630 %}
10631
10632 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
10633 %{
10634 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10635 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
10636 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
10637 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
10638 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
10639 ins_encode %{
10640 int vlen_enc = vector_length_encoding(this);
10641 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10642 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
10643 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
10644 %}
10645 ins_pipe(pipe_slow);
10646 %}
10647
10648 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
10649 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10650 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
10651 match(Set dst (SaturatingAddV (Binary dst src) mask));
10652 match(Set dst (SaturatingSubV (Binary dst src) mask));
10653 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
10654 ins_encode %{
10655 int vlen_enc = vector_length_encoding(this);
10656 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10657 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
10658 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
10659 %}
10660 ins_pipe( pipe_slow );
10661 %}
10662
10663 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
10664 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10665 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
10666 match(Set dst (SaturatingAddV (Binary dst src) mask));
10667 match(Set dst (SaturatingSubV (Binary dst src) mask));
10668 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
10669 ins_encode %{
10670 int vlen_enc = vector_length_encoding(this);
10671 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10672 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
10673 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
10674 %}
10675 ins_pipe( pipe_slow );
10676 %}
10677
10678 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
10679 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10680 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
10681 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
10682 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
10683 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
10684 ins_encode %{
10685 int vlen_enc = vector_length_encoding(this);
10686 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10687 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
10688 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
10689 %}
10690 ins_pipe( pipe_slow );
10691 %}
10692
10693 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
10694 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
10695 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
10696 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
10697 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
10698 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
10699 ins_encode %{
10700 int vlen_enc = vector_length_encoding(this);
10701 BasicType elem_bt = Matcher::vector_element_basic_type(this);
10702 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
10703 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
10704 %}
10705 ins_pipe( pipe_slow );
10706 %}
10707
10708 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
10709 %{
10710 match(Set index (SelectFromTwoVector (Binary index src1) src2));
10711 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
10712 ins_encode %{
10713 int vlen_enc = vector_length_encoding(this);
10714 BasicType bt = Matcher::vector_element_basic_type(this);
10715 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
10716 %}
10717 ins_pipe(pipe_slow);
10718 %}
10719
10720 instruct reinterpretS2HF(regF dst, rRegI src)
10721 %{
10722 match(Set dst (ReinterpretS2HF src));
10723 format %{ "vmovw $dst, $src" %}
10724 ins_encode %{
10725 __ vmovw($dst$$XMMRegister, $src$$Register);
10726 %}
10727 ins_pipe(pipe_slow);
10728 %}
10729
10730 instruct reinterpretHF2S(rRegI dst, regF src)
10731 %{
10732 match(Set dst (ReinterpretHF2S src));
10733 format %{ "vmovw $dst, $src" %}
10734 ins_encode %{
10735 __ vmovw($dst$$Register, $src$$XMMRegister);
10736 %}
10737 ins_pipe(pipe_slow);
10738 %}
10739
10740 instruct convF2HFAndS2HF(regF dst, regF src)
10741 %{
10742 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
10743 format %{ "convF2HFAndS2HF $dst, $src" %}
10744 ins_encode %{
10745 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
10746 %}
10747 ins_pipe(pipe_slow);
10748 %}
10749
10750 instruct convHF2SAndHF2F(regF dst, regF src)
10751 %{
10752 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
10753 format %{ "convHF2SAndHF2F $dst, $src" %}
10754 ins_encode %{
10755 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
10756 %}
10757 ins_pipe(pipe_slow);
10758 %}
10759
10760 instruct scalar_sqrt_HF_reg(regF dst, regF src)
10761 %{
10762 match(Set dst (SqrtHF src));
10763 format %{ "scalar_sqrt_fp16 $dst, $src" %}
10764 ins_encode %{
10765 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
10766 %}
10767 ins_pipe(pipe_slow);
10768 %}
10769
10770 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
10771 %{
10772 match(Set dst (AddHF src1 src2));
10773 match(Set dst (DivHF src1 src2));
10774 match(Set dst (MulHF src1 src2));
10775 match(Set dst (SubHF src1 src2));
10776 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
10777 ins_encode %{
10778 int opcode = this->ideal_Opcode();
10779 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
10780 %}
10781 ins_pipe(pipe_slow);
10782 %}
10783
10784 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
10785 %{
10786 predicate(VM_Version::supports_avx10_2());
10787 match(Set dst (MaxHF src1 src2));
10788 match(Set dst (MinHF src1 src2));
10789 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
10790 ins_encode %{
10791 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
10792 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
10793 %}
10794 ins_pipe( pipe_slow );
10795 %}
10796
10797 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
10798 %{
10799 predicate(!VM_Version::supports_avx10_2());
10800 match(Set dst (MaxHF src1 src2));
10801 match(Set dst (MinHF src1 src2));
10802 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
10803 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
10804 ins_encode %{
10805 int opcode = this->ideal_Opcode();
10806 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
10807 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
10808 %}
10809 ins_pipe( pipe_slow );
10810 %}
10811
10812 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
10813 %{
10814 match(Set dst (FmaHF src2 (Binary dst src1)));
10815 effect(DEF dst);
10816 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
10817 ins_encode %{
10818 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
10819 %}
10820 ins_pipe( pipe_slow );
10821 %}
10822
10823
10824 instruct vector_sqrt_HF_reg(vec dst, vec src)
10825 %{
10826 match(Set dst (SqrtVHF src));
10827 format %{ "vector_sqrt_fp16 $dst, $src" %}
10828 ins_encode %{
10829 int vlen_enc = vector_length_encoding(this);
10830 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
10831 %}
10832 ins_pipe(pipe_slow);
10833 %}
10834
10835 instruct vector_sqrt_HF_mem(vec dst, memory src)
10836 %{
10837 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
10838 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
10839 ins_encode %{
10840 int vlen_enc = vector_length_encoding(this);
10841 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
10842 %}
10843 ins_pipe(pipe_slow);
10844 %}
10845
10846 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
10847 %{
10848 match(Set dst (AddVHF src1 src2));
10849 match(Set dst (DivVHF src1 src2));
10850 match(Set dst (MulVHF src1 src2));
10851 match(Set dst (SubVHF src1 src2));
10852 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
10853 ins_encode %{
10854 int vlen_enc = vector_length_encoding(this);
10855 int opcode = this->ideal_Opcode();
10856 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
10857 %}
10858 ins_pipe(pipe_slow);
10859 %}
10860
10861
10862 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
10863 %{
10864 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
10865 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
10866 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
10867 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
10868 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
10869 ins_encode %{
10870 int vlen_enc = vector_length_encoding(this);
10871 int opcode = this->ideal_Opcode();
10872 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
10873 %}
10874 ins_pipe(pipe_slow);
10875 %}
10876
10877 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
10878 %{
10879 match(Set dst (FmaVHF src2 (Binary dst src1)));
10880 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
10881 ins_encode %{
10882 int vlen_enc = vector_length_encoding(this);
10883 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
10884 %}
10885 ins_pipe( pipe_slow );
10886 %}
10887
10888 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
10889 %{
10890 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
10891 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
10892 ins_encode %{
10893 int vlen_enc = vector_length_encoding(this);
10894 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
10895 %}
10896 ins_pipe( pipe_slow );
10897 %}
10898
10899 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
10900 %{
10901 predicate(VM_Version::supports_avx10_2());
10902 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
10903 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
10904 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
10905 ins_encode %{
10906 int vlen_enc = vector_length_encoding(this);
10907 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
10908 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
10909 %}
10910 ins_pipe( pipe_slow );
10911 %}
10912
10913 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
10914 %{
10915 predicate(VM_Version::supports_avx10_2());
10916 match(Set dst (MinVHF src1 src2));
10917 match(Set dst (MaxVHF src1 src2));
10918 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
10919 ins_encode %{
10920 int vlen_enc = vector_length_encoding(this);
10921 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
10922 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
10923 %}
10924 ins_pipe( pipe_slow );
10925 %}
10926
10927 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
10928 %{
10929 predicate(!VM_Version::supports_avx10_2());
10930 match(Set dst (MinVHF src1 src2));
10931 match(Set dst (MaxVHF src1 src2));
10932 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
10933 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
10934 ins_encode %{
10935 int vlen_enc = vector_length_encoding(this);
10936 int opcode = this->ideal_Opcode();
10937 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
10938 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
10939 %}
10940 ins_pipe( pipe_slow );
10941 %}