1 /*
2 * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP
27
28 // C2_MacroAssembler contains high-level macros for C2
29
30 public:
31 // C2 compiled method's prolog code.
// Emits the prolog of a C2-compiled method.
//   framesize, stack_bang_size - presumably sizes in bytes; TODO confirm the
//                                units against the definition.
//   fp_mode_24b, is_stub       - mode flags; their effect is not visible in
//                                this header.
32 void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);
33
// Translates a vector size given in bytes into an Assembler::AvxVectorLen
// value. NOTE(review): the vlen_enc / vec_enc / vector_len arguments of the
// emitters in this class presumably carry this encoding -- confirm.
34 Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
35
36 // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
37 // See full description in c2_MacroAssembler_x86.cpp.
// Lock fast path for `obj`. NOTE(review): `box`, `rax_reg`, `t` and `thread`
// look like lock-record, rax, scratch and current-thread registers; the exact
// register contract lives in c2_MacroAssembler_x86.cpp -- confirm there.
38 void fast_lock(Register obj, Register box, Register rax_reg,
39 Register t, Register thread);
// Unlock counterpart of fast_lock. Note the rax parameter is spelled
// `reg_rax` here but `rax_reg` in fast_lock.
40 void fast_unlock(Register obj, Register reg_rax, Register t, Register thread);
41
// Range verification of the value in `val` against the type `t` (TypeInt for
// the int variant, TypeLong for the long variant). `idx` is presumably an
// identifier used when reporting a violation -- TODO confirm. The long
// variant additionally takes a `tmp` register.
42 void verify_int_in_range(uint idx, const TypeInt* t, Register val);
43 void verify_long_in_range(uint idx, const TypeLong* t, Register val, Register tmp);
44
45 // Generic instructions support for use in .ad files C2 code generation
// Abs/neg helpers for double (vabsnegd) and float (vabsnegf) operands.
// `opcode` presumably selects between abs and neg -- TODO confirm.
// Each helper has two overloads: one without and one with an explicit
// `vector_len` argument.
46 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src);
47 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
48 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src);
49 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
50
// Min/max helpers parameterized by `opcode` and element type `elem_bt`.
// pminmax: two-operand form (dst, src); `tmp` is optional and defaults to
// xnoreg.
51 void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
52 XMMRegister tmp = xnoreg);
// vpminmax: three-operand form (dst, src1, src2) with a vector-length encoding.
53 void vpminmax(int opcode, BasicType elem_bt,
54 XMMRegister dst, XMMRegister src1, XMMRegister src2,
55 int vlen_enc);
56
// vpuminmax: the 'u' presumably denotes unsigned min/max -- TODO confirm.
// Overloaded for a register and for a memory (Address) second source.
57 void vpuminmax(int opcode, BasicType elem_bt,
58 XMMRegister dst, XMMRegister src1, XMMRegister src2,
59 int vlen_enc);
60
61 void vpuminmax(int opcode, BasicType elem_bt,
62 XMMRegister dst, XMMRegister src1, Address src2,
63 int vlen_enc);
64
// Floating-point vector min/max of `a` and `b` into `dst`, using three XMM
// temporaries (tmp, atmp, btmp).
65 void vminmax_fp(int opcode, BasicType elem_bt,
66 XMMRegister dst, XMMRegister a, XMMRegister b,
67 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
68 int vlen_enc);
69
// AVX10.2 flavours (per the name suffix): no XMM temporaries, but a KRegister
// `mask` operand. The 'v' variant takes a vector-length encoding, the 's'
// variant does not (presumably scalar -- TODO confirm).
70 void vminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
71 XMMRegister src1, XMMRegister src2, int vlen_enc);
72
73 void sminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
74 XMMRegister src1, XMMRegister src2);
75
// Presumably unsigned quadword min/max; takes two XMM temporaries.
76 void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
77
// Same operand shape as vminmax_fp, except the first temporary is a KRegister
// (`ktmp`) instead of an XMM register.
78 void evminmax_fp(int opcode, BasicType elem_bt,
79 XMMRegister dst, XMMRegister a, XMMRegister b,
80 KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
81 int vlen_enc);
82
// Scalar floating-point signum on `dst`. `zero` and `one` are registers
// presumably preloaded with the constants 0.0 and 1.0 by the caller -- TODO
// confirm; `opcode` presumably selects the float or double flavour.
83 void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one);
84
// Vector compress/expand of `src` into `dst` under the opmask `mask`;
// `opcode` presumably chooses compress vs. expand, and `merge` the
// merge-vs-zero masking behaviour -- TODO confirm both.
85 void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
86 bool merge, BasicType bt, int vec_enc);
87
// Mask-register flavour: compresses the `mask_len`-bit mask in `src` into
// `dst`, using two general-purpose temporaries.
88 void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len);
89
// Element-widening helpers. The two-letter suffix presumably names the source
// and destination element sizes (b = byte, w = word, d = doubleword), and
// `sign` presumably selects sign- vs. zero-extension -- TODO confirm.
// Only vextendbw has an overload without `vector_len`.
90 void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
91 void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
92 void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
93 void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
94
// Vector shifts selected by `opcode`, for doubleword (d), word (w) and
// quadword (q) elements (suffix meaning presumed from x86 naming).
// Each width comes in a two-operand form (dst, shift) and a three-operand
// form (dst, src, shift, vlen_enc). The *_imm variants take the shift count
// as an `int` instead of an XMM register; there is no vshiftw_imm here.
95 void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
96 void vshiftd_imm(int opcode, XMMRegister dst, int shift);
97 void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
98 void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
99 void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
100 void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
101 void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
102 void vshiftq_imm(int opcode, XMMRegister dst, int shift);
103 void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
104 void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
105
// Vector rotates for element type `etype`: the _imm form takes the rotate
// count as an `int`, the _var form takes it in an XMM register.
// `opcode` presumably selects the rotate direction -- TODO confirm.
106 void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
107 void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
108
// Variable shifts: the count comes from the XMM register `shift`
// (presumably one count per element -- TODO confirm).
// varshiftq's temporary is optional (defaults to xnoreg); varshiftbw and
// evarshiftb always require `vtmp`; varshiftd and varshiftw take none.
109 void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
110 void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
111 void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
112 void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
113 void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
114
// Element insertion: places the general-purpose register `val` at element
// index `idx` of type `typ`. insert is the two-operand form; vinsert takes a
// separate `src` vector.
115 void insert(BasicType typ, XMMRegister dst, Register val, int idx);
116 void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
// Gather/scatter relative to `base` with per-element indices in `idx`.
// vgather takes its mask in an XMM register; the ev* forms take a KRegister.
117 void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
118 void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
119 void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);
120
// Masked vector move for element type `type` under opmask `kmask`.
// Three overloads: memory -> register, register -> memory, and
// register -> register. `merge` presumably selects merge- vs. zero-masking
// -- TODO confirm.
121 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
122 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
123 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, XMMRegister src, bool merge, int vector_len);
124
125 // extract
// Element extraction helpers.
// extract: element `idx` of `src` into the general-purpose register `dst`.
126 void extract(BasicType typ, Register dst, XMMRegister src, int idx);
// get_lane returns an XMMRegister. NOTE(review): presumably the register that
// holds the lane containing `elemindex` (either `dst` or `src`) -- confirm.
127 XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
// get_elem: overloaded for a general-purpose and for an XMM destination; the
// XMM form accepts an optional temporary (defaults to xnoreg).
128 void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
129 void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
// Operates on `dst` in place according to `typ`; presumably a sign-extension
// of a sub-int value -- TODO confirm.
130 void movsxl(BasicType typ, Register dst);
131
132 // vector test
// Tests `src1` against `src2` for a vector of `vlen_in_bytes` bytes, with
// `vtmp` as a temporary. There is no destination register operand, so the
// result is presumably delivered in the condition flags -- TODO confirm.
133 void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes);
134
135 // Convert B2X
// Converts a vector of bytes in `src` to elements of type `to_elem_bt`.
136 void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
// Broadcasts the general-purpose register `src` to elements of type
// `elem_bt` in `dst`.
137 void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
138
139 // compare and blend
// Masked compare producing an opmask in `kdmask`; `ksmask` is the source
// mask and `comparison` the predicate. Overloaded for a register and for an
// AddressLiteral second operand; the latter accepts an optional `rscratch`
// (defaults to noreg).
140 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
141 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg);
// Masked blend of `src1` and `src2` into `dst` under `kmask`.
142 void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
143
// Loads a vector of `vlen_in_bytes` bytes from an Address or an
// AddressLiteral (the latter with an optional `rscratch`, default noreg).
144 void load_vector(BasicType bt, XMMRegister dst, Address src, int vlen_in_bytes);
145 void load_vector(BasicType bt, XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg);
146
// Builds a vector mask from `src`: one overload produces it in an XMM
// register, the other in a KRegister. The `is_legacy` / `novlbwdq` flags
// presumably describe the available instruction-set features -- TODO confirm.
147 void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
148 void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);
149
// Loads a constant vector from the InternalAddress `src`. NOTE(review): the
// unit of `vlen` (bytes vs. elements) is not visible here -- confirm.
150 void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
// Loads the "iota" index vector for element type `bt` (presumably 0, 1, 2, ...
// -- TODO confirm) sized `vlen_in_bytes`.
151 void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);
152
153 // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
154
155 // dst = src1 reduce(op, src2) using vtmp as temps
// Public entry points for int (reduceI) and long (reduceL) vector reductions.
// `vlen` presumably is the element count used to pick one of the private
// reduce<N>I / reduce<N>L helpers declared further down -- TODO confirm.
156 void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
157 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// Generates an opmask in `dst` from the length in `len`, with `temp` as a
// scratch register; presumably the low `len` bits are set -- TODO confirm.
158 void genmask(KRegister dst, Register len, Register temp);
159
160 // dst = reduce(op, src) using vtmp as temps
// Floating-point reduction entry point; `vtmp2` is optional (xnoreg).
161 void reduce_fp(int opcode, int vlen,
162 XMMRegister dst, XMMRegister src,
163 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
// Unordered variant: both temporaries are optional. NOTE(review):
// "unordered" presumably means the element combination order is not
// preserved -- confirm.
164 void unordered_reduce_fp(int opcode, int vlen,
165 XMMRegister dst, XMMRegister src,
166 XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
// Byte / short reductions with the same operand shape as reduceI;
// mulreduceB is a separate entry point, presumably for multiply reductions
// on bytes.
167 void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
168 void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
169 void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// Float / double min-max reductions. `is_dst_valid` presumably tells whether
// `dst` already holds a value to be included in the reduction -- TODO
// confirm. `xmm_1` is optional (xnoreg).
170 void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
171 XMMRegister dst, XMMRegister src,
172 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
173 void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
174 XMMRegister dst, XMMRegister src,
175 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
176 private:
// Private float (F) / double (D) reduction helpers taking `vlen`, in ordered
// and unordered flavours. NOTE(review): presumably called by reduce_fp /
// unordered_reduce_fp to select a fixed-width helper -- confirm.
177 void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
178 void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
179 void unorderedReduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
180 void unorderedReduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
181
182 // Int Reduction
// Fixed-width integral reduction helpers. The number in each name is
// presumably the element count of the vector being reduced (e.g. reduce4I =
// 4 ints) -- TODO confirm. All share the operand shape
// (opcode, dst, src1, src2, vtmp1, vtmp2).
183 void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
184 void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
185 void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
186 void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
187
188 // Byte Reduction
189 void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
190 void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
191 void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
192 void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// mulreduce<N>B mirror reduce<N>B one-for-one.
193 void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
194 void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
195 void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
196 void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
197
198 // Short Reduction
199 void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
200 void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
201 void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
202 void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
203
204 // Long Reduction
205 void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
206 void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
207 void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
208
209 // Float Reduction
// Fixed-width floating-point reduction helpers. Unlike the integral ones,
// `dst` is an XMM register and there is a single source vector. The number
// of temporaries differs per width, as declared: the ordered 2F / 4F / 2D
// helpers take one, the wider ordered helpers take two.
210 void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
211 void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
212 void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
213 void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
214
215 // Unordered Float Reduction
// unorderedReduce2F needs no temporary at all.
216 void unorderedReduce2F(int opcode, XMMRegister dst, XMMRegister src);
217 void unorderedReduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
218 void unorderedReduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
219 void unorderedReduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
220
221 // Double Reduction
222 void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
223 void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
224 void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
225
226 // Unordered Double Reduction
// unorderedReduce2D likewise takes no temporary.
227 void unorderedReduce2D(int opcode, XMMRegister dst, XMMRegister src);
228 void unorderedReduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
229 void unorderedReduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
230
231 // Base reduction instruction
// Primitive combining steps used by the reductions. The _128 forms are
// two-operand (dst, src); the _256 forms are three-operand
// (dst, src1, src2). The suffix presumably is the vector width in bits --
// TODO confirm.
232 void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
233 void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
234 void unordered_reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
235 void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
236
237 public:
// Scalar-result operations on vector masks, selected by `opc`; the result
// goes to the general-purpose register `dst`.
// Helper working on general-purpose registers only (dst, tmp).
238 void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);
239
// Overload for a mask held in a KRegister.
240 void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);
241
// Overload for a mask held in an XMM register, with an XMM temporary.
242 void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
243 Register tmp, int masklen, BasicType bt, int vec_enc);
// Expands the long value in `src` into a vector mask of `mask_len` lanes in
// `dst` (presumably one bit per lane -- TODO confirm).
244 void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
245 Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc);
246
// Builds a `mask_len`-lane opmask in `dst` from the value in `src`.
247 void vector_maskall_operation(KRegister dst, Register src, int mask_len);
248
// Searches the `cnt1`-element string at `str1` for the character `ch` and
// leaves the outcome in `result`. NOTE(review): the two functions have
// identical signatures; the 'L' in stringL_indexof_char presumably marks the
// Latin-1 (byte) encoding while the plain one handles UTF-16 chars --
// confirm.
249 void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
250 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
251
252 void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
253 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
254
255 // IndexOf strings.
256 // Small strings are loaded through stack if they cross page boundary.
// str1/cnt1 and str2/cnt2 are the two string/length register pairs; which one
// is the substring is not stated in this header. `int_cnt2` presumably is the
// compile-time constant length of str2 when known -- TODO confirm. `ae`
// presumably encodes the argument encodings -- TODO confirm.
257 void string_indexof(Register str1, Register str2,
258 Register cnt1, Register cnt2,
259 int int_cnt2, Register result,
260 XMMRegister vec, Register tmp,
261 int ae);
262
263 // IndexOf for constant substrings with size >= 8 elements
264 // which don't need to be loaded through stack.
// Same signature as string_indexof.
265 void string_indexofC8(Register str1, Register str2,
266 Register cnt1, Register cnt2,
267 int int_cnt2, Register result,
268 XMMRegister vec, Register tmp,
269 int ae);
270
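271 // Smallest code: we don't need to load through stack,
272 // check string tail.
// NOTE(review): the comment above is not attached to any declaration in this
// header; it may be a leftover from a removed or moved declaration.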
273
274 // helper function for string_compare
// Loads the next element of each string into `elem1` / `elem2`, addressed
// through `index`. Three scale factors are passed; presumably a common one
// plus one per string, chosen according to `ae` -- TODO confirm.
275 void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
276 Address::ScaleFactor scale, Address::ScaleFactor scale1,
277 Address::ScaleFactor scale2, Register index, int ae);
278 // Compare strings.
// `mask` is optional and defaults to knoreg.
279 void string_compare(Register str1, Register str2,
280 Register cnt1, Register cnt2, Register result,
281 XMMRegister vec1, int ae, KRegister mask = knoreg);
282
283 // Search for Non-ASCII character (Negative byte value) in a byte array,
284 // return index of the first such character, otherwise len.
//   ary1, len    - the byte array and its length
//   result       - receives the index described above
//   tmp1, vec1/2 - temporaries
//   mask1, mask2 - optional opmask temporaries (default knoreg)
285 void count_positives(Register ary1, Register len,
286 Register result, Register tmp1,
287 XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
288
289 // Compare char[] or byte[] arrays.
//   is_array_equ - flag; presumably distinguishes a whole-array equals from a
//                  raw range compare -- TODO confirm
//   is_char      - selects char[] vs. byte[] element handling
//   mask         - optional opmask temporary (default knoreg)
//   expand_ary2  - defaults to false; presumably widens ary2's elements
//                  before comparing -- TODO confirm
290 void arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
291 Register result, Register chr, XMMRegister vec1, XMMRegister vec2,
292 bool is_char, KRegister mask = knoreg, bool expand_ary2 = false);
293
// Hash code over the `cnt1` elements of type `eltype` at `str1`, left in
// `result`. Takes three general-purpose temporaries and seventeen XMM
// registers, including four-wide groups (vcoef0-3, vresult0-3, vtmp0-3);
// presumably four accumulators are processed in parallel -- TODO confirm.
294 void arrays_hashcode(Register str1, Register cnt1, Register result,
295 Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext,
296 XMMRegister vcoef0, XMMRegister vcoef1, XMMRegister vcoef2, XMMRegister vcoef3,
297 XMMRegister vresult0, XMMRegister vresult1, XMMRegister vresult2, XMMRegister vresult3,
298 XMMRegister vtmp0, XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3,
299 BasicType eltype);
300
301 // helper functions for arrays_hashcode
// elsize returns an int for `eltype` (presumably the element size in bytes);
// elload / elvload load one element / a vector of elements; elvcast
// converts `dst` in place according to `eltype`.
302 int arrays_hashcode_elsize(BasicType eltype);
303 void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
304 void arrays_hashcode_elvload(XMMRegister dst, Address src, BasicType eltype);
305 void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
306 void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);
307
// Scalar conversion of the value in the XMM register `src` (of type `src_bt`)
// to an integral value of type `dst_bt` in the general-purpose register `dst`.
// NOTE(review): despite the "F2I" name, both types are parameters, so other
// floating-point/integral combinations are presumably supported -- confirm.
308 void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);
309
// Masked vector operation selected by the ideal opcode `ideal_opc` on
// elements of type `eType` under opmask `mask`. Three overloads differ in
// the last source: XMM register (with an optional `is_varshift` flag,
// default false), memory Address, or an 8-bit immediate.
310 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
311 XMMRegister dst, XMMRegister src1, XMMRegister src2,
312 bool merge, int vlen_enc, bool is_varshift = false);
313
314 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
315 XMMRegister dst, XMMRegister src1, Address src2,
316 bool merge, int vlen_enc);
317
318 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
319 XMMRegister src1, int imm8, bool merge, int vlen_enc);
320
// Operation between two opmask registers of `mask_len` bits.
321 void masked_op(int ideal_opc, int mask_len, KRegister dst,
322 KRegister src1, KRegister src2);
323
// Element-type casts from `from_elem_bt` to `to_elem_bt`, in an unsigned and
// a signed flavour (presumably zero- vs. sign-extension -- TODO confirm).
324 void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
325 BasicType from_elem_bt, BasicType to_elem_bt);
326
327 void vector_signed_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
328 BasicType from_elem_bt, BasicType to_elem_bt);
329
// Narrows int elements to the subword type `to_elem_bt`. There is no separate
// source operand, so `dst` is presumably converted in place -- TODO confirm.
330 void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero,
331 XMMRegister xtmp, Register rscratch, int vec_enc);
332
// Float (F2X) / double (D2X) to `to_elem_bt` vector casts.
// The _avx variants use XMM temporaries only; the _evex variants use two XMM
// and two KRegister temporaries. All take a sign-flip constant as an
// AddressLiteral plus a scratch register.
333 void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
334 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
335 AddressLiteral float_sign_flip, Register rscratch, int vec_enc);
336
337 void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
338 XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
339 Register rscratch, int vec_enc);
340
// Float -> long has a dedicated entry point with a fixed destination type.
341 void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
342 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
343 Register rscratch, int vec_enc);
344
345 void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
346 XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip,
347 Register rscratch, int vec_enc);
348
// NOTE(review): the sign-flip constant is named `float_sign_flip` here even
// though the source elements are doubles -- confirm this is intentional.
349 void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
350 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5,
351 AddressLiteral float_sign_flip, Register rscratch, int vec_enc);
352
// AVX10.2 flavours (per the name suffix): no temporaries and no sign-flip
// constant; overloaded for a register and for a memory source.
353 void vector_castF2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vec_enc);
354
355 void vector_castF2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, Address src, int vec_enc);
356
357 void vector_castD2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vec_enc);
358
359 void vector_castD2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, Address src, int vec_enc);
360
// Special-case fix-up helpers for the float/double -> int/long vector casts.
// NOTE(review): "special cases" presumably means inputs such as NaN and
// out-of-range values -- confirm against the definitions. The _avx variants
// take XMM temporaries only; the _evex variants take two XMM and two
// KRegister temporaries.
361 void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
362 XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch,
363 AddressLiteral float_sign_flip, int vec_enc);
364
365 void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
366 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
367 int vec_enc);
368
369 void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
370 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
371 int vec_enc);
372
373 void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
374 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
375 int vec_enc);
376
377 void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
378 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
379 int vec_enc);
380
381 void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
382 XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip,
383 int vec_enc);
384
// Cross-lane doubleword pack of `src` into `dst`, with a `zero` register and
// one temporary. NOTE(review): the meaning of `index` is not visible in this
// header -- confirm.
385 void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero,
386 XMMRegister xtmp, int index, int vec_enc);
387
// Converts a vector mask from element type `src_bt` to `dst_bt`.
// NOTE(review): the unit of `vlen` (elements vs. bytes) is not visible here.
388 void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen);
389
// Vector rounding of double / float elements. Each takes a sign-flip constant
// and an MXCSR value (`new_mxcsr`) as AddressLiterals; the MXCSR is
// presumably switched temporarily to change the rounding mode -- TODO confirm.
390 void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
391 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);
392
// NOTE(review): this float variant names its constant `double_sign_flip`,
// while vector_round_float_avx below names it `float_sign_flip` -- confirm
// which is intended.
393 void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
394 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);
395
396 void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
397 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4);
398
// AVX2 flavour of vector_compress_expand: the mask is held in an XMM register
// rather than a KRegister, and a permutation register `permv` plus extra
// temporaries are required.
399 void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask,
400 Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp,
401 BasicType bt, int vec_enc);
402
// Unsigned int divide / modulo / combined divide-and-modulo. The parameter
// names `rax` and `rdx` suggest callers must pass those specific registers
// (as x86 DIV requires) -- TODO confirm. udivmodI additionally takes `tmp`.
403 void udivI(Register rax, Register divisor, Register rdx);
404 void umodI(Register rax, Register divisor, Register rdx);
405 void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
406
// Reversal of the int / long in `src` into `dst` (presumably bit reversal --
// TODO confirm), using XMM and general-purpose temporaries.
407 void reverseI(Register dst, Register src, XMMRegister xtmp1,
408 XMMRegister xtmp2, Register rtmp);
409 void reverseL(Register dst, Register src, XMMRegister xtmp1,
410 XMMRegister xtmp2, Register rtmp1, Register rtmp2);
// Long counterparts of udivI / umodI / udivmodI.
411 void udivL(Register rax, Register divisor, Register rdx);
412 void umodL(Register rax, Register divisor, Register rdx);
413 void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
414
// Masked ternary-logic operation on elements of type `bt`. `func` presumably
// is the 8-bit truth-table immediate -- TODO confirm. `dst` presumably also
// acts as the first source, since only src2 and src3 are passed. Overloaded
// for a register and for a memory third operand.
415 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
416 bool merge, BasicType bt, int vlen_enc);
417
418 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
419 bool merge, BasicType bt, int vlen_enc);
420
// Per-element bit reversal for element type `bt`, using two XMM temporaries
// and one general-purpose temporary.
421 void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
422 XMMRegister xtmp2, Register rtmp, int vec_enc);
423
// GFNI-based flavour (per the name): takes the `mask` constant as an
// AddressLiteral and an optional `rscratch` (default noreg).
424 void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc,
425 XMMRegister xtmp, Register rscratch = noreg);
426
// Per-element byte reversal; needs no temporaries.
427 void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc);
428
// Per-element population count. The int / long / short / byte variants share
// one operand shape (dst, src, xtmp1, xtmp2, rtmp, vec_enc).
429 void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
430 XMMRegister xtmp2, Register rtmp, int vec_enc);
431
432 void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
433 XMMRegister xtmp2, Register rtmp, int vec_enc);
434
435 void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
436 XMMRegister xtmp2, Register rtmp, int vec_enc);
437
438 void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
439 XMMRegister xtmp2, Register rtmp, int vec_enc);
440
// Type-parameterized entry point; presumably dispatches on `bt` to the
// variants above -- TODO confirm.
441 void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
442 XMMRegister xtmp2, Register rtmp, int vec_enc);
443
// EVEX flavour: masked, with no temporaries.
444 void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
445 KRegister mask, bool merge, int vec_enc);
446
// Broadcasts the 32-bit immediate `imm32` to elements of type `bt` in `dst`,
// using `rtmp` as a temporary.
447 void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc);
448
// Byte-reversal variant that, unlike vector_reverse_byte, takes XMM and
// general-purpose temporaries. NOTE(review): the "64" presumably refers to
// 64-byte (512-bit) vectors -- confirm.
449 void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
450 XMMRegister xtmp2, Register rtmp, int vec_enc);
451
// Per-element count-leading-zeros.
// EVEX flavour: type-parameterized, with XMM, KRegister and general-purpose
// temporaries plus a `merge` flag.
452 void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
453 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
454 KRegister ktmp, Register rtmp, bool merge, int vec_enc);
455
// AVX flavours per element type. The int variant is the only one that takes
// no general-purpose temporary.
456 void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
457 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
458
459 void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
460 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
461
462 void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
463 XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc);
464
465 void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
466 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
467
// Type-parameterized AVX entry point; presumably dispatches on `bt` to the
// per-type variants above -- TODO confirm.
468 void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
469 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
470
// Type-parameterized vector add / subtract: dst = src1 (op) src2 on elements
// of type `bt`. The operand order for vpsub is presumed to be src1 - src2 --
// TODO confirm.
471 void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
472
473 void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
474
// Per-element count-trailing-zeros, EVEX flavour (four XMM temporaries, one
// KRegister temporary, one general-purpose temporary).
475 void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
476 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp,
477 Register rtmp, int vec_enc);
478
// Swaps groups of `nbits` bits selected by `bitmask`. NOTE(review):
// presumably a building block for the bit-reversal routines -- confirm.
479 void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
480 XMMRegister xtmp1, Register rtmp, int vec_enc);
481
// AVX flavour of count-trailing-zeros (three XMM temporaries, no KRegister).
482 void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
483 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
484
// Vector signum. `zero` and `one` are registers presumably preloaded with the
// corresponding constants -- TODO confirm. The AVX flavour uses an XMM
// temporary, the EVEX flavour a KRegister temporary.
485 void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
486 XMMRegister xtmp1, int vec_enc);
487
488 void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
489 KRegister ktmp1, int vec_enc);
490
// Masked vector move with the mask held in an XMM register: the first
// overload loads from memory into `dst`, the second stores `src` to memory.
491 void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc);
492
493 void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc);
494
// Rearranges the bytes of `src` into `dst` as directed by `shuffle`, using
// three XMM, one general-purpose and one KRegister temporary.
495 void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
496 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);
497
// Rearrange for int / float elements (per the name); needs no temporaries.
498 void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
499 XMMRegister src, int vlen_enc);
500
// Binary FP16 operation selected by `opcode`: dst = src1 (op) src2.
// NOTE(review): the "sh" suffix presumably means scalar half-precision, in
// contrast to evfp16ph (packed) declared later -- confirm.
501 void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
502
// Gather for subword element types. Indices are read through the
// general-purpose register `idx_base` (presumably a pointer to an index
// array -- TODO confirm), and the mask is also passed in a general-purpose
// register. Takes both `vector_len` and `vlen_enc`.
503 void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register mask,
504 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
505 Register midx, Register length, int vector_len, int vlen_enc);
506
// 8-byte gather steps (per the name), masked and unmasked. NOTE(review):
// presumably the building blocks of vgather_subword -- confirm.
507 void vgather8b_masked(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
508 Register mask, Register midx, Register rtmp, int vlen_enc);
509 void vgather8b(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
510 Register rtmp, int vlen_enc);
511
// Saturating vector operation selected by `opc`. The overloads taking
// `is_unsigned` choose signedness at run time; those without it sit next to
// the explicit vector_saturating_unsigned_op pair and are presumably the
// signed forms -- TODO confirm. Each comes with a register and a memory
// second source.
512 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, bool is_unsigned, int vlen_enc);
513
514 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, bool is_unsigned, int vlen_enc);
515
516 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
517
518 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);
519
520 void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
521
522 void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);
523
// Saturating add/sub helpers for "dq" elements (presumably doubleword and
// quadword, selected via `elem_bt` -- TODO confirm), each in an EVEX flavour
// (with KRegister temporaries) and an AVX flavour (XMM temporaries only).
// Unsigned subtract:
524 void vector_sub_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, KRegister ktmp, int vlen_enc);
525
526 void vector_sub_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
527 XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
528
// Unsigned add:
529 void vector_add_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
530 XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp, int vlen_enc);
531
532 void vector_add_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
533 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, int vlen_enc);
534
// Add or subtract chosen by `opc` (presumably the signed forms, given the
// separate unsigned helpers above -- TODO confirm):
535 void vector_addsub_dq_saturating_avx(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
536 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, int vlen_enc);
537
538 void vector_addsub_dq_saturating_evex(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
539 XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, int vlen_enc);
540
// Emulations (per the _emu suffix) of moving a doubleword / quadword vector
// into an opmask register. `xtmp2_hold_M1` defaults to false; NOTE(review):
// presumably it tells the helper that xtmp2 already contains all-ones (-1)
// so it need not be regenerated -- confirm.
541 void evpmovd2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false);
542
543 void evpmovq2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false);
544
// Sign extension on "dq" elements of type `etype`.
545 void vpsign_extend_dq(BasicType etype, XMMRegister dst, XMMRegister src, int vlen_enc);
546
// Generate the minimum / maximum value of `etype` in every element of `dst`.
// `allones` is a helper register; with compute_allones == true the helper
// presumably materializes the all-ones pattern itself -- TODO confirm.
547 void vpgenmin_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false);
548
549 void vpgenmax_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false);
550
// Compare into an opmask with predicate `cond`; the 'u' presumably denotes
// an unsigned comparison -- TODO confirm.
551 void evpcmpu(BasicType etype, KRegister kmask, XMMRegister src1, XMMRegister src2, Assembler::ComparisonPredicate cond, int vlen_enc);
552
// Greater-than compare producing its result in the vector register `dst`.
553 void vpcmpgt(BasicType etype, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
554
// Type-parameterized vector-to-opmask move with the same operand shape as
// the two _emu helpers above.
555 void evpmov_vec_to_mask(BasicType etype, KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
556 int vlen_enc, bool xtmp2_hold_M1 = false);
557
// Masked saturating operations. evmasked_saturating_op takes `is_unsigned`
// and presumably forwards to the _signed_ / _unsigned_ variants declared
// below -- TODO confirm. Every variant has a register and a memory overload
// for the second source.
558 void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2,
559 bool is_unsigned, bool merge, int vlen_enc);
560
561 void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2,
562 bool is_unsigned, bool merge, int vlen_enc);
563
564 void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2,
565 bool merge, int vlen_enc);
566
567 void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2,
568 bool merge, int vlen_enc);
569
570 void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1,
571 XMMRegister src2, bool merge, int vlen_enc);
572
573 void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1,
574 Address src2, bool merge, int vlen_enc);
575
// Selects elements of type `elem_bt` from the two vectors `src1` and `src2`.
// NOTE(review): no separate index operand is passed, so `dst` presumably
// holds the selection indices on entry and the result on exit -- confirm.
576 void select_from_two_vectors_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
577
// Binary FP16 vector operation selected by `opcode` ("ph" presumably means
// packed half-precision -- TODO confirm). Overloaded for a register and for
// a memory second source.
578 void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
579
580 void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);
581
// FP16 min/max. The 'v' forms take a vector-length encoding; the 's' forms do
// not (presumably scalar -- TODO confirm). The plain forms need a KRegister
// and two XMM temporaries; the _avx10_2 forms need only the KRegister.
582 void vminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
583 KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
584
585 void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
586 KRegister ktmp, int vlen_enc);
587
588 void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, Address src2,
589 KRegister ktmp, int vlen_enc);
590
591 void sminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
592 KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2);
593
594 void sminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
595 KRegister ktmp);
596
// Reconstructs the frame pointer, with `rtmp` as a temporary register.
// NOTE(review): when this is needed, and which register receives the value,
// is not visible in this header -- see the definition.
597 void reconstruct_frame_pointer(Register rtmp);
598
599 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP