1 /*
2 * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP
27
28 // C2_MacroAssembler contains high-level macros for C2
29
public:
// C2 compiled method's prolog code.
void verified_entry(Compile* C, int sp_inc = 0);

// Method entry barrier; see the definition in c2_MacroAssembler_x86.cpp.
void entry_barrier();
// Maps a vector length in bytes to the corresponding Assembler AVX
// vector-length encoding.
Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);

// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
// See full description in c2_MacroAssembler_x86.cpp.
void fast_lock(Register obj, Register box, Register rax_reg,
Register t, Register thread);
void fast_unlock(Register obj, Register reg_rax, Register t, Register thread);

// Check that the value in val lies within the range described by the given
// C2 type. NOTE(review): idx presumably identifies the node being verified —
// confirm against the definitions in c2_MacroAssembler_x86.cpp.
void verify_int_in_range(uint idx, const TypeInt* t, Register val);
void verify_long_in_range(uint idx, const TypeLong* t, Register val, Register tmp);
45
// Generic instructions support for use in .ad files C2 code generation
//
// Common parameter conventions in this section:
//   opcode/opc  - selects the concrete operation (NOTE(review): appears to be
//                 the C2 ideal opcode — confirm against the .ad users)
//   elem_bt/bt  - BasicType of the vector lanes
//   vlen_enc    - AVX vector-length encoding (cf. vector_length_encoding)
//   *tmp        - caller-supplied scratch registers

// Vector absolute value / negation, double (d) and float (f) lanes.
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src);
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len);

// Packed integer min/max. The vpu* variants presumably handle unsigned
// lanes — confirm in c2_MacroAssembler_x86.cpp.
void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
XMMRegister tmp = xnoreg);
void vpminmax(int opcode, BasicType elem_bt,
XMMRegister dst, XMMRegister src1, XMMRegister src2,
int vlen_enc);

void vpuminmax(int opcode, BasicType elem_bt,
XMMRegister dst, XMMRegister src1, XMMRegister src2,
int vlen_enc);

void vpuminmax(int opcode, BasicType elem_bt,
XMMRegister dst, XMMRegister src1, Address src2,
int vlen_enc);

// Floating-point min/max; the extra tmp registers are presumably needed for
// special-case handling — see the definition in the .cpp.
void vminmax_fp(int opcode, BasicType elem_bt,
XMMRegister dst, XMMRegister a, XMMRegister b,
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);

// AVX10.2 forms (vector and scalar 's' variant).
void vminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
XMMRegister src1, XMMRegister src2, int vlen_enc);

void sminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
XMMRegister src1, XMMRegister src2);

void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);

// EVEX (AVX-512) floating-point min/max using an opmask scratch register.
void evminmax_fp(int opcode, BasicType elem_bt,
XMMRegister dst, XMMRegister a, XMMRegister b,
KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);

// Math.signum; zero and one presumably hold the constant operands — confirm in .cpp.
void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one);

// Vector compress/expand under an opmask; merge presumably selects merging
// vs zeroing of unselected lanes — confirm in .cpp.
void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
bool merge, BasicType bt, int vec_enc);

void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len);

// Lane-width extension, sign- or zero-extending per the 'sign' flag
// (b=byte, w=word, d=dword source/destination widths).
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);

// Vector shifts by a uniform count (register or immediate),
// per lane width: w=16-bit, d=32-bit, q=64-bit.
void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftd_imm(int opcode, XMMRegister dst, int shift);
void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftq_imm(int opcode, XMMRegister dst, int shift);
void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);

// Vector rotates with immediate and variable shift counts.
void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Variable (per-lane) vector shifts.
void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
115
// Insert a GP-register value into lane idx of a vector register.
void insert(BasicType typ, XMMRegister dst, Register val, int idx);
void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
// Gather/scatter; the ev* forms take an AVX-512 opmask register.
void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);

// Opmask-predicated vector moves (load, store, reg-reg); merge presumably
// selects merging vs zeroing masking — confirm in .cpp.
void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, XMMRegister src, bool merge, int vector_len);

// extract
void extract(BasicType typ, Register dst, XMMRegister src, int idx);
XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
// Sign-extend dst in place according to typ — presumably for sub-int
// element types; confirm in .cpp.
void movsxl(BasicType typ, Register dst);

// vector test
void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes);

// Convert B2X (byte vector to a wider element type)
void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
// Broadcast a GP-register value to all vector lanes.
void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);

// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg);
void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);

// Vector loads from memory / literal addresses. rscratch is presumably
// needed when the literal is not reachable directly — confirm in .cpp.
void load_vector(BasicType bt, XMMRegister dst, Address src, int vlen_in_bytes);
void load_vector(BasicType bt, XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg);

void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);

void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
// Load the iota (0,1,2,...) index vector.
void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);
153
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
// Naming convention: reduce<N><T> reduces an N-lane vector of element type T
// (B=byte, S=short, I=int, L=long, F=float, D=double).

// dst = src1 reduce(op, src2) using vtmp as temps
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// Generate an opmask from a length value — presumably the low 'len' bits;
// confirm in c2_MacroAssembler_x86.cpp.
void genmask(KRegister dst, Register len, Register temp);

// dst = reduce(op, src2) using vtmp as temps
void reduce_fp(int opcode, int vlen,
XMMRegister dst, XMMRegister src,
XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
// "unordered" FP reductions presumably need not preserve a strict
// left-to-right evaluation order — confirm in .cpp.
void unordered_reduce_fp(int opcode, int vlen,
XMMRegister dst, XMMRegister src,
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
XMMRegister dst, XMMRegister src,
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
XMMRegister dst, XMMRegister src,
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
private:
// Per-type dispatchers used by the public reduce entry points above.
void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void unorderedReduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void unorderedReduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

// Int Reduction
void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

// Byte Reduction
void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

// Short Reduction
void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

// Long Reduction
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

// Float Reduction
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

// Unordered Float Reduction
void unorderedReduce2F(int opcode, XMMRegister dst, XMMRegister src);
void unorderedReduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void unorderedReduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void unorderedReduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

// Double Reduction
void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

// Unordered Double Reduction
void unorderedReduce2D(int opcode, XMMRegister dst, XMMRegister src);
void unorderedReduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void unorderedReduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

// Base reduction instruction
void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void unordered_reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
237
public:
// Vector mask (predicate) operations; masklen is the number of mask bits.
void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);

void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);

void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
Register tmp, int masklen, BasicType bt, int vec_enc);
// Expand mask bits held in a GP register into a vector representation —
// presumed from naming, confirm in .cpp.
void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc);

void vector_maskall_operation(KRegister dst, Register src, int mask_len);

// String.indexOf(char) intrinsics. NOTE(review): the 'L' variant presumably
// handles Latin-1 (byte) strings — confirm in .cpp.
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);

void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);

// IndexOf strings.
// Small strings are loaded through stack if they cross page boundary.
void string_indexof(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp,
int ae);

// IndexOf for constant substrings with size >= 8 elements
// which don't need to be loaded through stack.
void string_indexofC8(Register str1, Register str2,
Register cnt1, Register cnt2,
int int_cnt2, Register result,
XMMRegister vec, Register tmp,
int ae);

// Smallest code: we don't need to load through stack,
// check string tail.

// helper function for string_compare
void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
Address::ScaleFactor scale, Address::ScaleFactor scale1,
Address::ScaleFactor scale2, Register index, int ae);
// Compare strings.
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, int ae, KRegister mask = knoreg);

// Search for Non-ASCII character (Negative byte value) in a byte array,
// return index of the first such character, otherwise len.
void count_positives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);

// Compare char[] or byte[] arrays.
void arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
Register result, Register chr, XMMRegister vec1, XMMRegister vec2,
bool is_char, KRegister mask = knoreg, bool expand_ary2 = false);

// Vectorized array hashCode computation (polynomial hash; see the helpers below).
void arrays_hashcode(Register str1, Register cnt1, Register result,
Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext,
XMMRegister vcoef0, XMMRegister vcoef1, XMMRegister vcoef2, XMMRegister vcoef3,
XMMRegister vresult0, XMMRegister vresult1, XMMRegister vresult2, XMMRegister vresult3,
XMMRegister vtmp0, XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3,
BasicType eltype);

// helper functions for arrays_hashcode
int arrays_hashcode_elsize(BasicType eltype);
void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
void arrays_hashcode_elvload(XMMRegister dst, Address src, BasicType eltype);
void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);

// Convert a floating-point value in src to an integral value in dst;
// src_bt/dst_bt select the exact source and destination types.
void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);
310
// Opmask-predicated vector operation dispatch, keyed by ideal_opc.
// merge presumably selects merging vs zeroing masking — confirm in .cpp.
void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
XMMRegister dst, XMMRegister src1, XMMRegister src2,
bool merge, int vlen_enc, bool is_varshift = false);

void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
XMMRegister dst, XMMRegister src1, Address src2,
bool merge, int vlen_enc);

void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
XMMRegister src1, int imm8, bool merge, int vlen_enc);

// Mask-register to mask-register operations.
void masked_op(int ideal_opc, int mask_len, KRegister dst,
KRegister src1, KRegister src2);

// Lane-type casts (unsigned = zero-extending, signed = sign-extending —
// presumed from naming; confirm in .cpp).
void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
BasicType from_elem_bt, BasicType to_elem_bt);

void vector_signed_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
BasicType from_elem_bt, BasicType to_elem_bt);

void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero,
XMMRegister xtmp, Register rscratch, int vec_enc);

// Float/double to integral vector casts. Suffixes select the ISA level
// (avx / evex / avx10_2); the *sign_flip literals and temporaries are
// presumably used for special-case handling — see the .cpp definitions.
void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
AddressLiteral float_sign_flip, Register rscratch, int vec_enc);

void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
Register rscratch, int vec_enc);

void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
Register rscratch, int vec_enc);

void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip,
Register rscratch, int vec_enc);

void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5,
AddressLiteral float_sign_flip, Register rscratch, int vec_enc);

void vector_castF2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vec_enc);

void vector_castF2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, Address src, int vec_enc);

void vector_castD2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vec_enc);

void vector_castD2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, Address src, int vec_enc);

// Special-case fix-up paths for the FP-to-integral casts above.
void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch,
AddressLiteral float_sign_flip, int vec_enc);

void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
int vec_enc);

void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
int vec_enc);

void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
int vec_enc);

void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
int vec_enc);

void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip,
int vec_enc);

void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero,
XMMRegister xtmp, int index, int vec_enc);

// Cast between vector mask representations of different element types.
void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen);

// Vector rounding; new_mxcsr presumably supplies an alternate MXCSR
// rounding mode — confirm in .cpp.
void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);

void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);

void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4);

// AVX2 fallback for vector compress/expand (mask held in an XMM register).
void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask,
Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp,
BasicType bt, int vec_enc);
403
// Unsigned 32-bit divide / modulo / combined div+mod. Parameter names
// indicate x86 div register conventions (dividend in rax, remainder in rdx).
void udivI(Register rax, Register divisor, Register rdx);
void umodI(Register rax, Register divisor, Register rdx);
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);

// Bit-reversal of a GP-register value (int / long).
void reverseI(Register dst, Register src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp);
void reverseL(Register dst, Register src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp1, Register rtmp2);
// Unsigned 64-bit divide / modulo / combined div+mod.
void udivL(Register rax, Register divisor, Register rdx);
void umodL(Register rax, Register divisor, Register rdx);
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);

// AVX-512 ternary logic (vpternlog); func is the 8-bit truth-table immediate.
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
bool merge, BasicType bt, int vlen_enc);

void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
bool merge, BasicType bt, int vlen_enc);

// Per-lane bit reversal; the gfni variant presumably uses GFNI
// instructions — confirm in .cpp.
void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);

void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc,
XMMRegister xtmp, Register rscratch = noreg);

// Per-lane byte reversal (byte swap).
void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc);

// Population count per lane, by element width, plus generic dispatchers.
void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);

void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);

void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);

void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);

void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);

void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
KRegister mask, bool merge, int vec_enc);

// Broadcast an immediate to all vector lanes via a GP scratch register.
void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc);
449
void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, Register rtmp, int vec_enc);

// Count leading zeros per lane; per-width AVX helpers plus the evex/avx
// generic dispatchers.
void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
KRegister ktmp, Register rtmp, bool merge, int vec_enc);

void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc);

void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

// Element-wise vector add / subtract keyed by element type.
void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);

void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);

// Count trailing zeros per lane.
void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp,
Register rtmp, int vec_enc);

// Swap nbits-wide bit groups selected by bitmask — presumed from naming;
// confirm in .cpp.
void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
XMMRegister xtmp1, Register rtmp, int vec_enc);

void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

// Vectorized Math.signum; zero and one presumably hold the constant
// operands — confirm in .cpp.
void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
XMMRegister xtmp1, int vec_enc);

void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
KRegister ktmp1, int vec_enc);

// Masked vector load / store with the mask held in an XMM register.
void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc);

void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc);

// Rearrange (shuffle) the bytes of src according to the shuffle index vector.
void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);

void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
XMMRegister src, int vlen_enc);

// Scalar FP16 binary operation — presumed half-precision from naming;
// confirm in .cpp.
void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);

// Gather support for sub-word (byte/short) element types, built on the
// 8-byte gather primitives below.
void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register mask,
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
Register midx, Register length, int vector_len, int vlen_enc);

void vgather8b_masked(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
Register mask, Register midx, Register rtmp, int vlen_enc);
void vgather8b(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
Register rtmp, int vlen_enc);
512
// Saturating vector arithmetic (results clamp at the type's min/max rather
// than wrapping — presumed from naming; confirm in .cpp). Overloads cover
// register/memory second operands and signed/unsigned flavors.
void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, bool is_unsigned, int vlen_enc);

void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, bool is_unsigned, int vlen_enc);

void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);

void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);

// Saturating unsigned add/sub for dword/qword (dq) lanes, which have no
// native saturating instructions; evex vs avx selects the ISA level.
void vector_sub_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, KRegister ktmp, int vlen_enc);

void vector_sub_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);

void vector_add_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp, int vlen_enc);

void vector_add_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, int vlen_enc);

void vector_addsub_dq_saturating_avx(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, int vlen_enc);

void vector_addsub_dq_saturating_evex(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, int vlen_enc);

// Emulation of vpmovd2m / vpmovq2m (lane sign bits -> opmask) — presumed
// for targets lacking the native instruction; confirm in .cpp.
// xtmp2_hold_M1 indicates xtmp2 already holds all-ones.
void evpmovd2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false);

void evpmovq2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false);

void vpsign_extend_dq(BasicType etype, XMMRegister dst, XMMRegister src, int vlen_enc);

// Materialize per-type min/max constant vectors; compute_allones requests
// that the allones register be initialized first — presumed, confirm in .cpp.
void vpgenmin_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false);

void vpgenmax_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false);

// Unsigned vector compare into an opmask.
void evpcmpu(BasicType etype, KRegister kmask, XMMRegister src1, XMMRegister src2, Assembler::ComparisonPredicate cond, int vlen_enc);

// Signed vector greater-than compare.
void vpcmpgt(BasicType etype, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

void evpmov_vec_to_mask(BasicType etype, KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
int vlen_enc, bool xtmp2_hold_M1 = false);

// Opmask-predicated saturating arithmetic.
void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2,
bool is_unsigned, bool merge, int vlen_enc);

void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2,
bool is_unsigned, bool merge, int vlen_enc);

void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2,
bool merge, int vlen_enc);

void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2,
bool merge, int vlen_enc);

void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1,
XMMRegister src2, bool merge, int vlen_enc);

void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1,
Address src2, bool merge, int vlen_enc);

void select_from_two_vectors_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

// FP16 vector operations (half-precision — presumed from the 'ph' suffix;
// confirm in .cpp). avx10_2 variants select AVX10.2 encodings.
void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);

void vminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);

void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, int vlen_enc);

void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, Address src2,
KRegister ktmp, int vlen_enc);

void sminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2);

void sminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp);

// Reconstruct the frame pointer into rtmp; see c2_MacroAssembler_x86.cpp.
void reconstruct_frame_pointer(Register rtmp);
599
600 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP