1 /*
  2  * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
 26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP
 27 
 28 // C2_MacroAssembler contains high-level macros for C2
 29 
 30 public:
 31   // C2 compiled method's prolog code.
      // Takes the current Compile and an optional sp_inc, which defaults to 0.
 32   void verified_entry(Compile* C, int sp_inc = 0);
 33
      // Takes no arguments. NOTE(review): what is emitted is defined in the .cpp,
      // not visible from this header.
 34   void entry_barrier();
      // Returns an Assembler::AvxVectorLen for a vector length expressed in bytes.
 35   Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
 36 
 37   // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
 38   // See full description in c2_MacroAssembler_x86.cpp.
      // fast_lock takes a box register in addition to obj; fast_unlock does not.
      // NOTE(review): the rax-related parameter is spelled rax_reg in fast_lock
      // but reg_rax in fast_unlock.
 39   void fast_lock(Register obj, Register box, Register rax_reg,
 40                  Register t, Register thread);
 41   void fast_unlock(Register obj, Register reg_rax, Register t, Register thread);
 42 
 43   void verify_int_in_range(uint idx, const TypeInt* t, Register val);  // int form: (idx, TypeInt, val), no temp register
      // Long form: same shape with a TypeLong, plus one tmp register.
 44   void verify_long_in_range(uint idx, const TypeLong* t, Register val, Register tmp);
 45 
 46   // Generic instructions support for use in .ad files C2 code generation
      // vabsnegd / vabsnegf each have two overloads: one without and one with an
      // explicit vector_len. The operation is selected by the opcode argument.
      // NOTE(review): presumably d = double and f = float, with opcode choosing
      // abs vs. neg -- confirm in the .cpp.
 47   void vabsnegd(int opcode, XMMRegister dst, XMMRegister src);
 48   void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
 49   void vabsnegf(int opcode, XMMRegister dst, XMMRegister src);
 50   void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
 51 
 52   void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
 53                XMMRegister tmp = xnoreg);  // two-operand form; tmp is optional (defaults to xnoreg)
      // Three-operand form (dst, src1, src2) with an explicit vlen_enc.
 54   void vpminmax(int opcode, BasicType elem_bt,
 55                 XMMRegister dst, XMMRegister src1, XMMRegister src2,
 56                 int vlen_enc);
 57
      // vpuminmax: two overloads, differing only in src2 (XMMRegister vs. Address).
      // NOTE(review): the "u" presumably denotes unsigned -- confirm in the .cpp.
 58   void vpuminmax(int opcode, BasicType elem_bt,
 59                 XMMRegister dst, XMMRegister src1, XMMRegister src2,
 60                 int vlen_enc);
 61
 62   void vpuminmax(int opcode, BasicType elem_bt,
 63                 XMMRegister dst, XMMRegister src1, Address src2,
 64                 int vlen_enc);
 65
      // Inputs are named a and b; needs three XMM temporaries (tmp, atmp, btmp).
 66   void vminmax_fp(int opcode, BasicType elem_bt,
 67                   XMMRegister dst, XMMRegister a, XMMRegister b,
 68                   XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
 69                   int vlen_enc);
 70
      // *_avx10_2 forms take a KRegister mask and no XMM temporaries.
      // vminmax_fp_avx10_2 has a vlen_enc parameter; sminmax_fp_avx10_2 does not.
 71   void vminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
 72                           XMMRegister src1, XMMRegister src2, int vlen_enc);
 73
 74   void sminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
 75                           XMMRegister src1, XMMRegister src2);
 76
      // No elem_bt parameter; needs two XMM temporaries (xtmp1, xtmp2).
 77   void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
 78
      // Same parameter shape as vminmax_fp, except the first temporary is a
      // KRegister (ktmp) instead of an XMMRegister.
 79   void evminmax_fp(int opcode, BasicType elem_bt,
 80                    XMMRegister dst, XMMRegister a, XMMRegister b,
 81                    KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
 82                    int vlen_enc);
 83 
 84   void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one);  // zero and one are caller-supplied XMM registers
 85
      // Single entry point selected by opcode; takes a KRegister mask, a merge
      // flag and the element type bt.
      // NOTE(review): opcode presumably selects compress vs. expand -- confirm.
 86   void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
 87                               bool merge, BasicType bt, int vec_enc);
 88
      // KRegister-to-KRegister form; uses two general-purpose temporaries and
      // the mask length.
 89   void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len);
 90 
 91   void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);  // overload with explicit vector_len
      // Overload without vector_len (only vextendbw has one in this view).
 92   void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
      // NOTE(review): the suffix presumably names source->destination element
      // widths (b/w/d) and sign presumably picks sign- vs. zero-extension -- confirm.
 93   void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
 94   void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
 95 
 96   void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);  // two-operand form: shift count in an XMM register
      // Naming pattern visible below:
      //  - vshift{d,w,q}(opcode, dst, shift)                : two-operand, XMM shift count
      //  - vshift{d,w,q}(opcode, dst, src, shift, vlen_enc) : three-operand with vector length
      //  - vshift{d,q}_imm(...)                             : int shift count, with or without nds/vector_len
      // No vshiftw_imm is declared in this view.
 97   void vshiftd_imm(int opcode, XMMRegister dst, int shift);
 98   void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 99   void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
100   void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
101   void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
102   void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
103   void vshiftq_imm(int opcode, XMMRegister dst, int shift);
104   void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
105   void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
106 
107   void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);  // int shift count
      // Same as above but the shift count comes from an XMM register.
108   void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
109
      // varshift* forms: shift count supplied in an XMM register.
      //  - varshiftd / varshiftw take no temporary;
      //  - varshiftq takes an optional vtmp (defaults to xnoreg);
      //  - varshiftbw / evarshiftb require a vtmp.
110   void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
111   void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
112   void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
113   void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
114   void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
115 
116   void insert(BasicType typ, XMMRegister dst, Register val, int idx);  // two-operand form: general-purpose val placed at index idx of dst
      // Three-operand form with a separate src vector.
117   void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
      // Gather/scatter: vgather takes its mask in an XMMRegister, while
      // evgather / evscatter take it in a KRegister. All address through a
      // base register plus an XMM index vector.
118   void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
119   void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
120   void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);
121
      // Masked move, three overloads: memory source, memory destination, and
      // register-to-register. Each takes a KRegister mask and a merge flag.
122   void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
123   void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
124   void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, XMMRegister src, bool merge, int vector_len);
125 
126   // extract
      // extract: XMM source to general-purpose dst at a given idx.
127   void extract(BasicType typ, Register dst, XMMRegister src, int idx);
      // get_lane is the only member of this group that returns a value (an XMMRegister).
128   XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
      // get_elem: overloads for a general-purpose dst and an XMM dst; the XMM
      // form accepts an optional vtmp (defaults to xnoreg).
129   void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
130   void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
      // Operates in place on dst according to typ.
      // NOTE(review): presumably a sign-extension helper -- confirm in the .cpp.
131   void movsxl(BasicType typ, Register dst);
132 
133   // vector test
      // Takes two XMM sources and one XMM temporary; the vector length is given
      // in bytes (vlen_in_bytes), not as an encoding. No destination register.
134   void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes);
135 
136  // Convert B2X
     // to_elem_bt names the destination element type.
137  void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
     // Source is a general-purpose register; destination is an XMM register.
138  void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
139 
140   // blend
      // evpcmp: two overloads differing in src2 (XMMRegister vs. AddressLiteral).
      // The AddressLiteral form accepts an optional rscratch (defaults to noreg).
      // Both take a destination mask (kdmask) and a source mask (ksmask).
141   void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister    src2, int comparison, int vector_len);
142   void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg);
      // Blend of src1 and src2 into dst under kmask, with a merge flag.
143   void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
144 
145   void load_vector(BasicType bt, XMMRegister dst, Address        src, int vlen_in_bytes);  // Address source
      // AddressLiteral source; accepts an optional rscratch (defaults to noreg).
146   void load_vector(BasicType bt, XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg);
147
      // load_vector_mask: one overload produces an XMMRegister, the other a
      // KRegister. Note that the XMM form takes the length in bytes while the
      // KRegister form takes a vlen_enc.
148   void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
149   void load_vector_mask(KRegister   dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);
150
      // Source is an InternalAddress.
      // NOTE(review): the unit of vlen is not stated in this header.
151   void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
      // No source operand: only dst, a length in bytes, and the element type.
152   void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);
153 
154   // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
155
156   // dst = src1  reduce(op, src2) using vtmp as temps
      // Integer forms: dst and src1 are general-purpose registers, src2 is the
      // vector operand, and two XMM temporaries are required.
157   void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
158   void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
      // Produces a KRegister from a length register using one general-purpose temp.
      // NOTE(review): exact mask semantics are defined in the .cpp.
159   void genmask(KRegister dst, Register len, Register temp);
160
161   // dst = reduce(op, src2) using vtmp as temps
      // (In the two declarations below the vector operand is named src.)
      // reduce_fp requires vtmp1 and defaults vtmp2 to xnoreg;
      // unordered_reduce_fp defaults both temporaries to xnoreg.
162   void reduce_fp(int opcode, int vlen,
163                  XMMRegister dst, XMMRegister src,
164                  XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
165   void unordered_reduce_fp(int opcode, int vlen,
166                            XMMRegister dst, XMMRegister src,
167                            XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
      // Byte/short forms share the reduceI/reduceL parameter shape.
168   void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
169   void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
170   void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
      // Float/double min-max reductions: take an is_dst_valid flag, three XMM
      // temporaries (tmp, atmp, btmp), xmm_0, and an optional xmm_1 (xnoreg by default).
171   void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
172                          XMMRegister dst, XMMRegister src,
173                          XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
174   void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
175                           XMMRegister dst, XMMRegister src,
176                           XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
177  private:
      // Private reduction helpers. Signature patterns visible in this section:
      //  - reduceF/reduceD/unorderedReduceF/unorderedReduceD take a vlen argument;
      //    the digit-suffixed helpers below do not.
      //  - Integer helpers (I/B/S/L and mulreduce*B): Register dst/src1, XMM src2,
      //    and two XMM temporaries.
      //  - Ordered float/double helpers: reduce2F, reduce4F and reduce2D take one
      //    vtmp; the wider ones take two.
      //  - Unordered helpers: the 2F/2D forms take no temporary, 4F/4D take one,
      //    8F/16F/8D take two.
      // NOTE(review): the digit in each name presumably is the number of lanes
      // handled -- confirm in the .cpp.
178   void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
179   void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
180   void unorderedReduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
181   void unorderedReduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
182
183   // Int Reduction
184   void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
185   void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
186   void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
187   void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
188
189   // Byte Reduction
190   void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
191   void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
192   void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
193   void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
194   void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
195   void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
196   void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
197   void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
198
199   // Short Reduction
200   void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
201   void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
202   void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
203   void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
204
205   // Long Reduction
206   void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
207   void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
208   void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
209
210   // Float Reduction
211   void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
212   void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
213   void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
214   void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
215
216   // Unordered Float Reduction
217   void unorderedReduce2F(int opcode, XMMRegister dst, XMMRegister src);
218   void unorderedReduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
219   void unorderedReduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
220   void unorderedReduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
221
222   // Double Reduction
223   void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
224   void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
225   void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
226
227   // Unordered Double Reduction
228   void unorderedReduce2D(int opcode, XMMRegister dst, XMMRegister src);
229   void unorderedReduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
230   void unorderedReduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
231
232   // Base reduction instruction
      // The *_128 forms are two-operand (dst, src); the *_256 forms are
      // three-operand (dst, src1, src2). All are keyed by element type and opcode.
233   void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
234   void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
235   void unordered_reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
236   void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
237 
238  public:
      // Register-only helper: operates on dst with one general-purpose temp and
      // the mask length; takes no vector or KRegister operand.
239   void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);
240
      // vector_mask_operation: two overloads. The first takes the mask in a
      // KRegister together with masksize; the second takes it in an XMMRegister
      // together with an XMM temporary and the element type bt.
241   void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);
242
243   void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
244                              Register tmp, int masklen, BasicType bt, int vec_enc);
      // General-purpose src to XMM dst; needs two general-purpose temporaries
      // and one XMM temporary.
245   void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
246                               Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc);
247
      // General-purpose src to KRegister dst for a given mask_len.
248   void vector_maskall_operation(KRegister dst, Register src, int mask_len);
249 
250   void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
251                            XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);  // inputs: str1, cnt1, ch; output register: result; three XMM temps and one GP temp
252
      // Identical parameter list to string_indexof_char.
      // NOTE(review): the "L" presumably denotes the Latin-1 (byte) variant -- confirm.
253   void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
254                            XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
255 
256   // IndexOf strings.
257   // Small strings are loaded through stack if they cross page boundary.
      // Takes the substring length both as a register (cnt2) and as an int
      // (int_cnt2). NOTE(review): how the two interact is defined in the .cpp.
      // ae is passed as a plain int. NOTE(review): presumably an argument-encoding
      // selector -- confirm.
258   void string_indexof(Register str1, Register str2,
259                       Register cnt1, Register cnt2,
260                       int int_cnt2,  Register result,
261                       XMMRegister vec, Register tmp,
262                       int ae);
263
264   // IndexOf for constant substrings with size >= 8 elements
265   // which don't need to be loaded through stack.
      // Same parameter list as string_indexof.
266   void string_indexofC8(Register str1, Register str2,
267                       Register cnt1, Register cnt2,
268                       int int_cnt2,  Register result,
269                       XMMRegister vec, Register tmp,
270                       int ae);
271
272     // Smallest code: we don't need to load through stack,
273     // check string tail.
      // NOTE(review): the two comment lines above are not followed by any
      // declaration in this header; they appear to be a leftover.
274 
275   // helper function for string_compare
      // Takes three scale factors (scale, scale1, scale2), an index register and ae.
276   void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
277                           Address::ScaleFactor scale, Address::ScaleFactor scale1,
278                           Address::ScaleFactor scale2, Register index, int ae);
279   // Compare strings.
      // The KRegister mask is optional and defaults to knoreg.
280   void string_compare(Register str1, Register str2,
281                       Register cnt1, Register cnt2, Register result,
282                       XMMRegister vec1, int ae, KRegister mask = knoreg);
283 
284   // Search for Non-ASCII character (Negative byte value) in a byte array,
285   // return index of the first such character, otherwise len.
      // Uses one general-purpose temp and two XMM temps; both KRegister masks
      // are optional and default to knoreg.
286   void count_positives(Register ary1, Register len,
287                        Register result, Register tmp1,
288                        XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
289 
290   // Compare char[] or byte[] arrays.
      // is_char selects between the two element kinds named above. The trailing
      // mask (knoreg) and expand_ary2 (false) parameters are optional.
      // NOTE(review): the meaning of is_array_equ and expand_ary2 is defined in
      // the .cpp, not in this header.
291   void arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
292                      Register result, Register chr, XMMRegister vec1, XMMRegister vec2,
293                      bool is_char, KRegister mask = knoreg, bool expand_ary2 = false);
294 
295   void arrays_hashcode(Register str1, Register cnt1, Register result,
296                        Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext,
297                        XMMRegister vcoef0, XMMRegister vcoef1, XMMRegister vcoef2, XMMRegister vcoef3,
298                        XMMRegister vresult0, XMMRegister vresult1, XMMRegister vresult2, XMMRegister vresult3,
299                        XMMRegister vtmp0, XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3,
300                        BasicType eltype);  // 3 GP temps, vnext, and four each of vcoef/vresult/vtmp XMM registers; eltype selects the element type
301
302   // helper functions for arrays_hashcode
      // arrays_hashcode_elsize is the only helper returning a value (int).
      // arrays_hashcode_elvload is overloaded on Address vs. AddressLiteral.
303   int arrays_hashcode_elsize(BasicType eltype);
304   void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
305   void arrays_hashcode_elvload(XMMRegister dst, Address src, BasicType eltype);
306   void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
307   void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);
308
      // XMM source to general-purpose dst; both the destination and source
      // element types are passed explicitly.
309   void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);
310 
311   void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
312                    XMMRegister dst, XMMRegister src1, XMMRegister src2,
313                    bool merge, int vlen_enc, bool is_varshift = false);  // register src2; is_varshift is optional (false by default)
314
      // Address src2 overload (no is_varshift parameter).
315   void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
316                    XMMRegister dst, XMMRegister src1, Address src2,
317                    bool merge, int vlen_enc);
318
      // Immediate overload: second operand is an int imm8.
319   void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
320                    XMMRegister src1, int imm8, bool merge, int vlen_enc);
321
      // Operates purely on KRegisters (dst, src1, src2) for a given mask_len.
322   void masked_op(int ideal_opc, int mask_len, KRegister dst,
323                  KRegister src1, KRegister src2);
324 
325   void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
326                             BasicType from_elem_bt, BasicType to_elem_bt);  // element types given as from_elem_bt -> to_elem_bt
327
      // Same parameter list as vector_unsigned_cast.
328   void vector_signed_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
329                           BasicType from_elem_bt, BasicType to_elem_bt);
330
      // No separate src parameter: takes dst, a zero register, an XMM temp and a
      // scratch general-purpose register.
      // NOTE(review): presumably converts dst in place -- confirm in the .cpp.
331   void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero,
332                                   XMMRegister xtmp, Register rscratch, int vec_enc);
333 
334   void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
335                           XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
336                           AddressLiteral float_sign_flip, Register rscratch, int vec_enc);  // _avx form: four XMM temporaries, no KRegisters
337
      // _evex forms use two XMM temporaries plus two KRegister temporaries.
338   void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
339                            XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
340                            Register rscratch, int vec_enc);
341
      // No to_elem_bt parameter; the sign-flip constant is named double_sign_flip.
342   void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
343                            KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
344                            Register rscratch, int vec_enc);
345
346   void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
347                            XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip,
348                            Register rscratch, int vec_enc);
349
      // Five XMM temporaries.
      // NOTE(review): the constant is named float_sign_flip here but sign_flip in
      // vector_castD2X_evex -- naming only, confirm intent.
350   void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
351                           XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5,
352                           AddressLiteral float_sign_flip, Register rscratch, int vec_enc);
353
      // _avx10_2 forms take no temporaries, no sign-flip constant and no scratch
      // register; each is overloaded on XMMRegister vs. Address source.
354   void vector_castF2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vec_enc);
355
356   void vector_castF2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, Address src, int vec_enc);
357
358   void vector_castD2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vec_enc);
359
360   void vector_castD2X_avx10_2(BasicType to_elem_bt, XMMRegister dst, Address src, int vec_enc);
361 
362   void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
363                                                    XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch,
364                                                    AddressLiteral float_sign_flip, int vec_enc);  // _avx form: five XMM temporaries
365
      // The _evex special-case helpers below all share one shape: two XMM
      // temporaries, two KRegister temporaries, a scratch register, a sign-flip
      // AddressLiteral and vec_enc. The *_to_int_* forms name the constant
      // float_sign_flip; the *_to_long_* forms name it double_sign_flip.
      // NOTE(review): which inputs count as "special cases" is defined in the .cpp.
366   void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
367                                                     KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
368                                                     int vec_enc);
369
370   void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
371                                                      KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
372                                                      int vec_enc);
373
374   void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
375                                                    KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
376                                                    int vec_enc);
377
378   void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
379                                                     KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
380                                                     int vec_enc);
381
      // _avx form: four XMM temporaries, no KRegisters.
382   void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
383                                                   XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip,
384                                                   int vec_enc);
385 
386   void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero,
387                                             XMMRegister xtmp, int index, int vec_enc);  // takes a caller-supplied zero register, one XMM temp and an int index
388
      // Destination and source element types are passed explicitly.
      // NOTE(review): the unit of vlen is not stated in this header.
389   void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen);
390 
391   void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
392                                 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);  // two XMM and two KRegister temporaries; new_mxcsr is passed as an AddressLiteral
393
      // Same parameter list as vector_round_double_evex.
      // NOTE(review): the sign-flip constant is named double_sign_flip in this
      // float variant, whereas vector_round_float_avx names it float_sign_flip --
      // confirm this is intended.
394   void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
395                                Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);
396
      // _avx form: four XMM temporaries and no KRegisters.
397   void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
398                               Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4);
399 
400   void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask,
401                                    Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp,
402                                    BasicType bt, int vec_enc);  // unlike vector_compress_expand, the mask is an XMMRegister and there is no merge flag; needs rtmp, rscratch, permv and xtmp
403 
404   void udivI(Register rax, Register divisor, Register rdx);  // (rax, divisor, rdx)
      // udiv*/umod* share the (rax, divisor, rdx) shape for I and L; the udivmod*
      // forms take one additional tmp register.
      // NOTE(review): the parameter names suggest the fixed x86 rax/rdx registers
      // are expected -- confirm at the call sites.
405   void umodI(Register rax, Register divisor, Register rdx);
406   void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
407
      // reverseI needs two XMM temps and one general-purpose temp; reverseL needs
      // two XMM temps and two general-purpose temps.
408   void reverseI(Register dst, Register src, XMMRegister xtmp1,
409                 XMMRegister xtmp2, Register rtmp);
410   void reverseL(Register dst, Register src, XMMRegister xtmp1,
411                 XMMRegister xtmp2, Register rtmp1, Register rtmp2);
412   void udivL(Register rax, Register divisor, Register rdx);
413   void umodL(Register rax, Register divisor, Register rdx);
414   void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
415 
416   void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
417                   bool merge, BasicType bt, int vlen_enc);  // register src3; operands are named dst, src2, src3 (no src1 parameter)
418
      // Address src3 overload; otherwise identical.
419   void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
420                   bool merge, BasicType bt, int vlen_enc);
421 
422   void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
423                           XMMRegister xtmp2, Register rtmp, int vec_enc);  // two XMM temps and one general-purpose temp
424
      // GFNI variant: takes a mask as an AddressLiteral and one XMM temp;
      // rscratch is optional (defaults to noreg).
425   void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc,
426                                XMMRegister xtmp, Register rscratch = noreg);
427
      // No temporaries.
428   void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc);
429 
430   void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
431                            XMMRegister xtmp2, Register rtmp, int vec_enc);  // two XMM temps and one general-purpose temp
432
      // The long/short/byte variants share vector_popcount_int's parameter list.
433   void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
434                             XMMRegister xtmp2, Register rtmp, int vec_enc);
435
436   void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
437                              XMMRegister xtmp2, Register rtmp, int vec_enc);
438
439   void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
440                             XMMRegister xtmp2, Register rtmp, int vec_enc);
441
      // Same temporaries as the per-type variants, with the element type passed
      // as bt. NOTE(review): presumably dispatches to them -- confirm in the .cpp.
442   void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
443                                 XMMRegister xtmp2, Register rtmp, int vec_enc);
444
      // EVEX form: no temporaries; takes a KRegister mask and a merge flag.
445   void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
446                                      KRegister mask, bool merge, int vec_enc);
447 
448   void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc);  // source is an int immediate (imm32); needs a general-purpose temp
449
      // Two XMM temps and one general-purpose temp.
      // NOTE(review): the "64" suffix is not explained in this header.
450   void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
451                              XMMRegister xtmp2, Register rtmp, int vec_enc);
452 
453   void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
454                                        XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
455                                        KRegister ktmp, Register rtmp, bool merge, int vec_enc);  // EVEX form: three XMM temps, one KRegister temp, one GP temp, and a merge flag
456
      // AVX per-type variants: three XMM temps each. The byte/short/long forms
      // also take a general-purpose rtmp; the int form does not.
457   void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
458                                            XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
459
460   void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
461                                             XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
462
463   void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
464                                           XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc);
465
466   void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
467                                            XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
468
      // AVX form with the element type passed as bt.
469   void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
470                                       XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
471
      // Element-type-parameterized three-operand add / subtract.
472   void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
473
474   void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
475
      // EVEX trailing-zero count: four XMM temps, one KRegister temp, one GP temp.
      // Unlike the leading-zero EVEX form, there is no merge flag.
476   void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
477                                         XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp,
478                                         Register rtmp, int vec_enc);
479
      // Parameterized by nbits and bitmask (both int); one XMM temp, one GP temp.
480   void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
481                          XMMRegister xtmp1, Register rtmp, int vec_enc);
482
      // AVX trailing-zero count: three XMM temps and one GP temp.
483   void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
484                                        XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
485 
// Vector signum helpers (declarations only). Both take an opcode plus caller-supplied 'zero' and 'one' registers.
// NOTE(review): 'zero' / 'one' presumably hold broadcast 0.0 / 1.0 constants and opcode selects float vs double - confirm.
// The AVX variant uses an XMM temporary (xtmp1); the EVEX variant uses an opmask temporary (ktmp1) instead.
486   void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
487                          XMMRegister xtmp1, int vec_enc);
488 
489   void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
490                           KRegister ktmp1, int vec_enc);
491 
// vmovmask overloads: the first has a register destination and an Address source, the second the reverse.
// In both, the mask is an XMM register rather than an opmask register.
// NOTE(review): presumably a masked vector load and a masked vector store respectively - confirm in the definition.
492   void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc);
493 
494   void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc);
495 
// Declaration only. Note the operand order: dst, then shuffle, then src.
// NOTE(review): presumably permutes the bytes of src according to shuffle into dst - behaviour not visible here, confirm.
// xtmp1..xtmp3, rtmp and ktmp are presumably scratch - verify which are clobbered.
496   void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
497                        XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);
498 
// Declaration only; takes no temporaries, just dst, shuffle and src.
// NOTE(review): by name, rearranges int or float lanes (selected by bt) of src according to shuffle - TODO confirm.
499   void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
500                                   XMMRegister src, int vlen_enc);
501 
// Declaration only. Unlike the vector helpers around it, this takes no vector-length encoding parameter.
// NOTE(review): presumably emits a scalar half-precision (FP16) operation selected by opcode - confirm in the definition.
502   void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
503 
// Declaration only. Takes both vector_len and vlen_enc; how the two relate is not visible here.
// base, idx_base, mask, midx and length are all general-purpose registers.
// NOTE(review): presumably gathers sub-word elements from base using indices read via idx_base, with 'mask' as a lane mask held in a GPR - confirm.
// NOTE(review): the roles of midx and length, and whether xtmp1..xtmp3 / rtmp are clobbered, must be checked against the definition.
504   void vgather_subword(BasicType elem_ty, XMMRegister dst,  Register base, Register idx_base, Register mask,
505                        XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
506                        Register midx, Register length, int vector_len, int vlen_enc);
507 
// vgather8b_masked / vgather8b (declarations only). The masked form takes two extra general-purpose registers, mask and midx.
// NOTE(review): "8b" presumably means 8 bytes of gathered data per call - confirm in the definition.
508   void vgather8b_masked(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
509                         Register mask, Register midx, Register rtmp, int vlen_enc);
510   void vgather8b(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
511                  Register rtmp, int vlen_enc);
512 
// vector_saturating_op: four overloads. src2 is either an XMM register or an Address, and each form exists
// with and without an explicit is_unsigned flag.
// NOTE(review): presumably opc selects the saturating operation, and the forms without is_unsigned are the signed ones - confirm.
513   void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, bool is_unsigned, int vlen_enc);
514 
515   void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, bool is_unsigned, int vlen_enc);
516 
517   void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
518 
519   void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);
520 
// vector_saturating_unsigned_op: register-src2 and Address-src2 overloads with otherwise identical parameters.
// NOTE(review): presumably the unsigned path reached from the is_unsigned overloads of vector_saturating_op - confirm.
521   void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
522 
523   void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);
524 
// Unsigned saturating subtract helpers (declarations only).
// The EVEX variant needs a single opmask temporary (ktmp); the AVX variant needs two XMM temporaries instead.
// NOTE(review): "dq" presumably restricts elem_bt to doubleword / quadword element types - confirm.
525   void vector_sub_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, KRegister ktmp, int vlen_enc);
526 
527   void vector_sub_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
528                                              XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
529 
// Unsigned saturating add helpers (declarations only).
// The EVEX variant takes two XMM temporaries plus an opmask temporary; the AVX variant takes three XMM temporaries.
// NOTE(review): "dq" presumably restricts elem_bt to doubleword / quadword element types - confirm.
530   void vector_add_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
531                                               XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp, int vlen_enc);
532 
533   void vector_add_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
534                                              XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, int vlen_enc);
535 
// Saturating add/sub helpers taking an opc parameter (declarations only).
// The AVX variant takes four XMM temporaries; the EVEX variant takes two XMM and two opmask temporaries.
// NOTE(review): presumably the signed counterpart of the *_saturating_unsigned_* helpers, with opc choosing add vs sub - confirm.
536   void vector_addsub_dq_saturating_avx(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
537                                        XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, int vlen_enc);
538 
539   void vector_addsub_dq_saturating_evex(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
540                                         XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, int vlen_enc);
541 
// evpmovd2m_emu / evpmovq2m_emu: identical parameter lists; the result is written to the opmask register ktmp.
// xtmp2_hold_M1 defaults to false.
// NOTE(review): "_emu" presumably means these emulate the vector-to-mask moves for dword / qword lanes - confirm.
// NOTE(review): xtmp2_hold_M1 presumably tells the helper that xtmp2 already contains -1 (all ones) - confirm.
542   void evpmovd2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false);
543 
544   void evpmovq2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false);
545 
// Declaration only; takes no temporaries.
// NOTE(review): by name, presumably spreads each lane's sign bit across the lane for dword / qword etype - TODO confirm.
546   void vpsign_extend_dq(BasicType etype, XMMRegister dst, XMMRegister src, int vlen_enc);
547 
// vpgenmin_value / vpgenmax_value: identical parameter lists; compute_allones defaults to false.
// NOTE(review): presumably fill dst with the minimum / maximum value representable in etype, derived from 'allones' - confirm.
// NOTE(review): compute_allones presumably asks the helper to materialize all-ones in 'allones' first; with the default,
// the caller would have to supply it - verify.
548   void vpgenmin_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false);
549 
550   void vpgenmax_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false);
551 
// Declaration only. The result goes to the opmask register kmask; cond is an Assembler::ComparisonPredicate.
// NOTE(review): the trailing "u" presumably denotes an unsigned per-lane compare selected by etype - confirm.
552   void evpcmpu(BasicType etype, KRegister kmask,  XMMRegister src1, XMMRegister src2, Assembler::ComparisonPredicate cond, int vlen_enc);
553 
// Declaration only. The result goes to an XMM register (dst), not an opmask register.
// NOTE(review): presumably a per-lane greater-than compare of src1 against src2, with the instruction chosen by etype - confirm.
554   void vpcmpgt(BasicType etype, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
555 
// Declaration only. The result goes to the opmask register ktmp; xtmp2_hold_M1 defaults to false.
// NOTE(review): presumably converts vector src to an opmask, choosing by etype between a native and an emulated sequence - confirm.
556   void evpmov_vec_to_mask(BasicType etype, KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
557                           int vlen_enc, bool xtmp2_hold_M1 = false);
558 
// evmasked_saturating_op: register-src2 and Address-src2 overloads, both taking an opmask plus is_unsigned and merge flags.
// NOTE(review): presumably 'mask' predicates the lanes and 'merge' selects merge- vs zero-masking of unselected dst lanes - confirm.
// NOTE(review): presumably forwards to the _signed_ / _unsigned_ variants based on is_unsigned - confirm.
559   void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2,
560                               bool is_unsigned, bool merge, int vlen_enc);
561 
562   void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2,
563                               bool is_unsigned, bool merge, int vlen_enc);
564 
// evmasked_saturating_signed_op: register-src2 and Address-src2 overloads with otherwise identical parameters.
// NOTE(review): presumably the signed flavour of the masked saturating operation chosen by ideal_opc - confirm.
565   void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2,
566                               bool merge, int vlen_enc);
567 
568   void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2,
569                               bool merge, int vlen_enc);
570 
// evmasked_saturating_unsigned_op: register-src2 and Address-src2 overloads with otherwise identical parameters.
// NOTE(review): presumably the unsigned flavour of the masked saturating operation chosen by ideal_opc - confirm.
571   void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1,
572                                        XMMRegister src2, bool merge, int vlen_enc);
573 
574   void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1,
575                                        Address src2, bool merge, int vlen_enc);
576 
// Declaration only; there is no separate index operand in the signature.
// NOTE(review): presumably dst carries the selection indices on entry and receives elements picked from src1/src2 - TODO confirm.
577   void select_from_two_vectors_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
578 
// evfp16ph: register-src2 and Address-src2 overloads with otherwise identical parameters.
// NOTE(review): presumably emits a packed half-precision (FP16) operation selected by opcode - confirm in the definition.
579   void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
580 
581   void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);
582 
// Declaration only. Takes an opmask temporary and two XMM temporaries in addition to dst/src1/src2.
// NOTE(review): presumably a packed FP16 min or max (selected by opcode) - confirm.
// NOTE(review): the temporaries are presumably needed for special-value handling; verify against the definition.
583   void vminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
584                     KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
585 
// vminmax_fp16_avx10_2: register-src2 and Address-src2 overloads. Both take only an opmask temporary, no XMM temporaries.
// NOTE(review): presumably the packed FP16 min/max path for CPUs with AVX10.2 - confirm at the call sites.
586   void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
587                             KRegister ktmp, int vlen_enc);
588 
589   void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, Address src2,
590                             KRegister ktmp, int vlen_enc);
591 
// Declaration only. Same register list as vminmax_fp16 but without a vector-length encoding parameter.
// NOTE(review): presumably the scalar FP16 min/max counterpart - confirm in the definition.
592   void sminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
593                     KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2);
594 
// Declaration only. Takes an opmask temporary but no XMM temporaries and no vector-length encoding parameter.
// Only a register-src2 form is declared in the lines visible here.
// NOTE(review): presumably the scalar FP16 min/max path for CPUs with AVX10.2 - confirm.
595   void sminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
596                             KRegister ktmp);
597 
// Declaration only; takes a single general-purpose register.
// NOTE(review): by name, presumably recomputes the frame pointer using rtmp as scratch - behaviour not visible here, confirm.
598   void reconstruct_frame_pointer(Register rtmp);
599 
600 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP