/*
 * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
#define CPU_X86_C2_MACROASSEMBLER_X86_HPP

// C2_MacroAssembler contains high-level macros for C2

public:
  // C2 compiled method's prolog code.
  void verified_entry(Compile* C, int sp_inc = 0);

  void entry_barrier();
  Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);

  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
  // See full description in macroAssembler_x86.cpp.
  void fast_lock(Register obj, Register box, Register tmp,
                 Register scr, Register cx1, Register cx2, Register thread,
                 Metadata* method_data);
  void fast_unlock(Register obj, Register box, Register tmp);

  void fast_lock_lightweight(Register obj, Register box, Register rax_reg,
                             Register t, Register thread);
  void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread);

  void verify_int_in_range(uint idx, const TypeInt* t, Register val);
  void verify_long_in_range(uint idx, const TypeLong* t, Register val, Register tmp);

  // Generic instruction support for C2 code generation in .ad files
  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src);
  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src);
  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len);

  void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
               XMMRegister tmp = xnoreg);
  void vpminmax(int opcode, BasicType elem_bt,
                XMMRegister dst, XMMRegister src1, XMMRegister src2,
                int vlen_enc);

  void vpuminmax(int opcode, BasicType elem_bt,
                 XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 int vlen_enc);

  void vpuminmax(int opcode, BasicType elem_bt,
                 XMMRegister dst, XMMRegister src1, Address src2,
                 int vlen_enc);

  void vminmax_fp(int opcode, BasicType elem_bt,
                  XMMRegister dst, XMMRegister a, XMMRegister b,
                  XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
                  int vlen_enc);

  void vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
                  XMMRegister src1, XMMRegister src2, int vlen_enc);

  void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);

  void evminmax_fp(int opcode, BasicType elem_bt,
                   XMMRegister dst, XMMRegister a, XMMRegister b,
                   KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
                   int vlen_enc);

  void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one);

  void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
                              bool merge, BasicType bt, int vec_enc);

  void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len);

  void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
  void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);

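  // Vector shift and rotate support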
  void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftd_imm(int opcode, XMMRegister dst, int shift);
  void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftq_imm(int opcode, XMMRegister dst, int shift);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
  void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
  void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);

  void insert(BasicType typ, XMMRegister dst, Register val, int idx);
  void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
  void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
  void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
  void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);

  void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
  void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
  void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, XMMRegister src, bool merge, int vector_len);

  // extract
  void extract(BasicType typ, Register dst, XMMRegister src, int idx);
  XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
  void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
  void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
  void movsxl(BasicType typ, Register dst);

  // vector test
  void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes);

  // Convert B2X
  void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
  void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);

  // blend
  void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister    src2, int comparison, int vector_len);
  void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg);
  void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);

  void load_vector(BasicType bt, XMMRegister dst, Address        src, int vlen_in_bytes);
  void load_vector(BasicType bt, XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg);

  void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
  void load_vector_mask(KRegister   dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);

  void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
  void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);

  // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

  // dst = src1 + reduce(op, src2) using vtmp as temps
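  // For example, with opcode Op_AddReductionVI and vlen lanes this computes
  //   dst = src1 + (src2[0] + src2[1] + ... + src2[vlen-1]).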
  void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void genmask(KRegister dst, Register len, Register temp);

  // dst = reduce(op, src2) using vtmp as temps
  void reduce_fp(int opcode, int vlen,
                 XMMRegister dst, XMMRegister src,
                 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
  void unordered_reduce_fp(int opcode, int vlen,
                           XMMRegister dst, XMMRegister src,
                           XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
  void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
                         XMMRegister dst, XMMRegister src,
                         XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
  void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
                          XMMRegister dst, XMMRegister src,
                          XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
 private:
  void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void unorderedReduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void unorderedReduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Int Reduction
  void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Byte Reduction
  void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Short Reduction
  void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Long Reduction
  void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Float Reduction
  void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Unordered Float Reduction
  void unorderedReduce2F(int opcode, XMMRegister dst, XMMRegister src);
  void unorderedReduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void unorderedReduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void unorderedReduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Double Reduction
  void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Unordered Double Reduction
  void unorderedReduce2D(int opcode, XMMRegister dst, XMMRegister src);
  void unorderedReduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void unorderedReduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Base reduction instruction
  void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
  void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void unordered_reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
  void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);

 public:
  void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);

  void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);

  void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
                             Register tmp, int masklen, BasicType bt, int vec_enc);
  void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
                              Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc);

  void vector_maskall_operation(KRegister dst, Register src, int mask_len);

  void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);

  void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
                            XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);

  // IndexOf strings.
  // Small strings are loaded through stack if they cross page boundary.
  void string_indexof(Register str1, Register str2,
                      Register cnt1, Register cnt2,
                      int int_cnt2,  Register result,
                      XMMRegister vec, Register tmp,
                      int ae);

  // IndexOf for constant substrings with size >= 8 elements
  // which don't need to be loaded through stack.
  void string_indexofC8(Register str1, Register str2,
                        Register cnt1, Register cnt2,
                        int int_cnt2,  Register result,
                        XMMRegister vec, Register tmp,
                        int ae);

  // Smallest code: we don't need to load through stack,
  // check string tail.

  // helper function for string_compare
  void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
                          Address::ScaleFactor scale, Address::ScaleFactor scale1,
                          Address::ScaleFactor scale2, Register index, int ae);
  // Compare strings.
  void string_compare(Register str1, Register str2,
                      Register cnt1, Register cnt2, Register result,
                      XMMRegister vec1, int ae, KRegister mask = knoreg);

  // Search for a non-ASCII character (negative byte value) in a byte array;
  // return the index of the first such character, otherwise len.
  void count_positives(Register ary1, Register len,
                       Register result, Register tmp1,
                       XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);

  // Compare char[] or byte[] arrays.
  void arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
                     Register result, Register chr, XMMRegister vec1, XMMRegister vec2,
                     bool is_char, KRegister mask = knoreg, bool expand_ary2 = false);

  void arrays_hashcode(Register str1, Register cnt1, Register result,
                       Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext,
                       XMMRegister vcoef0, XMMRegister vcoef1, XMMRegister vcoef2, XMMRegister vcoef3,
                       XMMRegister vresult0, XMMRegister vresult1, XMMRegister vresult2, XMMRegister vresult3,
                       XMMRegister vtmp0, XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3,
                       BasicType eltype);

  // helper functions for arrays_hashcode
  int arrays_hashcode_elsize(BasicType eltype);
  void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
  void arrays_hashcode_elvload(XMMRegister dst, Address src, BasicType eltype);
  void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
  void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);

  void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);

  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
                   XMMRegister dst, XMMRegister src1, XMMRegister src2,
                   bool merge, int vlen_enc, bool is_varshift = false);

  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
                   XMMRegister dst, XMMRegister src1, Address src2,
                   bool merge, int vlen_enc);

  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
                   XMMRegister src1, int imm8, bool merge, int vlen_enc);

  void masked_op(int ideal_opc, int mask_len, KRegister dst,
                 KRegister src1, KRegister src2);

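  // Vector element-type conversions (casts)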
  void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
                            BasicType from_elem_bt, BasicType to_elem_bt);

  void vector_signed_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
                          BasicType from_elem_bt, BasicType to_elem_bt);

  void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero,
                                  XMMRegister xtmp, Register rscratch, int vec_enc);

  void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                          XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
                          AddressLiteral float_sign_flip, Register rscratch, int vec_enc);

  void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                           XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
                           Register rscratch, int vec_enc);

  void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                           KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
                           Register rscratch, int vec_enc);

  void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                           XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip,
                           Register rscratch, int vec_enc);

  void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                          XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5,
                          AddressLiteral float_sign_flip, Register rscratch, int vec_enc);

  void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                   XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch,
                                                   AddressLiteral float_sign_flip, int vec_enc);

  void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                    KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
                                                    int vec_enc);

  void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                     KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
                                                     int vec_enc);

  void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                   KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
                                                   int vec_enc);

  void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                    KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
                                                    int vec_enc);

  void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
                                                  XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip,
                                                  int vec_enc);

  void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero,
                                            XMMRegister xtmp, int index, int vec_enc);

  void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen);

  void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
                                Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);

  void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
                               Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);

  void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
                              Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4);

  void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask,
                                   Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp,
                                   BasicType bt, int vec_enc);

  void udivI(Register rax, Register divisor, Register rdx);
  void umodI(Register rax, Register divisor, Register rdx);
  void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);

  void reverseI(Register dst, Register src, XMMRegister xtmp1,
                XMMRegister xtmp2, Register rtmp);
  void reverseL(Register dst, Register src, XMMRegister xtmp1,
                XMMRegister xtmp2, Register rtmp1, Register rtmp2);
  void udivL(Register rax, Register divisor, Register rdx);
  void umodL(Register rax, Register divisor, Register rdx);
  void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);

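  // Masked ternary logic (EVEX vpternlog)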
  void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
                  bool merge, BasicType bt, int vlen_enc);

  void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
                  bool merge, BasicType bt, int vlen_enc);

  void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                          XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc,
                               XMMRegister xtmp, Register rscratch = noreg);

  void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc);

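  // Vector population count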
  void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                           XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                            XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                             XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                            XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
                                     KRegister mask, bool merge, int vec_enc);

  void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc);

  void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                             XMMRegister xtmp2, Register rtmp, int vec_enc);

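  // Vector count leading zeros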
  void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
                                       XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
                                       KRegister ktmp, Register rtmp, bool merge, int vec_enc);

  void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                           XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                            XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                          XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc);

  void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                           XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                      XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);

  void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);

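  // Vector count trailing zeros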
  void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                        XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp,
                                        Register rtmp, int vec_enc);

  void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
                         XMMRegister xtmp1, Register rtmp, int vec_enc);

  void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                       XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
                         XMMRegister xtmp1, int vec_enc);

  void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
                          KRegister ktmp1, int vec_enc);

  void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc);

  void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc);

  void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
                       XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);

  void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
                                  XMMRegister src, int vlen_enc);

  void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);

  void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register offset,
                       Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
                       Register midx, Register length, int vector_len, int vlen_enc);

  void vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
                               Register offset, Register mask, Register midx, Register rtmp, int vlen_enc);

  void vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
                        Register offset, Register rtmp, int vlen_enc);

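  // Vector saturating add/subtract support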
  void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, bool is_unsigned, int vlen_enc);

  void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, bool is_unsigned, int vlen_enc);

  void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

  void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);

  void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

  void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);

  void vector_sub_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, KRegister ktmp, int vlen_enc);

  void vector_sub_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                                             XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);

  void vector_add_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                                              XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp, int vlen_enc);

  void vector_add_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                                             XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, int vlen_enc);

  void vector_addsub_dq_saturating_avx(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                                       XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, int vlen_enc);

  void vector_addsub_dq_saturating_evex(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                                        XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, int vlen_enc);

  void evpmovd2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false);

  void evpmovq2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false);

  void vpsign_extend_dq(BasicType etype, XMMRegister dst, XMMRegister src, int vlen_enc);

  void vpgenmin_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false);

  void vpgenmax_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false);

  void evpcmpu(BasicType etype, KRegister kmask, XMMRegister src1, XMMRegister src2, Assembler::ComparisonPredicate cond, int vlen_enc);

  void vpcmpgt(BasicType etype, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

  void evpmov_vec_to_mask(BasicType etype, KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                          int vlen_enc, bool xtmp2_hold_M1 = false);

  void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                              bool is_unsigned, bool merge, int vlen_enc);

  void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2,
                              bool is_unsigned, bool merge, int vlen_enc);

  void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                                     bool merge, int vlen_enc);

  void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2,
                                     bool merge, int vlen_enc);

  void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1,
                                       XMMRegister src2, bool merge, int vlen_enc);

  void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1,
                                       Address src2, bool merge, int vlen_enc);

  void select_from_two_vectors_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

  void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);

  void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);

  void vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                           KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);

  void scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                           KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2);

  void reconstruct_frame_pointer(Register rtmp);

#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP