1 /*
  2  * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
 26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP
 27 
 28 // C2_MacroAssembler contains high-level macros for C2
 29 
 30 public:
 31   // C2 compiled method's prolog code.
      // Parameters, as declared: the frame size, the stack-bang size, a flag for
      // 24-bit FP mode and a flag marking stub code.
      // NOTE(review): the units of framesize/stack_bang_size are not stated in this
      // header - confirm against the implementation.
 32   void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);
 33 
      // Returns an Assembler::AvxVectorLen for a vector length given in bytes.
 34   Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
 35 
 36   // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
 37   // See full description in macroAssembler_x86.cpp.
      // NOTE(review): confirm that the file named above is still where the full
      // description lives.
 38   void fast_lock(Register obj, Register box, Register tmp,
 39                  Register scr, Register cx1, Register cx2, Register thread,
 40                  Metadata* method_data);
 41   void fast_unlock(Register obj, Register box, Register tmp);
 42 
      // "Lightweight" variants of the two routines above. Unlike fast_unlock, the
      // unlock variant declares no box parameter.
      // NOTE(review): the two declarations name the same-role register differently
      // (rax_reg vs. reg_rax); presumably both must be rax - confirm and unify.
 43   void fast_lock_lightweight(Register obj, Register box, Register rax_reg,
 44                              Register t, Register thread);
 45   void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread);
 46 
 47   // Generic instructions support for use in .ad files C2 code generation
      // vabsnegd/vabsnegf each come in two forms: without and with an explicit
      // vector_len. The opcode argument selects the operation to emit.
 48   void vabsnegd(int opcode, XMMRegister dst, XMMRegister src);
 49   void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
 50   void vabsnegf(int opcode, XMMRegister dst, XMMRegister src);
 51   void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
 52 
      // pminmax has a single source and an optional temporary (defaults to xnoreg);
      // vpminmax has two sources and takes a vector length encoding.
      // NOTE(review): pminmax presumably also reads dst as its first operand - confirm.
 53   void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
 54                XMMRegister tmp = xnoreg);
 55   void vpminmax(int opcode, BasicType elem_bt,
 56                 XMMRegister dst, XMMRegister src1, XMMRegister src2,
 57                 int vlen_enc);
 58 
      // vminmax_fp and evminmax_fp have the same parameter list except for the first
      // temporary: an XMMRegister (tmp) versus a KRegister (ktmp).
 59   void vminmax_fp(int opcode, BasicType elem_bt,
 60                   XMMRegister dst, XMMRegister a, XMMRegister b,
 61                   XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
 62                   int vlen_enc);
 63   void evminmax_fp(int opcode, BasicType elem_bt,
 64                    XMMRegister dst, XMMRegister a, XMMRegister b,
 65                    KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
 66                    int vlen_enc);
 67 
      // The caller supplies registers named zero and one alongside dst.
      // NOTE(review): presumably these hold the constants 0.0 and 1.0 - confirm.
 68   void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one);
 69 
 70   void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
 71                               bool merge, BasicType bt, int vec_enc);
 72 
      // Mask-register counterpart: both dst and src are KRegisters; needs two
      // general-purpose temporaries and the mask length.
 73   void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len);
 74 
      // vextend* family. Only vextendbw has an overload without vector_len.
      // NOTE(review): 'sign' presumably selects sign- vs. zero-extension - confirm.
 75   void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
 76   void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
 77   void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
 78   void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
 79 
 80   void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
      // vshift{d,w,q}: the two-operand form takes (dst, shift); the three-operand form
      // adds a separate src and a vector length encoding. The *_imm forms take the
      // shift count as an int. No vshiftw_imm is declared in this header.
 81   void vshiftd_imm(int opcode, XMMRegister dst, int shift);
 82   void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 83   void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
 84   void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
 85   void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 86   void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
 87   void vshiftq_imm(int opcode, XMMRegister dst, int shift);
 88   void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 89   void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
 90 
      // Rotates: shift count given as an int (_imm) or as a vector register (_var).
 91   void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
 92   void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
 93 
      // varshift* family. varshiftq's temporary is optional (defaults to xnoreg);
      // varshiftbw and evarshiftb always require one.
 94   void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 95   void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
 96   void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
 97   void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
 98   void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
 99 
100   void insert(BasicType typ, XMMRegister dst, Register val, int idx);
      // vinsert takes the same arguments as insert plus a separate src vector.
101   void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
      // Gather/scatter: vgather takes its mask in an XMMRegister, while evgather and
      // evscatter take it in a KRegister. All three address memory via base + idx vector.
102   void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
103   void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
104   void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);
105 
      // evmovdqu overloads: memory-to-register (Address src) and register-to-memory
      // (Address dst). Both take a KRegister mask and a merge flag.
106   void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
107   void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
108 
109   // extract
      // extract and the first get_elem overload produce their result in a general
      // register; the second get_elem overload produces it in an XMMRegister and
      // accepts an optional temporary (defaults to xnoreg). get_lane returns an
      // XMMRegister.
      // NOTE(review): which register get_lane returns (dst or src) is not visible
      // from this header - confirm in the implementation.
110   void extract(BasicType typ, Register dst, XMMRegister src, int idx);
111   XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
112   void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
113   void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
      // Operates in place on dst; typ selects the element type.
114   void movsxl(BasicType typ, Register dst);
115 
116   // vector test
117   void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes);
118 
119  // Convert B2X
120  void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
121 #ifdef _LP64
     // Only declared for 64-bit builds. The source operand is a general register.
122  void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
123 #endif
124 
125   // blend
      // evpcmp overloads differ in the second source: a register or an AddressLiteral.
      // The AddressLiteral form accepts an optional scratch register (defaults to noreg).
126   void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister    src2, int comparison, int vector_len);
127   void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg);
128   void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
129 
      // load_vector overloads: from an Address, or from an AddressLiteral with an
      // optional scratch register (defaults to noreg). Size is given in bytes.
130   void load_vector(XMMRegister dst, Address        src, int vlen_in_bytes);
131   void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg);
132 
      // load_vector_mask overloads: the destination is either a vector register or a
      // KRegister; the parameter lists are otherwise unrelated.
133   void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
134   void load_vector_mask(KRegister   dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);
135 
      // NOTE(review): the unit of 'vlen' in load_constant_vector (bytes vs. elements)
      // is not stated here, whereas load_iota_indices is explicit (vlen_in_bytes) - confirm.
136   void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
137   void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);
138 
139   // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
140 
141   // dst = src1  reduce(op, src2) using vtmp1 and vtmp2 as temps
      // NOTE(review): presumably this means dst = op(src1, reduce(op, src2)), i.e. the
      // scalar src1 is combined with the reduced vector src2 - confirm.
142   void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
143 #ifdef _LP64
      // reduceL and genmask are only declared for 64-bit builds.
144   void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
145   void genmask(KRegister dst, Register len, Register temp);
146 #endif // _LP64
147 
148   // dst = reduce(op, src) using vtmp1 (and optionally vtmp2) as temps
149   void reduce_fp(int opcode, int vlen,
150                  XMMRegister dst, XMMRegister src,
151                  XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
      // Byte/short reductions use the same parameter list as reduceI.
152   void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
153   void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
154   void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
      // Float/double min-max reductions have identical parameter lists; xmm_1 is
      // optional (defaults to xnoreg).
      // NOTE(review): is_dst_valid presumably says whether dst already holds a value
      // to fold into the result - confirm.
155   void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
156                          XMMRegister dst, XMMRegister src,
157                          XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
158   void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
159                           XMMRegister dst, XMMRegister src,
160                           XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
161  private:
      // Private helpers for the reductions. Unlike the public entry points, the
      // per-width helpers below take no vlen argument.
      // NOTE(review): presumably the number in each name is the element count that
      // helper handles, selected by the public dispatchers - confirm.
162   void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
163   void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
164 
165   // Int Reduction
166   void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
167   void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
168   void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
169   void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
170 
171   // Byte Reduction
172   void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
173   void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
174   void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
175   void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
176   void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
177   void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
178   void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
179   void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
180 
181   // Short Reduction
182   void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
183   void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
184   void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
185   void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
186 
187   // Long Reduction
188 #ifdef _LP64
189   void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
190   void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
191   void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
192 #endif // _LP64
193 
194   // Float Reduction
      // reduce2F and reduce4F take a single temporary; reduce8F and reduce16F take two.
195   void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
196   void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
197   void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
198   void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
199 
200   // Double Reduction
      // reduce2D takes a single temporary; reduce4D and reduce8D take two.
201   void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
202   void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
203   void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
204 
205   // Base reduction instruction
      // The _128 form has one source (two-operand); the _256 form has two sources.
206   void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
207   void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
208 
209  public:
210 #ifdef _LP64
      // Vector mask operations; this group is only declared for 64-bit builds.
      // vector_mask_operation is overloaded on where the mask lives: a KRegister or
      // an XMMRegister (the latter needs an extra vector temporary and the element type).
211   void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);
212 
213   void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);
214 
215   void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
216                              Register tmp, int masklen, BasicType bt, int vec_enc);
217   void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
218                               Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc);
219 #endif
220 
      // Declared for all builds.
221   void vector_maskall_operation(KRegister dst, Register src, int mask_len);
222 
223 #ifndef _LP64
      // Only declared for non-64-bit builds; takes an additional KRegister temporary.
224   void vector_maskall_operation32(KRegister dst, Register src, KRegister ktmp, int mask_len);
225 #endif
226 
227   void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
228                            XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
229 
      // Same parameter list as string_indexof_char.
      // NOTE(review): the 'L' presumably denotes the Latin-1 (byte) string variant - confirm.
230   void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
231                            XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
232 
233   // IndexOf strings.
234   // Small strings are loaded through stack if they cross page boundary.
      // NOTE(review): 'ae' presumably selects the argument encoding combination of the
      // two strings, and int_cnt2 a compile-time-known substring length - confirm.
235   void string_indexof(Register str1, Register str2,
236                       Register cnt1, Register cnt2,
237                       int int_cnt2,  Register result,
238                       XMMRegister vec, Register tmp,
239                       int ae);
240 
241   // IndexOf for constant substrings with size >= 8 elements
242   // which don't need to be loaded through stack.
      // Takes the same parameter list as string_indexof.
243   void string_indexofC8(Register str1, Register str2,
244                       Register cnt1, Register cnt2,
245                       int int_cnt2,  Register result,
246                       XMMRegister vec, Register tmp,
247                       int ae);
248 
249     // Smallest code: we don't need to load through stack,
250     // check string tail.
      // NOTE(review): the two-line comment above is not attached to any declaration
      // in this header and is indented differently from its neighbours; it looks
      // like a leftover - confirm which routine it describes, or remove it.
251 
252   // helper function for string_compare
253   void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
254                           Address::ScaleFactor scale, Address::ScaleFactor scale1,
255                           Address::ScaleFactor scale2, Register index, int ae);
256   // Compare strings.
      // The mask register is optional (defaults to knoreg).
257   void string_compare(Register str1, Register str2,
258                       Register cnt1, Register cnt2, Register result,
259                       XMMRegister vec1, int ae, KRegister mask = knoreg);
260 
261   // Search for Non-ASCII character (Negative byte value) in a byte array,
262   // return index of the first such character, otherwise len.
      // Both mask registers are optional (default to knoreg).
263   void count_positives(Register ary1, Register len,
264                        Register result, Register tmp1,
265                        XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
266 
267   // Compare char[] or byte[] arrays.
      // The trailing mask and expand_ary2 arguments are optional (knoreg / false).
      // NOTE(review): is_array_equ presumably distinguishes whole-array comparison
      // from comparison of raw element ranges - confirm.
268   void arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
269                      Register result, Register chr, XMMRegister vec1, XMMRegister vec2,
270                      bool is_char, KRegister mask = knoreg, bool expand_ary2 = false);
271 
      // Needs three general-purpose temporaries and seventeen vector registers
      // (vnext plus four each of vcoef, vresult and vtmp); eltype selects the element type.
272   void arrays_hashcode(Register str1, Register cnt1, Register result,
273                        Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext,
274                        XMMRegister vcoef0, XMMRegister vcoef1, XMMRegister vcoef2, XMMRegister vcoef3,
275                        XMMRegister vresult0, XMMRegister vresult1, XMMRegister vresult2, XMMRegister vresult3,
276                        XMMRegister vtmp0, XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3,
277                        BasicType eltype);
278 
279   // helper functions for arrays_hashcode
      // elload targets a general register, elvload a vector register (from either an
      // Address or an AddressLiteral); elvcast operates in place on dst.
280   int arrays_hashcode_elsize(BasicType eltype);
281   void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
282   void arrays_hashcode_elvload(XMMRegister dst, Address src, BasicType eltype);
283   void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
284   void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);
285 
286 #ifdef _LP64
      // Only declared for 64-bit builds. Source is a vector register, destination a
      // general register; both the source and destination types are passed explicitly.
287   void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);
288 #endif
289 
      // evmasked_op overloads differ in the second source operand: a vector register
      // (with an optional is_varshift flag, default false), a memory Address, or an
      // int immediate (imm8).
290   void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
291                    XMMRegister dst, XMMRegister src1, XMMRegister src2,
292                    bool merge, int vlen_enc, bool is_varshift = false);
293 
294   void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
295                    XMMRegister dst, XMMRegister src1, Address src2,
296                    bool merge, int vlen_enc);
297 
298   void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
299                    XMMRegister src1, int imm8, bool merge, int vlen_enc);
300 
      // Operates purely on mask registers: destination and both sources are KRegisters.
301   void masked_op(int ideal_opc, int mask_len, KRegister dst,
302                  KRegister src1, KRegister src2);
303 
304   void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
305                             BasicType from_elem_bt, BasicType to_elem_bt);
306 
      // Same parameter list as vector_unsigned_cast.
307   void vector_signed_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
308                           BasicType from_elem_bt, BasicType to_elem_bt);
309 
      // No separate src parameter is declared.
      // NOTE(review): presumably dst is converted in place, with 'zero' holding an
      // all-zero vector - confirm.
310   void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero,
311                                   XMMRegister xtmp, Register rscratch, int vec_enc);
312 
313   void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
314                           XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
315                           AddressLiteral float_sign_flip, Register rscratch, int vec_enc);
316 
      // The _evex cast variants take two vector and two KRegister temporaries; the
      // _avx variants use vector temporaries only (four for F2X, five for D2X).
317   void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
318                            XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
319                            Register rscratch, int vec_enc);
320 
      // No to_elem_bt parameter: the target element type is fixed by the name (F2L).
321   void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
322                            KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
323                            Register rscratch, int vec_enc);
324 
325   void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
326                            XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip,
327                            Register rscratch, int vec_enc);
328 
      // NOTE(review): the sign-flip literal is named differently across this family
      // (float_sign_flip, double_sign_flip, sign_flip); presumably the name reflects
      // the width of the result lanes rather than of the source - confirm.
329   void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
330                           XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5,
331                           AddressLiteral float_sign_flip, Register rscratch, int vec_enc);
332 
333 
334   void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
335                                                    XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch,
336                                                    AddressLiteral float_sign_flip, int vec_enc);
337 
      // Special-case helpers for the floating-point to integral casts. The _evex
      // variants below share one parameter shape (two vector temporaries, two KRegister
      // temporaries, a scratch register, a sign-flip literal and vec_enc); the _avx
      // variants use vector temporaries only.
      // NOTE(review): presumably these patch up lanes holding NaN or out-of-range
      // inputs after the raw conversion - confirm in the implementation.
338   void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
339                                                     KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
340                                                     int vec_enc);
341 
342   void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
343                                                      KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
344                                                      int vec_enc);
345 
346   void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
347                                                    KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
348                                                    int vec_enc);
349 
350   void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
351                                                     KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
352                                                     int vec_enc);
353 
354   void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
355                                                   XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip,
356                                                   int vec_enc);
357 
358   void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero,
359                                             XMMRegister xtmp, int index, int vec_enc);
360 
      // Takes both the destination and the source element type.
      // NOTE(review): the unit of 'vlen' (bytes vs. elements) is not stated here - confirm.
361   void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen);
362 
363 #ifdef _LP64
      // Vector rounding helpers; only declared for 64-bit builds. Each takes a
      // sign-flip literal and a new_mxcsr literal in addition to its temporaries.
364   void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
365                                 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);
366 
      // NOTE(review): this float variant names its sign-flip literal double_sign_flip,
      // while vector_round_float_avx below uses float_sign_flip - confirm which is
      // intended and align the parameter name with the implementation.
367   void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
368                                Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);
369 
370   void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
371                               Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4);
372 
      // Unlike vector_compress_expand, this variant takes the mask in an XMMRegister
      // and needs a permv vector register plus general-purpose and vector temporaries.
373   void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask,
374                                    Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp,
375                                    BasicType bt, int vec_enc);
376 #endif // _LP64
377 
378   void udivI(Register rax, Register divisor, Register rdx);
      // NOTE(review): the parameters named rax and rdx presumably must be those exact
      // registers (dividend/result registers of the x86 divide instruction) - confirm.
      // The udivmod forms take one extra temporary.
379   void umodI(Register rax, Register divisor, Register rdx);
380   void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
381 
382 #ifdef _LP64
      // Only declared for 64-bit builds: the reverse helpers and the long (L)
      // counterparts of the routines above. reverseL needs two general-purpose
      // temporaries, reverseI one.
383   void reverseI(Register dst, Register src, XMMRegister xtmp1,
384                 XMMRegister xtmp2, Register rtmp);
385   void reverseL(Register dst, Register src, XMMRegister xtmp1,
386                 XMMRegister xtmp2, Register rtmp1, Register rtmp2);
387   void udivL(Register rax, Register divisor, Register rdx);
388   void umodL(Register rax, Register divisor, Register rdx);
389   void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
390 #endif
391 
392   void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
393                   bool merge, BasicType bt, int vlen_enc);
394 
      // Same as the overload above except that the third source is a memory Address.
      // NOTE(review): there is no src1 parameter; presumably dst doubles as the
      // first source - confirm.
395   void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
396                   bool merge, BasicType bt, int vlen_enc);
397 
398   void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
399                           XMMRegister xtmp2, Register rtmp, int vec_enc);
400 
      // GFNI-based variant: takes a mask literal and one vector temporary; the scratch
      // register is optional (defaults to noreg).
401   void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc,
402                                XMMRegister xtmp, Register rscratch = noreg);
403 
404   void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc);
405 
      // The four per-type popcount routines share one parameter list; the _integral
      // form adds the element type as its first argument.
406   void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
407                            XMMRegister xtmp2, Register rtmp, int vec_enc);
408 
409   void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
410                             XMMRegister xtmp2, Register rtmp, int vec_enc);
411 
412   void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
413                              XMMRegister xtmp2, Register rtmp, int vec_enc);
414 
415   void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
416                             XMMRegister xtmp2, Register rtmp, int vec_enc);
417 
418   void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
419                                 XMMRegister xtmp2, Register rtmp, int vec_enc);
420 
      // EVEX variant: needs no temporaries, but takes a KRegister mask and a merge flag.
421   void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
422                                      KRegister mask, bool merge, int vec_enc);
423 
      // The value to broadcast is a 32-bit immediate; a general-purpose temporary is required.
424   void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc);
425 
426   void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
427                              XMMRegister xtmp2, Register rtmp, int vec_enc);
428 
429   void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
430                                        XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
431                                        KRegister ktmp, Register rtmp, bool merge, int vec_enc);
432 
      // AVX leading-zero counts. The byte, short and long forms take a general-purpose
      // temporary (rtmp); the int form does not. The generic _avx form adds the
      // element type as its first argument.
433   void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
434                                            XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
435 
436   void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
437                                             XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
438 
439   void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
440                                           XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc);
441 
442   void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
443                                            XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
444 
445   void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
446                                       XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
447 
      // vpadd and vpsub share one parameter list; bt selects the element type.
448   void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
449 
450   void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
451 
      // Trailing-zero counts: the EVEX form takes four vector temporaries and a
      // KRegister temporary; the AVX form further below takes three vector temporaries.
452   void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
453                                         XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp,
454                                         Register rtmp, int vec_enc);
455 
456   void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
457                          XMMRegister xtmp1, Register rtmp, int vec_enc);
458 
459   void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
460                                        XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
461 
      // Vector signum: the AVX form uses a vector temporary, the EVEX form a KRegister
      // temporary; both take caller-supplied 'zero' and 'one' registers.
462   void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
463                          XMMRegister xtmp1, int vec_enc);
464 
465   void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
466                           KRegister ktmp1, int vec_enc);
467 
468   void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc);
469 
      // vmovmask overloads: memory-to-register (above) and register-to-memory (below).
      // The mask is held in a vector register in both.
470   void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc);
471 
472   void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
473                        XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);
474 
      // Needs no temporaries, unlike rearrange_bytes.
475   void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
476                                   XMMRegister src, int vlen_enc);
477 
478 
      // Subword gather. Takes both vector_len and vlen_enc, and the mask in a
      // general register.
      // NOTE(review): how vector_len and vlen_enc differ in meaning is not visible
      // from this header - confirm.
479   void vgather_subword(BasicType elem_ty, XMMRegister dst,  Register base, Register idx_base, Register offset,
480                        Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
481                        Register midx, Register length, int vector_len, int vlen_enc);
482 
483 #ifdef _LP64
      // The masked variant is only declared for 64-bit builds; it adds the mask and
      // midx registers to the parameter list of vgather8b_offset.
484   void vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
485                                Register offset, Register mask, Register midx, Register rtmp, int vlen_enc);
486 #endif
487   void vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
488                               Register offset, Register rtmp, int vlen_enc);
489 
490 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP