/*
 * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
#define CPU_X86_C2_MACROASSEMBLER_X86_HPP

// C2_MacroAssembler contains high-level macros for C2

public:
  // C2 compiled method's prolog code.
  void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);

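  // Returns the AVX length encoding (Assembler::AVX_128bit/AVX_256bit/AVX_512bit)
  // for a vector length given in bytes (16, 32, or 64).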
  Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);

  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
  // See full description in macroAssembler_x86.cpp.
  void fast_lock(Register obj, Register box, Register tmp,
                 Register scr, Register cx1, Register cx2, Register thread,
                 RTMLockingCounters* rtm_counters,
                 RTMLockingCounters* stack_rtm_counters,
                 Metadata* method_data,
                 bool use_rtm, bool profile_rtm);
  void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);

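  // Lightweight-locking variants of the fast paths above, used when the VM runs
  // with the lightweight locking mode (LockingMode == LM_LIGHTWEIGHT).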
  void fast_lock_lightweight(Register obj, Register box, Register rax_reg,
                             Register t, Register thread);
  void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread);

#if INCLUDE_RTM_OPT
  void rtm_counters_update(Register abort_status, Register rtm_counters);
  void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
  void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
                                   RTMLockingCounters* rtm_counters,
                                   Metadata* method_data);
  void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
  void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
  void rtm_stack_locking(Register obj, Register tmp, Register scr,
                         Register retry_on_abort_count,
                         RTMLockingCounters* stack_rtm_counters,
                         Metadata* method_data, bool profile_rtm,
                         Label& DONE_LABEL, Label& IsInflated);
  void rtm_inflated_locking(Register obj, Register box, Register tmp,
                            Register scr, Register retry_on_busy_count,
                            Register retry_on_abort_count,
                            RTMLockingCounters* rtm_counters,
                            Metadata* method_data, bool profile_rtm,
                            Label& DONE_LABEL);
#endif

  // Generic instruction support for C2 code generation in .ad files
  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src);
  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src);
  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len);

  void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
               XMMRegister tmp = xnoreg);
  void vpminmax(int opcode, BasicType elem_bt,
                XMMRegister dst, XMMRegister src1, XMMRegister src2,
                int vlen_enc);

  void vminmax_fp(int opcode, BasicType elem_bt,
                  XMMRegister dst, XMMRegister a, XMMRegister b,
                  XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
                  int vlen_enc);
  void evminmax_fp(int opcode, BasicType elem_bt,
                   XMMRegister dst, XMMRegister a, XMMRegister b,
                   KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
                   int vlen_enc);

  void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one);

  void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
                              bool merge, BasicType bt, int vec_enc);

  void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len);

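  // Sign-extend (sign == true) or zero-extend packed elements:
  // byte->word (bw), byte->dword (bd), word->dword (wd).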
  void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
  void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);

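  // Vector shifts; the shift kind (left, logical right, arithmetic right) is selected
  // by the ideal opcode. The suffix gives the lane size (w = 16-bit, d = 32-bit,
  // q = 64-bit) and the *_imm variants take an immediate shift count.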
  void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftd_imm(int opcode, XMMRegister dst, int shift);
  void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftq_imm(int opcode, XMMRegister dst, int shift);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);

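  // Vector rotates, either by an immediate count or by a vector of per-lane counts.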
  void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

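  // Variable shifts: each lane is shifted by the count in the corresponding lane of 'shift'.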
  void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
  void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
  void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);

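  // Lane insertion and gather/scatter memory access; the ev* forms are the
  // AVX-512 versions and take an opmask register.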
  void insert(BasicType typ, XMMRegister dst, Register val, int idx);
  void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
  void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
  void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
  void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);

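  // Masked vector load/store; 'merge' selects merge-masking (unselected lanes keep
  // their destination value) versus zero-masking.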
  void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
  void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);

  // extract
  void extract(BasicType typ, Register dst, XMMRegister src, int idx);
  XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
  void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
  void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);
  void movsxl(BasicType typ, Register dst);

  // vector test
  void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes);

  // Convert B2X
  void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
#ifdef _LP64
  void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
#endif

  // blend
  void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister    src2, int comparison, int vector_len);
  void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg);
  void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);

  void load_vector(XMMRegister dst, Address        src, int vlen_in_bytes);
  void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg);

  void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
  void load_vector_mask(KRegister   dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);

  void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
  void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);

  // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

  // dst = src1 + reduce(op, src2) using vtmp as temps
  void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
  void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void genmask(KRegister dst, Register len, Register temp);
#endif // _LP64

  // dst = reduce(op, src2) using vtmp as temps
  void reduce_fp(int opcode, int vlen,
                 XMMRegister dst, XMMRegister src,
                 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
  void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
                         XMMRegister dst, XMMRegister src,
                         XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
  void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
                          XMMRegister dst, XMMRegister src,
                          XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
 private:
  void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Int Reduction
  void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Byte Reduction
  void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Short Reduction
  void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Long Reduction
#ifdef _LP64
  void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64

  // Float Reduction
  void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Double Reduction
  void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Base reduction instruction
  void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
  void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);

 public:
#ifdef _LP64
  void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);

  void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);

  void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
                             Register tmp, int masklen, BasicType bt, int vec_enc);
  void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
                              Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc);
#endif

  void vector_maskall_operation(KRegister dst, Register src, int mask_len);

#ifndef _LP64
  void vector_maskall_operation32(KRegister dst, Register src, KRegister ktmp, int mask_len);
#endif

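  // Find the first occurrence of the char value 'ch' in a UTF-16 string
  // (string_indexof_char) or a Latin-1 string (stringL_indexof_char).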
  void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);

  void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);

  // IndexOf strings.
  // Small strings are loaded through the stack if they cross a page boundary.
  void string_indexof(Register str1, Register str2,
                      Register cnt1, Register cnt2,
                      int int_cnt2,  Register result,
                      XMMRegister vec, Register tmp,
                      int ae);

  // IndexOf for constant substrings with size >= 8 elements
  // which don't need to be loaded through the stack.
  void string_indexofC8(Register str1, Register str2,
                      Register cnt1, Register cnt2,
                      int int_cnt2,  Register result,
                      XMMRegister vec, Register tmp,
                      int ae);

  // Smallest code: we don't need to load through the stack,
  // just check the string tail.

  // helper function for string_compare
  void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
                          Address::ScaleFactor scale, Address::ScaleFactor scale1,
                          Address::ScaleFactor scale2, Register index, int ae);
  // Compare strings.
  void string_compare(Register str1, Register str2,
                      Register cnt1, Register cnt2, Register result,
                      XMMRegister vec1, int ae, KRegister mask = knoreg);

  // Search for a non-ASCII character (negative byte value) in a byte array;
  // return the index of the first such character, otherwise len.
  void count_positives(Register ary1, Register len,
                       Register result, Register tmp1,
                       XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
  // Compare char[] or byte[] arrays.
  void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                     Register limit, Register result, Register chr,
                     XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);

  void arrays_hashcode(Register str1, Register cnt1, Register result,
                       Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext,
                       XMMRegister vcoef0, XMMRegister vcoef1, XMMRegister vcoef2, XMMRegister vcoef3,
                       XMMRegister vresult0, XMMRegister vresult1, XMMRegister vresult2, XMMRegister vresult3,
                       XMMRegister vtmp0, XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3,
                       BasicType eltype);

  // helper functions for arrays_hashcode
  int arrays_hashcode_elsize(BasicType eltype);
  void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
  void arrays_hashcode_elvload(XMMRegister dst, Address src, BasicType eltype);
  void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
  void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);

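  // Scalar float/double -> int/long conversion following Java semantics
  // (NaN converts to 0, out-of-range values saturate to the integer min/max).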
#ifdef _LP64
  void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);
#endif

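  // Emit the AVX-512 masked form of the vector operation selected by the ideal
  // opcode; 'merge' chooses merge-masking versus zero-masking.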
  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
                   XMMRegister dst, XMMRegister src1, XMMRegister src2,
                   bool merge, int vlen_enc, bool is_varshift = false);

  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
                   XMMRegister dst, XMMRegister src1, Address src2,
                   bool merge, int vlen_enc);

  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
                   XMMRegister src1, int imm8, bool merge, int vlen_enc);

  void masked_op(int ideal_opc, int mask_len, KRegister dst,
                 KRegister src1, KRegister src2);

  void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
                            BasicType from_elem_bt, BasicType to_elem_bt);

  void vector_signed_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
                          BasicType from_elem_bt, BasicType to_elem_bt);

  void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero,
                                  XMMRegister xtmp, Register rscratch, int vec_enc);

  void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                          XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
                          AddressLiteral float_sign_flip, Register rscratch, int vec_enc);

  void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                           XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
                           Register rscratch, int vec_enc);

  void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                           KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
                           Register rscratch, int vec_enc);

  void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                           XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip,
                           Register rscratch, int vec_enc);

  void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                          XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5,
                          AddressLiteral float_sign_flip, Register rscratch, int vec_enc);


  void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                   XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch,
                                                   AddressLiteral float_sign_flip, int vec_enc);

  void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                    KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
                                                    int vec_enc);

  void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                     KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
                                                     int vec_enc);

  void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                   KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
                                                   int vec_enc);

  void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                    KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
                                                    int vec_enc);

  void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
                                                  XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip,
                                                  int vec_enc);

  void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero,
                                            XMMRegister xtmp, int index, int vec_enc);

  void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen);

#ifdef _LP64
  void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
                                Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);

  void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
                               Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);

  void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
                              Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4);

  void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask,
                                   Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp,
                                   BasicType bt, int vec_enc);
#endif // _LP64

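  // Unsigned integer division helpers; rax and rdx are fixed operands of the
  // x86 div instruction.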
  void udivI(Register rax, Register divisor, Register rdx);
  void umodI(Register rax, Register divisor, Register rdx);
  void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);

#ifdef _LP64
  void reverseI(Register dst, Register src, XMMRegister xtmp1,
                XMMRegister xtmp2, Register rtmp);
  void reverseL(Register dst, Register src, XMMRegister xtmp1,
                XMMRegister xtmp2, Register rtmp1, Register rtmp2);
  void udivL(Register rax, Register divisor, Register rdx);
  void umodL(Register rax, Register divisor, Register rdx);
  void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
#endif

  void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
                  bool merge, BasicType bt, int vlen_enc);

  void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
                  bool merge, BasicType bt, int vlen_enc);

  void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                          XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc,
                               XMMRegister xtmp, Register rscratch = noreg);

  void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc);

  void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                           XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                            XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                             XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                            XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
                                     KRegister mask, bool merge, int vec_enc);

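  // Broadcast the 32-bit immediate 'imm32' to every lane of 'dst' (rtmp is a scratch register).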
  void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc);

  void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                             XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
                                       XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
                                       KRegister ktmp, Register rtmp, bool merge, int vec_enc);

  void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                           XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                            XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                          XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc);

  void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                           XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                      XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);

  void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);

  void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                        XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp,
                                        Register rtmp, int vec_enc);

  void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
                         XMMRegister xtmp1, Register rtmp, int vec_enc);

  void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                       XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
                         XMMRegister xtmp1, int vec_enc);

  void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
                          KRegister ktmp1, int vec_enc);

  void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc);

  void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc);

  void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
                       XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);

  void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle,
                                  XMMRegister src, int vlen_enc);

  void load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp);

#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP