1 /* 2 * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP 26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP 27 28 // C2_MacroAssembler contains high-level macros for C2 29 30 public: 31 // C2 compiled method's prolog code. 32 void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub); 33 34 Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes); 35 36 // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. 37 // See full description in macroAssembler_x86.cpp. 38 void fast_lock(Register obj, Register box, Register tmp, 39 Register scr, Register cx1, Register cx2, Register thread, 40 RTMLockingCounters* rtm_counters, 41 RTMLockingCounters* stack_rtm_counters, 42 Metadata* method_data, 43 bool use_rtm, bool profile_rtm); 44 void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm); 45 46 void fast_lock_lightweight(Register obj, Register box, Register rax_reg, 47 Register t, Register thread); 48 void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread); 49 50 #if INCLUDE_RTM_OPT 51 void rtm_counters_update(Register abort_status, Register rtm_counters); 52 void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel); 53 void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg, 54 RTMLockingCounters* rtm_counters, 55 Metadata* method_data); 56 void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg, 57 RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm); 58 void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel); 59 void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel); 60 void rtm_stack_locking(Register obj, Register tmp, Register scr, 61 Register retry_on_abort_count, 62 RTMLockingCounters* stack_rtm_counters, 63 Metadata* method_data, bool profile_rtm, 64 Label& DONE_LABEL, Label& IsInflated); 65 void rtm_inflated_locking(Register obj, Register box, Register tmp, 66 Register scr, Register retry_on_busy_count, 67 Register retry_on_abort_count, 68 RTMLockingCounters* rtm_counters, 69 Metadata* method_data, bool profile_rtm, 70 Label& DONE_LABEL); 71 #endif 72 73 // Generic instructions support for use in .ad files C2 code generation 74 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src); 75 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len); 76 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src); 77 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len); 78 79 void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, 80 XMMRegister tmp = xnoreg); 81 void vpminmax(int opcode, BasicType elem_bt, 82 XMMRegister dst, XMMRegister src1, XMMRegister src2, 83 int vlen_enc); 84 85 void vminmax_fp(int opcode, BasicType elem_bt, 86 XMMRegister dst, XMMRegister a, XMMRegister b, 87 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, 88 int vlen_enc); 89 void evminmax_fp(int opcode, BasicType elem_bt, 90 XMMRegister dst, XMMRegister a, XMMRegister b, 91 KRegister ktmp, XMMRegister atmp, XMMRegister btmp, 92 int vlen_enc); 93 94 void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one); 95 96 void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask, 97 bool merge, BasicType bt, int vec_enc); 98 99 void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len); 100 101 void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 102 void vextendbw(bool sign, XMMRegister dst, XMMRegister src); 103 void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 104 void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 105 106 void vshiftd(int opcode, XMMRegister dst, XMMRegister shift); 107 void vshiftd_imm(int opcode, XMMRegister dst, int shift); 108 void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 109 void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 110 void vshiftw(int opcode, XMMRegister dst, XMMRegister shift); 111 void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 112 void vshiftq(int opcode, XMMRegister dst, XMMRegister shift); 113 void vshiftq_imm(int opcode, XMMRegister dst, int shift); 114 void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 115 void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 116 117 void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len); 118 void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); 119 120 void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 121 void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 122 void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg); 123 void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp); 124 void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp); 125 126 void insert(BasicType typ, XMMRegister dst, Register val, int idx); 127 void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx); 128 void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len); 129 void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len); 130 void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len); 131 132 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len); 133 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len); 134 135 // extract 136 void extract(BasicType typ, Register dst, XMMRegister src, int idx); 137 XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex); 138 void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex); 139 void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg); 140 void movsxl(BasicType typ, Register dst); 141 142 // vector test 143 void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes); 144 145 // Covert B2X 146 void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc); 147 #ifdef _LP64 148 void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc); 149 #endif 150 151 // blend 152 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len); 153 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg); 154 void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len); 155 156 void load_vector(XMMRegister dst, Address src, int vlen_in_bytes); 157 void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg); 158 159 void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy); 160 void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc); 161 162 void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen); 163 void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt); 164 165 // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. 166 167 // dst = src1 reduce(op, src2) using vtmp as temps 168 void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 169 #ifdef _LP64 170 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 171 void genmask(KRegister dst, Register len, Register temp); 172 #endif // _LP64 173 174 // dst = reduce(op, src2) using vtmp as temps 175 void reduce_fp(int opcode, int vlen, 176 XMMRegister dst, XMMRegister src, 177 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg); 178 void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 179 void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 180 void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 181 void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, 182 XMMRegister dst, XMMRegister src, 183 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 184 void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, 185 XMMRegister dst, XMMRegister src, 186 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 187 private: 188 void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 189 void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 190 191 // Int Reduction 192 void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 193 void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 194 void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 195 void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 196 197 // Byte Reduction 198 void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 199 void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 200 void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 201 void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 202 void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 203 void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 204 void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 205 void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 206 207 // Short Reduction 208 void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 209 void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 210 void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 211 void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 212 213 // Long Reduction 214 #ifdef _LP64 215 void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 216 void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 217 void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 218 #endif // _LP64 219 220 // Float Reduction 221 void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 222 void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 223 void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 224 void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 225 226 // Double Reduction 227 void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 228 void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 229 void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 230 231 // Base reduction instruction 232 void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src); 233 void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); 234 235 public: 236 #ifdef _LP64 237 void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen); 238 239 void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc); 240 241 void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, 242 Register tmp, int masklen, BasicType bt, int vec_enc); 243 void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1, 244 Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc); 245 #endif 246 247 void vector_maskall_operation(KRegister dst, Register src, int mask_len); 248 249 #ifndef _LP64 250 void vector_maskall_operation32(KRegister dst, Register src, KRegister ktmp, int mask_len); 251 #endif 252 253 void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, 254 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 255 256 void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, 257 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 258 259 // IndexOf strings. 260 // Small strings are loaded through stack if they cross page boundary. 261 void string_indexof(Register str1, Register str2, 262 Register cnt1, Register cnt2, 263 int int_cnt2, Register result, 264 XMMRegister vec, Register tmp, 265 int ae); 266 267 // IndexOf for constant substrings with size >= 8 elements 268 // which don't need to be loaded through stack. 269 void string_indexofC8(Register str1, Register str2, 270 Register cnt1, Register cnt2, 271 int int_cnt2, Register result, 272 XMMRegister vec, Register tmp, 273 int ae); 274 275 // Smallest code: we don't need to load through stack, 276 // check string tail. 277 278 // helper function for string_compare 279 void load_next_elements(Register elem1, Register elem2, Register str1, Register str2, 280 Address::ScaleFactor scale, Address::ScaleFactor scale1, 281 Address::ScaleFactor scale2, Register index, int ae); 282 // Compare strings. 283 void string_compare(Register str1, Register str2, 284 Register cnt1, Register cnt2, Register result, 285 XMMRegister vec1, int ae, KRegister mask = knoreg); 286 287 // Search for Non-ASCII character (Negative byte value) in a byte array, 288 // return index of the first such character, otherwise len. 289 void count_positives(Register ary1, Register len, 290 Register result, Register tmp1, 291 XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg); 292 // Compare char[] or byte[] arrays. 293 void arrays_equals(bool is_array_equ, Register ary1, Register ary2, 294 Register limit, Register result, Register chr, 295 XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg); 296 297 void arrays_hashcode(Register str1, Register cnt1, Register result, 298 Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext, 299 XMMRegister vcoef0, XMMRegister vcoef1, XMMRegister vcoef2, XMMRegister vcoef3, 300 XMMRegister vresult0, XMMRegister vresult1, XMMRegister vresult2, XMMRegister vresult3, 301 XMMRegister vtmp0, XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, 302 BasicType eltype); 303 304 // helper functions for arrays_hashcode 305 int arrays_hashcode_elsize(BasicType eltype); 306 void arrays_hashcode_elload(Register dst, Address src, BasicType eltype); 307 void arrays_hashcode_elvload(XMMRegister dst, Address src, BasicType eltype); 308 void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype); 309 void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype); 310 311 #ifdef _LP64 312 void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src); 313 #endif 314 315 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, 316 XMMRegister dst, XMMRegister src1, XMMRegister src2, 317 bool merge, int vlen_enc, bool is_varshift = false); 318 319 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, 320 XMMRegister dst, XMMRegister src1, Address src2, 321 bool merge, int vlen_enc); 322 323 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, 324 XMMRegister src1, int imm8, bool merge, int vlen_enc); 325 326 void masked_op(int ideal_opc, int mask_len, KRegister dst, 327 KRegister src1, KRegister src2); 328 329 void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc, 330 BasicType from_elem_bt, BasicType to_elem_bt); 331 332 void vector_signed_cast(XMMRegister dst, XMMRegister src, int vlen_enc, 333 BasicType from_elem_bt, BasicType to_elem_bt); 334 335 void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero, 336 XMMRegister xtmp, Register rscratch, int vec_enc); 337 338 void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 339 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, 340 AddressLiteral float_sign_flip, Register rscratch, int vec_enc); 341 342 void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 343 XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, 344 Register rscratch, int vec_enc); 345 346 void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 347 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, 348 Register rscratch, int vec_enc); 349 350 void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 351 XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip, 352 Register rscratch, int vec_enc); 353 354 void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 355 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, 356 AddressLiteral float_sign_flip, Register rscratch, int vec_enc); 357 358 359 void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 360 XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch, 361 AddressLiteral float_sign_flip, int vec_enc); 362 363 void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 364 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip, 365 int vec_enc); 366 367 void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 368 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip, 369 int vec_enc); 370 371 void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 372 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip, 373 int vec_enc); 374 375 void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 376 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip, 377 int vec_enc); 378 379 void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, 380 XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip, 381 int vec_enc); 382 383 void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero, 384 XMMRegister xtmp, int index, int vec_enc); 385 386 void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen); 387 388 #ifdef _LP64 389 void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc, 390 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2); 391 392 void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc, 393 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2); 394 395 void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc, 396 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4); 397 #endif // _LP64 398 399 void udivI(Register rax, Register divisor, Register rdx); 400 void umodI(Register rax, Register divisor, Register rdx); 401 void udivmodI(Register rax, Register divisor, Register rdx, Register tmp); 402 403 #ifdef _LP64 404 void reverseI(Register dst, Register src, XMMRegister xtmp1, 405 XMMRegister xtmp2, Register rtmp); 406 void reverseL(Register dst, Register src, XMMRegister xtmp1, 407 XMMRegister xtmp2, Register rtmp1, Register rtmp2); 408 void udivL(Register rax, Register divisor, Register rdx); 409 void umodL(Register rax, Register divisor, Register rdx); 410 void udivmodL(Register rax, Register divisor, Register rdx, Register tmp); 411 #endif 412 413 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3, 414 bool merge, BasicType bt, int vlen_enc); 415 416 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3, 417 bool merge, BasicType bt, int vlen_enc); 418 419 void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 420 XMMRegister xtmp2, Register rtmp, int vec_enc); 421 422 void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc, 423 XMMRegister xtmp, Register rscratch = noreg); 424 425 void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc); 426 427 void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 428 XMMRegister xtmp2, Register rtmp, int vec_enc); 429 430 void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 431 XMMRegister xtmp2, Register rtmp, int vec_enc); 432 433 void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 434 XMMRegister xtmp2, Register rtmp, int vec_enc); 435 436 void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 437 XMMRegister xtmp2, Register rtmp, int vec_enc); 438 439 void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 440 XMMRegister xtmp2, Register rtmp, int vec_enc); 441 442 void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src, 443 KRegister mask, bool merge, int vec_enc); 444 445 void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc); 446 447 void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 448 XMMRegister xtmp2, Register rtmp, int vec_enc); 449 450 void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, 451 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, 452 KRegister ktmp, Register rtmp, bool merge, int vec_enc); 453 454 void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 455 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 456 457 void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 458 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 459 460 void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 461 XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc); 462 463 void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 464 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 465 466 void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 467 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 468 469 void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc); 470 471 void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc); 472 473 void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 474 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp, 475 Register rtmp, int vec_enc); 476 477 void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src, 478 XMMRegister xtmp1, Register rtmp, int vec_enc); 479 480 void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 481 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 482 483 void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one, 484 XMMRegister xtmp1, int vec_enc); 485 486 void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one, 487 KRegister ktmp1, int vec_enc); 488 489 void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc); 490 491 void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc); 492 493 void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1, 494 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc); 495 496 void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle, 497 XMMRegister src, int vlen_enc); 498 499 void load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp); 500 501 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP