/*
 * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
#define CPU_X86_C2_MACROASSEMBLER_X86_HPP

// C2_MacroAssembler contains high-level macros for C2
//
// NOTE(review): this header opens with a bare "public:" access specifier, so
// it is textually included into the body of an enclosing class declaration
// (presumably C2_MacroAssembler) rather than declaring a class of its own.
// All entries below are therefore member-function declarations; their bodies
// live elsewhere (the comments reference macroAssembler_x86.cpp).
//
// Naming conventions visible in the declarations:
//   vlen_in_bytes        - vector length in bytes (see vector_length_encoding)
//   vlen_enc / vec_enc   - an already-encoded AVX vector-length value
//   xnoreg / knoreg / noreg - "no register" sentinels used as defaults for
//                             optional XMM / mask / general registers

public:
  // C2 compiled method's prolog code.
  void verified_entry(Compile* C, int sp_inc = 0);

  // Method entry barrier: emit the barrier, its out-of-line stub, and report
  // the fixed size reserved for that stub.
  void entry_barrier();
  void emit_entry_barrier_stub(C2EntryBarrierStub* stub);
  static int entry_barrier_stub_size();

  // Map a vector length in bytes to the Assembler's AVX vector-length encoding.
  Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);

  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
  // See full description in macroAssembler_x86.cpp.
  void fast_lock(Register obj, Register box, Register tmp,
                 Register scr, Register cx1, Register cx2,
                 RTMLockingCounters* rtm_counters,
                 RTMLockingCounters* stack_rtm_counters,
                 Metadata* method_data,
                 bool use_rtm, bool profile_rtm);
  void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);

#if INCLUDE_RTM_OPT
  // RTM (Restricted Transactional Memory) locking helpers: abort-counter
  // bookkeeping, profiling, retry loops, and the stack-lock / inflated-lock
  // transactional fast paths used by fast_lock above.
  void rtm_counters_update(Register abort_status, Register rtm_counters);
  void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
  void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
                                   RTMLockingCounters* rtm_counters,
                                   Metadata* method_data);
  void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
  void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
  void rtm_stack_locking(Register obj, Register tmp, Register scr,
                         Register retry_on_abort_count,
                         RTMLockingCounters* stack_rtm_counters,
                         Metadata* method_data, bool profile_rtm,
                         Label& DONE_LABEL, Label& IsInflated);
  void rtm_inflated_locking(Register obj, Register box, Register tmp,
                            Register scr, Register retry_on_busy_count,
                            Register retry_on_abort_count,
                            RTMLockingCounters* rtm_counters,
                            Metadata* method_data, bool profile_rtm,
                            Label& DONE_LABEL);
#endif

  // Generic instructions support for use in .ad files C2 code generation

  // Absolute value / negation of double and float vectors; the variants
  // without a vector_len operate on the scalar/128-bit form.
  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src);
  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len);
  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src);
  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len);

  // Element-wise min/max. pminmax/vpminmax are the integer SSE/AVX forms;
  // vminmax_fp/evminmax_fp are the floating-point forms (the evex variant
  // uses an opmask register as its temporary).
  void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
               XMMRegister tmp = xnoreg);
  void vpminmax(int opcode, BasicType elem_bt,
                XMMRegister dst, XMMRegister src1, XMMRegister src2,
                int vlen_enc);

  void vminmax_fp(int opcode, BasicType elem_bt,
                  XMMRegister dst, XMMRegister a, XMMRegister b,
                  XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
                  int vlen_enc);
  void evminmax_fp(int opcode, BasicType elem_bt,
                   XMMRegister dst, XMMRegister a, XMMRegister b,
                   KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
                   int vlen_enc);

  // Floating-point signum: zero and one hold pre-loaded 0.0 / 1.0 constants.
  void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one);

  void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
                              bool merge, BasicType bt, int vec_enc);

  void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len);

  // Sign/zero extension of packed byte->word, byte->dword, word->dword.
  void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
  void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);

  // Vector shifts by a register-held or immediate count, for dword/word/qword
  // element sizes; opcode selects the ideal shift kind.
  void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftd_imm(int opcode, XMMRegister dst, int shift);
  void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftq_imm(int opcode, XMMRegister dst, int shift);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  // Vector rotates with immediate or variable (per-element) rotate counts.
  void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  // Variable (per-lane) shifts; the byte/word forms need an extra XMM temp.
  void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
  void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);
  void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp);

  // Lane insert and gather/scatter; evgather/evscatter are the EVEX
  // (opmask-predicated) forms.
  void insert(BasicType typ, XMMRegister dst, Register val, int idx);
  void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
  void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
  void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
  void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);

  // Masked vector load/store; merge selects merge- vs zero-masking.
  void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
  void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);

  // extract
  void extract(BasicType typ, Register dst, XMMRegister src, int idx);
  XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
  void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
  void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg);

  // vector test
  void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes);

  // Convert B2X (byte vector to wider element type)
  void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
#ifdef _LP64
  void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
#endif

  // blend
  void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
  void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg);
  void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);

  // Vector loads sized by vlen_in_bytes; the AddressLiteral form may need a
  // scratch register to materialize the address.
  void load_vector(XMMRegister dst, Address src, int vlen_in_bytes);
  void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg);

  void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
  void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc);

  void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
  void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt);

  // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

  // dst = src1 reduce(op, src2) using vtmp as temps
  void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
  void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void genmask(KRegister dst, Register len, Register temp);
#endif // _LP64

  // dst = reduce(op, src2) using vtmp as temps
  void reduce_fp(int opcode, int vlen,
                 XMMRegister dst, XMMRegister src,
                 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
  void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
                         XMMRegister dst, XMMRegister src,
                         XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
  void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
                          XMMRegister dst, XMMRegister src,
                          XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
 private:
  // Per-width reduction helpers dispatched to by the public reduce* entry
  // points above; the suffix digit is the element count being reduced.
  void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Int Reduction
  void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Byte Reduction
  void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Short Reduction
  void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Long Reduction
#ifdef _LP64
  void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64

  // Float Reduction
  void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Double Reduction
  void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Base reduction instruction
  void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
  void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);

 public:
#ifdef _LP64
  // Vector-mask reductions (truecount/firsttrue/etc. selected by opc) over
  // masks held either in an opmask register or an XMM register.
  void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);

  void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);

  void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
                             Register tmp, int masklen, BasicType bt, int vec_enc);
  void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
                              Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc);
#endif

  void vector_maskall_operation(KRegister dst, Register src, int mask_len);

#ifndef _LP64
  void vector_maskall_operation32(KRegister dst, Register src, KRegister ktmp, int mask_len);
#endif

  // Find the first occurrence of char ch in str1 (UTF-16 and Latin-1 forms).
  void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);

  void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
                            XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);

  // IndexOf strings.
  // Small strings are loaded through stack if they cross page boundary.
  void string_indexof(Register str1, Register str2,
                      Register cnt1, Register cnt2,
                      int int_cnt2, Register result,
                      XMMRegister vec, Register tmp,
                      int ae);

  // IndexOf for constant substrings with size >= 8 elements
  // which don't need to be loaded through stack.
  void string_indexofC8(Register str1, Register str2,
                        Register cnt1, Register cnt2,
                        int int_cnt2, Register result,
                        XMMRegister vec, Register tmp,
                        int ae);

  // Smallest code: we don't need to load through stack,
  // check string tail.

  // helper function for string_compare
  void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
                          Address::ScaleFactor scale, Address::ScaleFactor scale1,
                          Address::ScaleFactor scale2, Register index, int ae);
  // Compare strings.
  void string_compare(Register str1, Register str2,
                      Register cnt1, Register cnt2, Register result,
                      XMMRegister vec1, int ae, KRegister mask = knoreg);

  // Search for Non-ASCII character (Negative byte value) in a byte array,
  // return index of the first such character, otherwise len.
  void count_positives(Register ary1, Register len,
                       Register result, Register tmp1,
                       XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
  // Compare char[] or byte[] arrays.
  void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                     Register limit, Register result, Register chr,
                     XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);


  // Opmask-predicated vector ops keyed by the ideal (C2) opcode; the three
  // overloads take a register, memory, or immediate second operand.
  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
                   XMMRegister dst, XMMRegister src1, XMMRegister src2,
                   bool merge, int vlen_enc, bool is_varshift = false);

  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
                   XMMRegister dst, XMMRegister src1, Address src2,
                   bool merge, int vlen_enc);

  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
                   XMMRegister src1, int imm8, bool merge, int vlen_enc);

  void masked_op(int ideal_opc, int mask_len, KRegister dst,
                 KRegister src1, KRegister src2);

  // Vector casts. The F2X/D2X families convert float/double vectors to the
  // integral type named by to_elem_bt, with the *_special_cases_* helpers
  // patching up NaN / out-of-range lanes (sign_flip points at a bit-flip
  // constant used for the unsigned-compare trick).
  void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc,
                            BasicType from_elem_bt, BasicType to_elem_bt);

  void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero,
                                  XMMRegister xtmp, Register rscratch, int vec_enc);

  void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                          XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
                          AddressLiteral float_sign_flip, Register rscratch, int vec_enc);

  void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                           XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
                           Register rscratch, int vec_enc);

  void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                           KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
                           Register rscratch, int vec_enc);

  void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                           XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip,
                           Register rscratch, int vec_enc);

  void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                          XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5,
                          AddressLiteral float_sign_flip, Register rscratch, int vec_enc);


  void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                   XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch,
                                                   AddressLiteral float_sign_flip, int vec_enc);

  void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                    KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
                                                    int vec_enc);

  void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                     KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
                                                     int vec_enc);

  void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                   KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip,
                                                   int vec_enc);

  void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                                                    KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip,
                                                    int vec_enc);

  void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
                                                  XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip,
                                                  int vec_enc);

  void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero,
                                            XMMRegister xtmp, int index, int vec_enc);

  void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen);

#ifdef _LP64
  // Vector Math.round(); new_mxcsr points at an MXCSR image used to switch
  // the rounding mode during the operation.
  void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
                                Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);

  void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
                               Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2);

  void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc,
                              Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4);
#endif // _LP64

  // Unsigned 32-bit divide / modulo / combined divmod. Parameter names match
  // the fixed registers x86 DIV uses (dividend in rax, remainder in rdx).
  void udivI(Register rax, Register divisor, Register rdx);
  void umodI(Register rax, Register divisor, Register rdx);
  void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);

#ifdef _LP64
  // Bit-reversal of int/long values, and the 64-bit unsigned div/mod family.
  void reverseI(Register dst, Register src, XMMRegister xtmp1,
                XMMRegister xtmp2, Register rtmp);
  void reverseL(Register dst, Register src, XMMRegister xtmp1,
                XMMRegister xtmp2, Register rtmp1, Register rtmp2);
  void udivL(Register rax, Register divisor, Register rdx);
  void umodL(Register rax, Register divisor, Register rdx);
  void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
#endif

  // Masked ternary logic (vpternlog) with register or memory third operand;
  // func is the 8-bit truth-table immediate.
  void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
                  bool merge, BasicType bt, int vlen_enc);

  void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
                  bool merge, BasicType bt, int vlen_enc);

  // Per-element bit reversal; the _gfni variant uses the GFNI instruction set
  // with mask pointing at its matrix constant.
  void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                          XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc,
                               XMMRegister xtmp, Register rscratch = noreg);

  void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc);

  // Vector population count per element width, plus the dispatchers over
  // BasicType (plain and EVEX opmask-predicated forms).
  void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                           XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                            XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                             XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                            XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                XMMRegister xtmp2, Register rtmp, int vec_enc);

  void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
                                     KRegister mask, bool merge, int vec_enc);

  // Broadcast the immediate imm32 into every element of dst.
  void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc);

  void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                             XMMRegister xtmp2, Register rtmp, int vec_enc);

  // Count-leading-zeros per element: one EVEX dispatcher plus per-width AVX
  // fallbacks and their BasicType dispatcher.
  void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
                                       XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
                                       KRegister ktmp, Register rtmp, bool merge, int vec_enc);

  void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                           XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                            XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                          XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc);

  void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                           XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                      XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  // Element-wise vector add/subtract keyed by BasicType.
  void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);

  void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);

  // Count-trailing-zeros per element (EVEX and AVX paths), plus the
  // swap-nbits bit-permutation helper used by the bit-manipulation routines.
  void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                        XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp,
                                        Register rtmp, int vec_enc);

  void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
                         XMMRegister xtmp1, Register rtmp, int vec_enc);

  void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                       XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);

  // Vector signum; zero and one hold pre-loaded 0.0 / 1.0 constants.
  void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
                         XMMRegister xtmp1, int vec_enc);

  void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
                          KRegister ktmp1, int vec_enc);

  // Masked vector load/store with the mask held in an XMM register.
  void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc);

  void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc);

  // Byte-level rearrange (shuffle) of src by the indices in shuffle.
  void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
                       XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);

#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP