1 /* 2 * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP 26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP 27 28 // C2_MacroAssembler contains high-level macros for C2 29 30 public: 31 // C2 compiled method's prolog code. 32 void verified_entry(Compile* C, int sp_inc = 0); 33 34 void entry_barrier(); 35 Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes); 36 37 // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. 38 // See full description in macroAssembler_x86.cpp. 39 void fast_lock(Register obj, Register box, Register tmp, 40 Register scr, Register cx1, Register cx2, Register thread, 41 Metadata* method_data); 42 void fast_unlock(Register obj, Register box, Register tmp); 43 44 void fast_lock_lightweight(Register obj, Register box, Register rax_reg, 45 Register t, Register thread); 46 void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread); 47 48 void verify_int_in_range(uint idx, const TypeInt* t, Register val); 49 void verify_long_in_range(uint idx, const TypeLong* t, Register val, Register tmp); 50 51 // Generic instructions support for use in .ad files C2 code generation 52 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src); 53 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len); 54 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src); 55 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len); 56 57 void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, 58 XMMRegister tmp = xnoreg); 59 void vpminmax(int opcode, BasicType elem_bt, 60 XMMRegister dst, XMMRegister src1, XMMRegister src2, 61 int vlen_enc); 62 63 void vpuminmax(int opcode, BasicType elem_bt, 64 XMMRegister dst, XMMRegister src1, XMMRegister src2, 65 int vlen_enc); 66 67 void vpuminmax(int opcode, BasicType elem_bt, 68 XMMRegister dst, XMMRegister src1, Address src2, 69 int vlen_enc); 70 71 void vminmax_fp(int opcode, BasicType elem_bt, 72 XMMRegister dst, XMMRegister a, XMMRegister b, 73 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, 74 int vlen_enc); 75 76 void vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, 77 XMMRegister src1, XMMRegister src2, int vlen_enc); 78 79 void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); 80 81 void evminmax_fp(int opcode, BasicType elem_bt, 82 XMMRegister dst, XMMRegister a, XMMRegister b, 83 KRegister ktmp, XMMRegister atmp, XMMRegister btmp, 84 int vlen_enc); 85 86 void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one); 87 88 void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask, 89 bool merge, BasicType bt, int vec_enc); 90 91 void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len); 92 93 void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 94 void vextendbw(bool sign, XMMRegister dst, XMMRegister src); 95 void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 96 void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 97 98 void vshiftd(int opcode, XMMRegister dst, XMMRegister shift); 99 void vshiftd_imm(int opcode, XMMRegister dst, int shift); 100 void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 101 void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 102 void vshiftw(int opcode, XMMRegister dst, XMMRegister shift); 103 void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 104 void vshiftq(int opcode, XMMRegister dst, XMMRegister shift); 105 void vshiftq_imm(int opcode, XMMRegister dst, int shift); 106 void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 107 void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 108 109 void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len); 110 void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); 111 112 void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 113 void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 114 void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg); 115 void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp); 116 void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp); 117 118 void insert(BasicType typ, XMMRegister dst, Register val, int idx); 119 void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx); 120 void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len); 121 void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len); 122 void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len); 123 124 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len); 125 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len); 126 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, XMMRegister src, bool merge, int vector_len); 127 128 // extract 129 void extract(BasicType typ, Register dst, XMMRegister src, int idx); 130 XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex); 131 void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex); 132 void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg); 133 void movsxl(BasicType typ, Register dst); 134 135 // vector test 136 void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes); 137 138 // Covert B2X 139 void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc); 140 void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc); 141 142 // blend 143 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len); 144 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg); 145 void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len); 146 147 void load_vector(BasicType bt, XMMRegister dst, Address src, int vlen_in_bytes); 148 void load_vector(BasicType bt, XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg); 149 150 void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy); 151 void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc); 152 153 void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen); 154 void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt); 155 156 // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. 157 158 // dst = src1 reduce(op, src2) using vtmp as temps 159 void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 160 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 161 void genmask(KRegister dst, Register len, Register temp); 162 163 // dst = reduce(op, src2) using vtmp as temps 164 void reduce_fp(int opcode, int vlen, 165 XMMRegister dst, XMMRegister src, 166 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg); 167 void unordered_reduce_fp(int opcode, int vlen, 168 XMMRegister dst, XMMRegister src, 169 XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg); 170 void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 171 void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 172 void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 173 void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, 174 XMMRegister dst, XMMRegister src, 175 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 176 void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, 177 XMMRegister dst, XMMRegister src, 178 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 179 private: 180 void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 181 void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 182 void unorderedReduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 183 void unorderedReduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 184 185 // Int Reduction 186 void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 187 void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 188 void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 189 void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 190 191 // Byte Reduction 192 void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 193 void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 194 void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 195 void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 196 void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 197 void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 198 void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 199 void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 200 201 // Short Reduction 202 void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 203 void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 204 void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 205 void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 206 207 // Long Reduction 208 void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 209 void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 210 void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 211 212 // Float Reduction 213 void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 214 void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 215 void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 216 void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 217 218 // Unordered Float Reduction 219 void unorderedReduce2F(int opcode, XMMRegister dst, XMMRegister src); 220 void unorderedReduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 221 void unorderedReduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 222 void unorderedReduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 223 224 // Double Reduction 225 void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 226 void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 227 void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 228 229 // Unordered Double Reduction 230 void unorderedReduce2D(int opcode, XMMRegister dst, XMMRegister src); 231 void unorderedReduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 232 void unorderedReduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 233 234 // Base reduction instruction 235 void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src); 236 void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); 237 void unordered_reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src); 238 void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); 239 240 public: 241 void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen); 242 243 void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc); 244 245 void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, 246 Register tmp, int masklen, BasicType bt, int vec_enc); 247 void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1, 248 Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc); 249 250 void vector_maskall_operation(KRegister dst, Register src, int mask_len); 251 252 void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, 253 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 254 255 void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, 256 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 257 258 // IndexOf strings. 259 // Small strings are loaded through stack if they cross page boundary. 260 void string_indexof(Register str1, Register str2, 261 Register cnt1, Register cnt2, 262 int int_cnt2, Register result, 263 XMMRegister vec, Register tmp, 264 int ae); 265 266 // IndexOf for constant substrings with size >= 8 elements 267 // which don't need to be loaded through stack. 268 void string_indexofC8(Register str1, Register str2, 269 Register cnt1, Register cnt2, 270 int int_cnt2, Register result, 271 XMMRegister vec, Register tmp, 272 int ae); 273 274 // Smallest code: we don't need to load through stack, 275 // check string tail. 276 277 // helper function for string_compare 278 void load_next_elements(Register elem1, Register elem2, Register str1, Register str2, 279 Address::ScaleFactor scale, Address::ScaleFactor scale1, 280 Address::ScaleFactor scale2, Register index, int ae); 281 // Compare strings. 282 void string_compare(Register str1, Register str2, 283 Register cnt1, Register cnt2, Register result, 284 XMMRegister vec1, int ae, KRegister mask = knoreg); 285 286 // Search for Non-ASCII character (Negative byte value) in a byte array, 287 // return index of the first such character, otherwise len. 288 void count_positives(Register ary1, Register len, 289 Register result, Register tmp1, 290 XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg); 291 292 // Compare char[] or byte[] arrays. 293 void arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, 294 Register result, Register chr, XMMRegister vec1, XMMRegister vec2, 295 bool is_char, KRegister mask = knoreg, bool expand_ary2 = false); 296 297 void arrays_hashcode(Register str1, Register cnt1, Register result, 298 Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext, 299 XMMRegister vcoef0, XMMRegister vcoef1, XMMRegister vcoef2, XMMRegister vcoef3, 300 XMMRegister vresult0, XMMRegister vresult1, XMMRegister vresult2, XMMRegister vresult3, 301 XMMRegister vtmp0, XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, 302 BasicType eltype); 303 304 // helper functions for arrays_hashcode 305 int arrays_hashcode_elsize(BasicType eltype); 306 void arrays_hashcode_elload(Register dst, Address src, BasicType eltype); 307 void arrays_hashcode_elvload(XMMRegister dst, Address src, BasicType eltype); 308 void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype); 309 void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype); 310 311 void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src); 312 313 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, 314 XMMRegister dst, XMMRegister src1, XMMRegister src2, 315 bool merge, int vlen_enc, bool is_varshift = false); 316 317 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, 318 XMMRegister dst, XMMRegister src1, Address src2, 319 bool merge, int vlen_enc); 320 321 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, 322 XMMRegister src1, int imm8, bool merge, int vlen_enc); 323 324 void masked_op(int ideal_opc, int mask_len, KRegister dst, 325 KRegister src1, KRegister src2); 326 327 void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc, 328 BasicType from_elem_bt, BasicType to_elem_bt); 329 330 void vector_signed_cast(XMMRegister dst, XMMRegister src, int vlen_enc, 331 BasicType from_elem_bt, BasicType to_elem_bt); 332 333 void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero, 334 XMMRegister xtmp, Register rscratch, int vec_enc); 335 336 void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 337 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, 338 AddressLiteral float_sign_flip, Register rscratch, int vec_enc); 339 340 void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 341 XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, 342 Register rscratch, int vec_enc); 343 344 void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 345 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, 346 Register rscratch, int vec_enc); 347 348 void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 349 XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip, 350 Register rscratch, int vec_enc); 351 352 void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 353 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, 354 AddressLiteral float_sign_flip, Register rscratch, int vec_enc); 355 356 357 void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 358 XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch, 359 AddressLiteral float_sign_flip, int vec_enc); 360 361 void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 362 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip, 363 int vec_enc); 364 365 void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 366 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip, 367 int vec_enc); 368 369 void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 370 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip, 371 int vec_enc); 372 373 void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 374 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip, 375 int vec_enc); 376 377 void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, 378 XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip, 379 int vec_enc); 380 381 void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero, 382 XMMRegister xtmp, int index, int vec_enc); 383 384 void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen); 385 386 void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc, 387 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2); 388 389 void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc, 390 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2); 391 392 void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc, 393 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4); 394 395 void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask, 396 Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp, 397 BasicType bt, int vec_enc); 398 399 void udivI(Register rax, Register divisor, Register rdx); 400 void umodI(Register rax, Register divisor, Register rdx); 401 void udivmodI(Register rax, Register divisor, Register rdx, Register tmp); 402 403 void reverseI(Register dst, Register src, XMMRegister xtmp1, 404 XMMRegister xtmp2, Register rtmp); 405 void reverseL(Register dst, Register src, XMMRegister xtmp1, 406 XMMRegister xtmp2, Register rtmp1, Register rtmp2); 407 void udivL(Register rax, Register divisor, Register rdx); 408 void umodL(Register rax, Register divisor, Register rdx); 409 void udivmodL(Register rax, Register divisor, Register rdx, Register tmp); 410 411 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3, 412 bool merge, BasicType bt, int vlen_enc); 413 414 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3, 415 bool merge, BasicType bt, int vlen_enc); 416 417 void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 418 XMMRegister xtmp2, Register rtmp, int vec_enc); 419 420 void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc, 421 XMMRegister xtmp, Register rscratch = noreg); 422 423 void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc); 424 425 void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 426 XMMRegister xtmp2, Register rtmp, int vec_enc); 427 428 void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 429 XMMRegister xtmp2, Register rtmp, int vec_enc); 430 431 void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 432 XMMRegister xtmp2, Register rtmp, int vec_enc); 433 434 void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 435 XMMRegister xtmp2, Register rtmp, int vec_enc); 436 437 void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 438 XMMRegister xtmp2, Register rtmp, int vec_enc); 439 440 void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src, 441 KRegister mask, bool merge, int vec_enc); 442 443 void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc); 444 445 void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 446 XMMRegister xtmp2, Register rtmp, int vec_enc); 447 448 void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, 449 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, 450 KRegister ktmp, Register rtmp, bool merge, int vec_enc); 451 452 void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 453 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 454 455 void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 456 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 457 458 void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 459 XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc); 460 461 void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 462 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 463 464 void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 465 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 466 467 void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc); 468 469 void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc); 470 471 void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 472 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp, 473 Register rtmp, int vec_enc); 474 475 void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src, 476 XMMRegister xtmp1, Register rtmp, int vec_enc); 477 478 void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 479 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 480 481 void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one, 482 XMMRegister xtmp1, int vec_enc); 483 484 void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one, 485 KRegister ktmp1, int vec_enc); 486 487 void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc); 488 489 void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc); 490 491 void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1, 492 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc); 493 494 void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle, 495 XMMRegister src, int vlen_enc); 496 497 void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); 498 499 void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register offset, 500 Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, 501 Register midx, Register length, int vector_len, int vlen_enc); 502 503 void vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, 504 Register offset, Register mask, Register midx, Register rtmp, int vlen_enc); 505 506 void vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, 507 Register offset, Register rtmp, int vlen_enc); 508 509 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, bool is_unsigned, int vlen_enc); 510 511 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, bool is_unsigned, int vlen_enc); 512 513 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 514 515 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc); 516 517 void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 518 519 void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc); 520 521 void vector_sub_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, KRegister ktmp, int vlen_enc); 522 523 void vector_sub_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 524 XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); 525 526 void vector_add_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 527 XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp, int vlen_enc); 528 529 void vector_add_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 530 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, int vlen_enc); 531 532 void vector_addsub_dq_saturating_avx(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 533 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, int vlen_enc); 534 535 void vector_addsub_dq_saturating_evex(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 536 XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, int vlen_enc); 537 538 void evpmovd2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false); 539 540 void evpmovq2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false); 541 542 void vpsign_extend_dq(BasicType etype, XMMRegister dst, XMMRegister src, int vlen_enc); 543 544 void vpgenmin_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false); 545 546 void vpgenmax_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false); 547 548 void evpcmpu(BasicType etype, KRegister kmask, XMMRegister src1, XMMRegister src2, Assembler::ComparisonPredicate cond, int vlen_enc); 549 550 void vpcmpgt(BasicType etype, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 551 552 void evpmov_vec_to_mask(BasicType etype, KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 553 int vlen_enc, bool xtmp2_hold_M1 = false); 554 555 void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2, 556 bool is_unsigned, bool merge, int vlen_enc); 557 558 void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2, 559 bool is_unsigned, bool merge, int vlen_enc); 560 561 void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2, 562 bool merge, int vlen_enc); 563 564 void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2, 565 bool merge, int vlen_enc); 566 567 void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, 568 XMMRegister src2, bool merge, int vlen_enc); 569 570 void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, 571 Address src2, bool merge, int vlen_enc); 572 573 void select_from_two_vectors_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 574 575 void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 576 577 void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc); 578 579 void vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, 580 KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); 581 582 void scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, 583 KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2); 584 585 void reconstruct_frame_pointer(Register rtmp); 586 587 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP