1 /* 2 * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP 26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP 27 28 // C2_MacroAssembler contains high-level macros for C2 29 30 public: 31 // C2 compiled method's prolog code. 32 void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub); 33 34 Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes); 35 36 // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. 37 // See full description in macroAssembler_x86.cpp. 38 void fast_lock(Register obj, Register box, Register tmp, 39 Register scr, Register cx1, Register cx2, Register thread, 40 Metadata* method_data); 41 void fast_unlock(Register obj, Register box, Register tmp); 42 43 void fast_lock_lightweight(Register obj, Register box, Register rax_reg, 44 Register t, Register thread); 45 void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread); 46 47 void verify_int_in_range(uint idx, const TypeInt* t, Register val); 48 void verify_long_in_range(uint idx, const TypeLong* t, Register val, Register tmp); 49 50 // Generic instructions support for use in .ad files C2 code generation 51 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src); 52 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len); 53 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src); 54 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len); 55 56 void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, 57 XMMRegister tmp = xnoreg); 58 void vpminmax(int opcode, BasicType elem_bt, 59 XMMRegister dst, XMMRegister src1, XMMRegister src2, 60 int vlen_enc); 61 62 void vpuminmax(int opcode, BasicType elem_bt, 63 XMMRegister dst, XMMRegister src1, XMMRegister src2, 64 int vlen_enc); 65 66 void vpuminmax(int opcode, BasicType elem_bt, 67 XMMRegister dst, XMMRegister src1, Address src2, 68 int vlen_enc); 69 70 void vminmax_fp(int opcode, BasicType elem_bt, 71 XMMRegister dst, XMMRegister a, XMMRegister b, 72 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, 73 int vlen_enc); 74 75 void vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, 76 XMMRegister src1, XMMRegister src2, int vlen_enc); 77 78 void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); 79 80 void evminmax_fp(int opcode, BasicType elem_bt, 81 XMMRegister dst, XMMRegister a, XMMRegister b, 82 KRegister ktmp, XMMRegister atmp, XMMRegister btmp, 83 int vlen_enc); 84 85 void signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one); 86 87 void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask, 88 bool merge, BasicType bt, int vec_enc); 89 90 void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len); 91 92 void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 93 void vextendbw(bool sign, XMMRegister dst, XMMRegister src); 94 void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 95 void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 96 97 void vshiftd(int opcode, XMMRegister dst, XMMRegister shift); 98 void vshiftd_imm(int opcode, XMMRegister dst, int shift); 99 void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 100 void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 101 void vshiftw(int opcode, XMMRegister dst, XMMRegister shift); 102 void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 103 void vshiftq(int opcode, XMMRegister dst, XMMRegister shift); 104 void vshiftq_imm(int opcode, XMMRegister dst, int shift); 105 void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 106 void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 107 108 void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len); 109 void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); 110 111 void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 112 void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 113 void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg); 114 void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp); 115 void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp); 116 117 void insert(BasicType typ, XMMRegister dst, Register val, int idx); 118 void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx); 119 void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len); 120 void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len); 121 void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len); 122 123 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len); 124 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len); 125 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, XMMRegister src, bool merge, int vector_len); 126 127 // extract 128 void extract(BasicType typ, Register dst, XMMRegister src, int idx); 129 XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex); 130 void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex); 131 void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, XMMRegister vtmp = xnoreg); 132 void movsxl(BasicType typ, Register dst); 133 134 // vector test 135 void vectortest(BasicType bt, XMMRegister src1, XMMRegister src2, XMMRegister vtmp, int vlen_in_bytes); 136 137 // Covert B2X 138 void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc); 139 void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc); 140 141 // blend 142 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len); 143 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral src2, int comparison, int vector_len, Register rscratch = noreg); 144 void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len); 145 146 void load_vector(BasicType bt, XMMRegister dst, Address src, int vlen_in_bytes); 147 void load_vector(BasicType bt, XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = noreg); 148 149 void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy); 150 void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, bool novlbwdq, int vlen_enc); 151 152 void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen); 153 void load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt); 154 155 // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. 156 157 // dst = src1 reduce(op, src2) using vtmp as temps 158 void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 159 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 160 void genmask(KRegister dst, Register len, Register temp); 161 162 // dst = reduce(op, src2) using vtmp as temps 163 void reduce_fp(int opcode, int vlen, 164 XMMRegister dst, XMMRegister src, 165 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg); 166 void unordered_reduce_fp(int opcode, int vlen, 167 XMMRegister dst, XMMRegister src, 168 XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg); 169 void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 170 void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 171 void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 172 void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, 173 XMMRegister dst, XMMRegister src, 174 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 175 void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, 176 XMMRegister dst, XMMRegister src, 177 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 178 private: 179 void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 180 void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 181 void unorderedReduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 182 void unorderedReduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 183 184 // Int Reduction 185 void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 186 void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 187 void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 188 void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 189 190 // Byte Reduction 191 void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 192 void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 193 void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 194 void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 195 void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 196 void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 197 void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 198 void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 199 200 // Short Reduction 201 void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 202 void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 203 void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 204 void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 205 206 // Long Reduction 207 void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 208 void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 209 void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 210 211 // Float Reduction 212 void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 213 void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 214 void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 215 void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 216 217 // Unordered Float Reduction 218 void unorderedReduce2F(int opcode, XMMRegister dst, XMMRegister src); 219 void unorderedReduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 220 void unorderedReduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 221 void unorderedReduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 222 223 // Double Reduction 224 void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 225 void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 226 void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 227 228 // Unordered Double Reduction 229 void unorderedReduce2D(int opcode, XMMRegister dst, XMMRegister src); 230 void unorderedReduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 231 void unorderedReduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 232 233 // Base reduction instruction 234 void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src); 235 void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); 236 void unordered_reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src); 237 void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); 238 239 public: 240 void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen); 241 242 void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc); 243 244 void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, 245 Register tmp, int masklen, BasicType bt, int vec_enc); 246 void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1, 247 Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc); 248 249 void vector_maskall_operation(KRegister dst, Register src, int mask_len); 250 251 void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, 252 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 253 254 void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, 255 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 256 257 // IndexOf strings. 258 // Small strings are loaded through stack if they cross page boundary. 259 void string_indexof(Register str1, Register str2, 260 Register cnt1, Register cnt2, 261 int int_cnt2, Register result, 262 XMMRegister vec, Register tmp, 263 int ae); 264 265 // IndexOf for constant substrings with size >= 8 elements 266 // which don't need to be loaded through stack. 267 void string_indexofC8(Register str1, Register str2, 268 Register cnt1, Register cnt2, 269 int int_cnt2, Register result, 270 XMMRegister vec, Register tmp, 271 int ae); 272 273 // Smallest code: we don't need to load through stack, 274 // check string tail. 275 276 // helper function for string_compare 277 void load_next_elements(Register elem1, Register elem2, Register str1, Register str2, 278 Address::ScaleFactor scale, Address::ScaleFactor scale1, 279 Address::ScaleFactor scale2, Register index, int ae); 280 // Compare strings. 281 void string_compare(Register str1, Register str2, 282 Register cnt1, Register cnt2, Register result, 283 XMMRegister vec1, int ae, KRegister mask = knoreg); 284 285 // Search for Non-ASCII character (Negative byte value) in a byte array, 286 // return index of the first such character, otherwise len. 287 void count_positives(Register ary1, Register len, 288 Register result, Register tmp1, 289 XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg); 290 291 // Compare char[] or byte[] arrays. 292 void arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, 293 Register result, Register chr, XMMRegister vec1, XMMRegister vec2, 294 bool is_char, KRegister mask = knoreg, bool expand_ary2 = false); 295 296 void arrays_hashcode(Register str1, Register cnt1, Register result, 297 Register tmp1, Register tmp2, Register tmp3, XMMRegister vnext, 298 XMMRegister vcoef0, XMMRegister vcoef1, XMMRegister vcoef2, XMMRegister vcoef3, 299 XMMRegister vresult0, XMMRegister vresult1, XMMRegister vresult2, XMMRegister vresult3, 300 XMMRegister vtmp0, XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, 301 BasicType eltype); 302 303 // helper functions for arrays_hashcode 304 int arrays_hashcode_elsize(BasicType eltype); 305 void arrays_hashcode_elload(Register dst, Address src, BasicType eltype); 306 void arrays_hashcode_elvload(XMMRegister dst, Address src, BasicType eltype); 307 void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype); 308 void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype); 309 310 void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src); 311 312 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, 313 XMMRegister dst, XMMRegister src1, XMMRegister src2, 314 bool merge, int vlen_enc, bool is_varshift = false); 315 316 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, 317 XMMRegister dst, XMMRegister src1, Address src2, 318 bool merge, int vlen_enc); 319 320 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, 321 XMMRegister src1, int imm8, bool merge, int vlen_enc); 322 323 void masked_op(int ideal_opc, int mask_len, KRegister dst, 324 KRegister src1, KRegister src2); 325 326 void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc, 327 BasicType from_elem_bt, BasicType to_elem_bt); 328 329 void vector_signed_cast(XMMRegister dst, XMMRegister src, int vlen_enc, 330 BasicType from_elem_bt, BasicType to_elem_bt); 331 332 void vector_cast_int_to_subword(BasicType to_elem_bt, XMMRegister dst, XMMRegister zero, 333 XMMRegister xtmp, Register rscratch, int vec_enc); 334 335 void vector_castF2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 336 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, 337 AddressLiteral float_sign_flip, Register rscratch, int vec_enc); 338 339 void vector_castF2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 340 XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, 341 Register rscratch, int vec_enc); 342 343 void vector_castF2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 344 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, 345 Register rscratch, int vec_enc); 346 347 void vector_castD2X_evex(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 348 XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral sign_flip, 349 Register rscratch, int vec_enc); 350 351 void vector_castD2X_avx(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 352 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, 353 AddressLiteral float_sign_flip, Register rscratch, int vec_enc); 354 355 356 void vector_cast_double_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 357 XMMRegister xtmp3, XMMRegister xtmp4, XMMRegister xtmp5, Register rscratch, 358 AddressLiteral float_sign_flip, int vec_enc); 359 360 void vector_cast_double_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 361 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip, 362 int vec_enc); 363 364 void vector_cast_double_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 365 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip, 366 int vec_enc); 367 368 void vector_cast_float_to_int_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 369 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral float_sign_flip, 370 int vec_enc); 371 372 void vector_cast_float_to_long_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 373 KRegister ktmp1, KRegister ktmp2, Register rscratch, AddressLiteral double_sign_flip, 374 int vec_enc); 375 376 void vector_cast_float_to_int_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, 377 XMMRegister xtmp4, Register rscratch, AddressLiteral float_sign_flip, 378 int vec_enc); 379 380 void vector_crosslane_doubleword_pack_avx(XMMRegister dst, XMMRegister src, XMMRegister zero, 381 XMMRegister xtmp, int index, int vec_enc); 382 383 void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen); 384 385 void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc, 386 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2); 387 388 void vector_round_float_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc, 389 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2); 390 391 void vector_round_float_avx(XMMRegister dst, XMMRegister src, AddressLiteral float_sign_flip, AddressLiteral new_mxcsr, int vec_enc, 392 Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4); 393 394 void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask, 395 Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp, 396 BasicType bt, int vec_enc); 397 398 void udivI(Register rax, Register divisor, Register rdx); 399 void umodI(Register rax, Register divisor, Register rdx); 400 void udivmodI(Register rax, Register divisor, Register rdx, Register tmp); 401 402 void reverseI(Register dst, Register src, XMMRegister xtmp1, 403 XMMRegister xtmp2, Register rtmp); 404 void reverseL(Register dst, Register src, XMMRegister xtmp1, 405 XMMRegister xtmp2, Register rtmp1, Register rtmp2); 406 void udivL(Register rax, Register divisor, Register rdx); 407 void umodL(Register rax, Register divisor, Register rdx); 408 void udivmodL(Register rax, Register divisor, Register rdx, Register tmp); 409 410 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3, 411 bool merge, BasicType bt, int vlen_enc); 412 413 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3, 414 bool merge, BasicType bt, int vlen_enc); 415 416 void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 417 XMMRegister xtmp2, Register rtmp, int vec_enc); 418 419 void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, AddressLiteral mask, int vec_enc, 420 XMMRegister xtmp, Register rscratch = noreg); 421 422 void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, int vec_enc); 423 424 void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 425 XMMRegister xtmp2, Register rtmp, int vec_enc); 426 427 void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 428 XMMRegister xtmp2, Register rtmp, int vec_enc); 429 430 void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 431 XMMRegister xtmp2, Register rtmp, int vec_enc); 432 433 void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 434 XMMRegister xtmp2, Register rtmp, int vec_enc); 435 436 void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 437 XMMRegister xtmp2, Register rtmp, int vec_enc); 438 439 void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src, 440 KRegister mask, bool merge, int vec_enc); 441 442 void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc); 443 444 void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 445 XMMRegister xtmp2, Register rtmp, int vec_enc); 446 447 void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, 448 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, 449 KRegister ktmp, Register rtmp, bool merge, int vec_enc); 450 451 void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 452 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 453 454 void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 455 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 456 457 void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 458 XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc); 459 460 void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 461 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 462 463 void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 464 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 465 466 void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc); 467 468 void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc); 469 470 void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 471 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp, 472 Register rtmp, int vec_enc); 473 474 void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src, 475 XMMRegister xtmp1, Register rtmp, int vec_enc); 476 477 void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 478 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc); 479 480 void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one, 481 XMMRegister xtmp1, int vec_enc); 482 483 void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one, 484 KRegister ktmp1, int vec_enc); 485 486 void vmovmask(BasicType elem_bt, XMMRegister dst, Address src, XMMRegister mask, int vec_enc); 487 488 void vmovmask(BasicType elem_bt, Address dst, XMMRegister src, XMMRegister mask, int vec_enc); 489 490 void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1, 491 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc); 492 493 void vector_rearrange_int_float(BasicType bt, XMMRegister dst, XMMRegister shuffle, 494 XMMRegister src, int vlen_enc); 495 496 void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); 497 498 void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register offset, 499 Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, 500 Register midx, Register length, int vector_len, int vlen_enc); 501 502 void vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, 503 Register offset, Register mask, Register midx, Register rtmp, int vlen_enc); 504 505 void vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, 506 Register offset, Register rtmp, int vlen_enc); 507 508 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, bool is_unsigned, int vlen_enc); 509 510 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, bool is_unsigned, int vlen_enc); 511 512 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 513 514 void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc); 515 516 void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 517 518 void vector_saturating_unsigned_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc); 519 520 void vector_sub_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, KRegister ktmp, int vlen_enc); 521 522 void vector_sub_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 523 XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); 524 525 void vector_add_dq_saturating_unsigned_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 526 XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp, int vlen_enc); 527 528 void vector_add_dq_saturating_unsigned_avx(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 529 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, int vlen_enc); 530 531 void vector_addsub_dq_saturating_avx(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 532 XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, int vlen_enc); 533 534 void vector_addsub_dq_saturating_evex(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, 535 XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, int vlen_enc); 536 537 void evpmovd2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false); 538 539 void evpmovq2m_emu(KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc, bool xtmp2_hold_M1 = false); 540 541 void vpsign_extend_dq(BasicType etype, XMMRegister dst, XMMRegister src, int vlen_enc); 542 543 void vpgenmin_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false); 544 545 void vpgenmax_value(BasicType etype, XMMRegister dst, XMMRegister allones, int vlen_enc, bool compute_allones = false); 546 547 void evpcmpu(BasicType etype, KRegister kmask, XMMRegister src1, XMMRegister src2, Assembler::ComparisonPredicate cond, int vlen_enc); 548 549 void vpcmpgt(BasicType etype, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 550 551 void evpmov_vec_to_mask(BasicType etype, KRegister ktmp, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 552 int vlen_enc, bool xtmp2_hold_M1 = false); 553 554 void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2, 555 bool is_unsigned, bool merge, int vlen_enc); 556 557 void evmasked_saturating_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2, 558 bool is_unsigned, bool merge, int vlen_enc); 559 560 void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2, 561 bool merge, int vlen_enc); 562 563 void evmasked_saturating_signed_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, Address src2, 564 bool merge, int vlen_enc); 565 566 void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, 567 XMMRegister src2, bool merge, int vlen_enc); 568 569 void evmasked_saturating_unsigned_op(int ideal_opc, BasicType elem_bt, KRegister mask, XMMRegister dst, XMMRegister src1, 570 Address src2, bool merge, int vlen_enc); 571 572 void select_from_two_vectors_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 573 574 void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); 575 576 void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc); 577 578 void vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, 579 KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); 580 581 void scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, 582 KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2); 583 584 void reconstruct_frame_pointer(Register rtmp); 585 586 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP