1 /* 2 * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP 26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP 27 28 // C2_MacroAssembler contains high-level macros for C2 29 30 public: 31 Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes); 32 33 // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. 34 // See full description in macroAssembler_x86.cpp. 35 void fast_lock(Register obj, Register box, Register tmp, 36 Register scr, Register cx1, Register cx2, 37 RTMLockingCounters* rtm_counters, 38 RTMLockingCounters* stack_rtm_counters, 39 Metadata* method_data, 40 bool use_rtm, bool profile_rtm); 41 void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm); 42 43 #if INCLUDE_RTM_OPT 44 void rtm_counters_update(Register abort_status, Register rtm_counters); 45 void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel); 46 void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg, 47 RTMLockingCounters* rtm_counters, 48 Metadata* method_data); 49 void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg, 50 RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm); 51 void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel); 52 void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel); 53 void rtm_stack_locking(Register obj, Register tmp, Register scr, 54 Register retry_on_abort_count, 55 RTMLockingCounters* stack_rtm_counters, 56 Metadata* method_data, bool profile_rtm, 57 Label& DONE_LABEL, Label& IsInflated); 58 void rtm_inflated_locking(Register obj, Register box, Register tmp, 59 Register scr, Register retry_on_busy_count, 60 Register retry_on_abort_count, 61 RTMLockingCounters* rtm_counters, 62 Metadata* method_data, bool profile_rtm, 63 Label& DONE_LABEL); 64 #endif 65 66 // Generic instructions support for use in .ad files C2 code generation 67 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr); 68 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); 69 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr); 70 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); 71 72 void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, 73 XMMRegister tmp = xnoreg); 74 void vpminmax(int opcode, BasicType elem_bt, 75 XMMRegister dst, XMMRegister src1, XMMRegister src2, 76 int vlen_enc); 77 78 void vminmax_fp(int opcode, BasicType elem_bt, 79 XMMRegister dst, XMMRegister a, XMMRegister b, 80 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, 81 int vlen_enc); 82 void evminmax_fp(int opcode, BasicType elem_bt, 83 XMMRegister dst, XMMRegister a, XMMRegister b, 84 KRegister ktmp, XMMRegister atmp, XMMRegister btmp, 85 int vlen_enc); 86 87 void signum_fp(int opcode, XMMRegister dst, 88 XMMRegister zero, XMMRegister one, 89 Register scratch); 90 91 void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 92 void vextendbw(bool sign, XMMRegister dst, XMMRegister src); 93 void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 94 void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 95 96 void vshiftd(int opcode, XMMRegister dst, XMMRegister shift); 97 void vshiftd_imm(int opcode, XMMRegister dst, int shift); 98 void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 99 void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 100 void vshiftw(int opcode, XMMRegister dst, XMMRegister shift); 101 void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 102 void vshiftq(int opcode, XMMRegister dst, XMMRegister shift); 103 void vshiftq_imm(int opcode, XMMRegister dst, int shift); 104 void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 105 void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 106 107 void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len); 108 void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); 109 110 void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 111 void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 112 void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg); 113 void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch); 114 void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch); 115 116 void insert(BasicType typ, XMMRegister dst, Register val, int idx); 117 void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx); 118 void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len); 119 void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len); 120 void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len); 121 122 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len); 123 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len); 124 125 // extract 126 void extract(BasicType typ, Register dst, XMMRegister src, int idx); 127 XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex); 128 void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex); 129 void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg); 130 131 // vector test 132 void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, 133 XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg); 134 135 // blend 136 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1); 137 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len); 138 void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len); 139 140 void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy); 141 void load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, Register tmp, bool novlbwdq, int vlen_enc); 142 143 void load_vector(XMMRegister dst, Address src, int vlen_in_bytes); 144 void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = rscratch1); 145 void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes); 146 147 // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. 148 149 // dst = src1 reduce(op, src2) using vtmp as temps 150 void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 151 #ifdef _LP64 152 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 153 void genmask(KRegister dst, Register len, Register temp); 154 #endif // _LP64 155 156 // dst = reduce(op, src2) using vtmp as temps 157 void reduce_fp(int opcode, int vlen, 158 XMMRegister dst, XMMRegister src, 159 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg); 160 void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 161 void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 162 void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 163 void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, 164 XMMRegister dst, XMMRegister src, 165 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 166 void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, 167 XMMRegister dst, XMMRegister src, 168 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 169 private: 170 void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 171 void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 172 173 // Int Reduction 174 void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 175 void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 176 void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 177 void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 178 179 // Byte Reduction 180 void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 181 void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 182 void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 183 void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 184 void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 185 void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 186 void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 187 void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 188 189 // Short Reduction 190 void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 191 void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 192 void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 193 void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 194 195 // Long Reduction 196 #ifdef _LP64 197 void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 198 void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 199 void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 200 #endif // _LP64 201 202 // Float Reduction 203 void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 204 void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 205 void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 206 void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 207 208 // Double Reduction 209 void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 210 void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 211 void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 212 213 // Base reduction instruction 214 void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src); 215 void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); 216 217 public: 218 #ifdef _LP64 219 void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen); 220 221 void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc); 222 223 void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, 224 Register tmp, int masklen, BasicType bt, int vec_enc); 225 void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1, 226 Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc); 227 #endif 228 229 void vector_maskall_operation(KRegister dst, Register src, int mask_len); 230 231 #ifndef _LP64 232 void vector_maskall_operation32(KRegister dst, Register src, KRegister ktmp, int mask_len); 233 #endif 234 235 void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, 236 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 237 238 void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, 239 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 240 241 // IndexOf strings. 242 // Small strings are loaded through stack if they cross page boundary. 243 void string_indexof(Register str1, Register str2, 244 Register cnt1, Register cnt2, 245 int int_cnt2, Register result, 246 XMMRegister vec, Register tmp, 247 int ae); 248 249 // IndexOf for constant substrings with size >= 8 elements 250 // which don't need to be loaded through stack. 251 void string_indexofC8(Register str1, Register str2, 252 Register cnt1, Register cnt2, 253 int int_cnt2, Register result, 254 XMMRegister vec, Register tmp, 255 int ae); 256 257 // Smallest code: we don't need to load through stack, 258 // check string tail. 259 260 // helper function for string_compare 261 void load_next_elements(Register elem1, Register elem2, Register str1, Register str2, 262 Address::ScaleFactor scale, Address::ScaleFactor scale1, 263 Address::ScaleFactor scale2, Register index, int ae); 264 // Compare strings. 265 void string_compare(Register str1, Register str2, 266 Register cnt1, Register cnt2, Register result, 267 XMMRegister vec1, int ae, KRegister mask = knoreg); 268 269 // Search for Non-ASCII character (Negative byte value) in a byte array, 270 // return index of the first such character, otherwise len. 271 void count_positives(Register ary1, Register len, 272 Register result, Register tmp1, 273 XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg); 274 // Compare char[] or byte[] arrays. 275 void arrays_equals(bool is_array_equ, Register ary1, Register ary2, 276 Register limit, Register result, Register chr, 277 XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg); 278 279 280 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, 281 XMMRegister dst, XMMRegister src1, XMMRegister src2, 282 bool merge, int vlen_enc, bool is_varshift = false); 283 284 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, 285 XMMRegister dst, XMMRegister src1, Address src2, 286 bool merge, int vlen_enc); 287 288 void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, 289 XMMRegister src1, int imm8, bool merge, int vlen_enc); 290 291 void masked_op(int ideal_opc, int mask_len, KRegister dst, 292 KRegister src1, KRegister src2); 293 294 void vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 295 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, 296 AddressLiteral float_sign_flip, Register scratch, int vec_enc); 297 298 void vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 299 KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, 300 Register scratch, int vec_enc); 301 302 303 void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 304 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, 305 Register scratch, int vec_enc); 306 307 void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc, 308 BasicType from_elem_bt, BasicType to_elem_bt); 309 310 void vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 311 KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral double_sign_flip, 312 int vec_enc); 313 314 void vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 315 KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral float_sign_flip, 316 int vec_enc); 317 318 void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 319 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, 320 Register scratch, AddressLiteral float_sign_flip, 321 int vec_enc); 322 323 #ifdef _LP64 324 void vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 325 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, 326 AddressLiteral new_mxcsr, Register scratch, int vec_enc); 327 328 void vector_round_float_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 329 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, 330 AddressLiteral new_mxcsr, Register scratch, int vec_enc); 331 332 void vector_round_float_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, 333 XMMRegister xtmp3, XMMRegister xtmp4, AddressLiteral float_sign_flip, 334 AddressLiteral new_mxcsr, Register scratch, int vec_enc); 335 #endif 336 337 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3, 338 bool merge, BasicType bt, int vlen_enc); 339 340 void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3, 341 bool merge, BasicType bt, int vlen_enc); 342 343 void udivI(Register rax, Register divisor, Register rdx); 344 void umodI(Register rax, Register divisor, Register rdx); 345 void udivmodI(Register rax, Register divisor, Register rdx, Register tmp); 346 347 #ifdef _LP64 348 void udivL(Register rax, Register divisor, Register rdx); 349 void umodL(Register rax, Register divisor, Register rdx); 350 void udivmodL(Register rax, Register divisor, Register rdx, Register tmp); 351 #endif 352 void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 353 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, 354 int vec_enc); 355 356 void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, 357 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, 358 int vec_enc); 359 360 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP