1 /* 2 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP 26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP 27 28 // C2_MacroAssembler contains high-level macros for C2 29 30 public: 31 Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes); 32 33 // special instructions for EVEX 34 void setvectmask(Register dst, Register src, KRegister mask); 35 void restorevectmask(KRegister mask); 36 37 // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. 38 // See full desription in macroAssembler_x86.cpp. 39 void fast_lock(Register obj, Register box, Register tmp, 40 Register scr, Register cx1, Register cx2, Register thread, 41 BiasedLockingCounters* counters, 42 RTMLockingCounters* rtm_counters, 43 RTMLockingCounters* stack_rtm_counters, 44 Metadata* method_data, 45 bool use_rtm, bool profile_rtm); 46 void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm); 47 48 void fast_lock_lightweight(Register obj, Register box, Register rax_reg, 49 Register t, Register thread); 50 void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread); 51 52 #if INCLUDE_RTM_OPT 53 void rtm_counters_update(Register abort_status, Register rtm_counters); 54 void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel); 55 void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg, 56 RTMLockingCounters* rtm_counters, 57 Metadata* method_data); 58 void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg, 59 RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm); 60 void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel); 61 void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel); 62 void rtm_stack_locking(Register obj, Register tmp, Register scr, 63 Register retry_on_abort_count, 64 RTMLockingCounters* stack_rtm_counters, 65 Metadata* method_data, bool profile_rtm, 66 Label& DONE_LABEL, Label& IsInflated); 67 void rtm_inflated_locking(Register obj, Register box, Register tmp, 68 Register scr, Register retry_on_busy_count, 69 Register retry_on_abort_count, 70 RTMLockingCounters* rtm_counters, 71 Metadata* method_data, bool profile_rtm, 72 Label& DONE_LABEL); 73 #endif 74 75 // Generic instructions support for use in .ad files C2 code generation 76 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr); 77 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); 78 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr); 79 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); 80 81 void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, 82 XMMRegister tmp = xnoreg); 83 void vpminmax(int opcode, BasicType elem_bt, 84 XMMRegister dst, XMMRegister src1, XMMRegister src2, 85 int vlen_enc); 86 87 void vminmax_fp(int opcode, BasicType elem_bt, 88 XMMRegister dst, XMMRegister a, XMMRegister b, 89 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, 90 int vlen_enc); 91 void evminmax_fp(int opcode, BasicType elem_bt, 92 XMMRegister dst, XMMRegister a, XMMRegister b, 93 KRegister ktmp, XMMRegister atmp, XMMRegister btmp, 94 int vlen_enc); 95 96 void signum_fp(int opcode, XMMRegister dst, 97 XMMRegister zero, XMMRegister one, 98 Register scratch); 99 100 void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 101 void vextendbw(bool sign, XMMRegister dst, XMMRegister src); 102 void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 103 void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); 104 105 void vshiftd(int opcode, XMMRegister dst, XMMRegister shift); 106 void vshiftd_imm(int opcode, XMMRegister dst, int shift); 107 void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 108 void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 109 void vshiftw(int opcode, XMMRegister dst, XMMRegister shift); 110 void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 111 void vshiftq(int opcode, XMMRegister dst, XMMRegister shift); 112 void vshiftq_imm(int opcode, XMMRegister dst, int shift); 113 void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 114 void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); 115 116 void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len); 117 void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); 118 119 void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 120 void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); 121 void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg); 122 void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch); 123 void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch); 124 125 void insert(BasicType typ, XMMRegister dst, Register val, int idx); 126 void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx); 127 void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len); 128 void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len); 129 void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len); 130 131 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len); 132 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len); 133 134 // extract 135 void extract(BasicType typ, Register dst, XMMRegister src, int idx); 136 XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex); 137 void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex); 138 void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg); 139 void movsxl(BasicType typ, Register dst); 140 141 // vector test 142 void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, 143 XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg); 144 145 // blend 146 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1); 147 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len); 148 void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len); 149 150 void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy); 151 void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes); 152 153 // vector compare 154 void vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes, 155 XMMRegister vtmp1, XMMRegister vtmp2, Register scratch); 156 void vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes, 157 XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch); 158 159 // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. 160 161 // dst = src1 reduce(op, src2) using vtmp as temps 162 void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 163 #ifdef _LP64 164 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 165 void genmask(KRegister dst, Register len, Register temp); 166 #endif // _LP64 167 168 // dst = reduce(op, src2) using vtmp as temps 169 void reduce_fp(int opcode, int vlen, 170 XMMRegister dst, XMMRegister src, 171 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg); 172 void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 173 void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 174 void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 175 void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, 176 XMMRegister dst, XMMRegister src, 177 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 178 void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, 179 XMMRegister dst, XMMRegister src, 180 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); 181 private: 182 void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 183 void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 184 185 // Int Reduction 186 void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 187 void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 188 void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 189 void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 190 191 // Byte Reduction 192 void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 193 void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 194 void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 195 void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 196 void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 197 void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 198 void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 199 void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 200 201 // Short Reduction 202 void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 203 void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 204 void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 205 void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 206 207 // Long Reduction 208 #ifdef _LP64 209 void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 210 void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 211 void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); 212 #endif // _LP64 213 214 // Float Reduction 215 void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 216 void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 217 void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 218 void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 219 220 // Double Reduction 221 void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); 222 void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 223 void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); 224 225 // Base reduction instruction 226 void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src); 227 void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); 228 229 public: 230 #ifdef _LP64 231 void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, Register tmp, 232 KRegister ktmp, int masklen, int vec_enc); 233 234 void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, XMMRegister xtmp1, 235 Register tmp, int masklen, int vec_enc); 236 #endif 237 void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, 238 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 239 240 void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, 241 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); 242 243 // IndexOf strings. 244 // Small strings are loaded through stack if they cross page boundary. 245 void string_indexof(Register str1, Register str2, 246 Register cnt1, Register cnt2, 247 int int_cnt2, Register result, 248 XMMRegister vec, Register tmp, 249 int ae); 250 251 // IndexOf for constant substrings with size >= 8 elements 252 // which don't need to be loaded through stack. 253 void string_indexofC8(Register str1, Register str2, 254 Register cnt1, Register cnt2, 255 int int_cnt2, Register result, 256 XMMRegister vec, Register tmp, 257 int ae); 258 259 // Smallest code: we don't need to load through stack, 260 // check string tail. 261 262 // helper function for string_compare 263 void load_next_elements(Register elem1, Register elem2, Register str1, Register str2, 264 Address::ScaleFactor scale, Address::ScaleFactor scale1, 265 Address::ScaleFactor scale2, Register index, int ae); 266 // Compare strings. 267 void string_compare(Register str1, Register str2, 268 Register cnt1, Register cnt2, Register result, 269 XMMRegister vec1, int ae, KRegister mask = knoreg); 270 271 // Search for Non-ASCII character (Negative byte value) in a byte array, 272 // return true if it has any and false otherwise. 273 void has_negatives(Register ary1, Register len, 274 Register result, Register tmp1, 275 XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg); 276 277 // Compare char[] or byte[] arrays. 278 void arrays_equals(bool is_array_equ, Register ary1, Register ary2, 279 Register limit, Register result, Register chr, 280 XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg); 281 282 void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1, 283 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc); 284 285 void load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp); 286 287 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP