/*
 * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP

// C2_MacroAssembler contains high-level macros for C2
//
// This file holds member declarations only: access specifiers appear directly
// inside the include guard with no enclosing class header, so it is presumably
// #included from within the C2_MacroAssembler class body (confirm against the
// shared header). All types used here (Register, FloatRegister, PRegister,
// BasicType, Condition, SIMD_Arrangement, SIMD_RegVariant, address, ...) are
// expected to be in scope at the point of inclusion.

 private:
  // Return true if the phase output is in the scratch emit size mode.
  // Declared "virtual ... override", i.e. it overrides a virtual function of a
  // base class that is not visible in this file.
  virtual bool in_scratch_emit_size() override;

  // Private helper taking an opcode selector, an "sf" flag, three Register
  // operands and an optional shift description; "kind" defaults to
  // Assembler::LSL and "shift" defaults to 0.
  // NOTE(review): presumably the building block of neon_reduce_logical()
  // declared below; confirm in the .cpp.
  void neon_reduce_logical_helper(int opc, bool sf, Register Rd, Register Rn, Register Rm,
                                  enum shift_kind kind = Assembler::LSL, unsigned shift = 0);

 public:
  // NOTE(review): takes no arguments; what is emitted is defined in the .cpp
  // and cannot be determined from this header.
  void entry_barrier();

  // jdk.internal.util.ArraysSupport.vectorizedHashCode
  // Takes the array, count and result registers, thirteen FloatRegister
  // operands (vdata0-3, vmul0-3, vpow, vpowm) and the element BasicType.
  // NOTE(review): the meaning of the returned address is not visible here.
  address arrays_hashcode(Register ary, Register cnt, Register result, FloatRegister vdata0,
                          FloatRegister vdata1, FloatRegister vdata2, FloatRegister vdata3,
                          FloatRegister vmul0, FloatRegister vmul1, FloatRegister vmul2,
                          FloatRegister vmul3, FloatRegister vpow, FloatRegister vpowm,
                          BasicType eltype);

  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
  // Note the asymmetry: fast_lock takes three temporaries, fast_unlock two.
  void fast_lock(Register object, Register box, Register tmp, Register tmp2, Register tmp3);
  void fast_unlock(Register object, Register box, Register tmp, Register tmp2);
  // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
  // Both lightweight variants take three temporaries (t1, t2, t3).
  void fast_lock_lightweight(Register object, Register box, Register t1, Register t2, Register t3);
  void fast_unlock_lightweight(Register object, Register box, Register t1, Register t2, Register t3);

  // String intrinsics.
  // NOTE(review): "ae" presumably selects the encoding combination of the two
  // string arguments; confirm against the .cpp and the .ad file.
  void string_compare(Register str1, Register str2,
                      Register cnt1, Register cnt2, Register result,
                      Register tmp1, Register tmp2, FloatRegister vtmp1,
                      FloatRegister vtmp2, FloatRegister vtmp3,
                      PRegister pgtmp1, PRegister pgtmp2, int ae);

  // NOTE(review): "int_cnt1" is a plain int alongside the "cnt1" register;
  // presumably a compile-time-known length for str2 -- confirm in the .cpp.
  void string_indexof(Register str1, Register str2,
                      Register cnt1, Register cnt2,
                      Register tmp1, Register tmp2,
                      Register tmp3, Register tmp4,
                      Register tmp5, Register tmp6,
                      int int_cnt1, Register result, int ae);

  // Single-character search variants. string_indexof_char and
  // stringL_indexof_char have identical signatures.
  // NOTE(review): the "L" prefix / "isL" flag presumably denote Latin-1
  // encoded strings; confirm in the .cpp.
  void string_indexof_char(Register str1, Register cnt1,
                           Register ch, Register result,
                           Register tmp1, Register tmp2, Register tmp3);

  void stringL_indexof_char(Register str1, Register cnt1,
                            Register ch, Register result,
                            Register tmp1, Register tmp2, Register tmp3);

  // SVE variant: uses two FloatRegister temporaries and two PRegister
  // temporaries instead of general-purpose temporaries, and selects the
  // string flavor through "isL".
  void string_indexof_char_sve(Register str1, Register cnt1,
                               Register ch, Register result,
                               FloatRegister ztmp1, FloatRegister ztmp2,
                               PRegister pgtmp, PRegister ptmp, bool isL);

  // Compress the least significant bit of each byte to the rightmost and clear
  // the higher garbage bits.
  void bytemask_compress(Register dst);

  // Pack the lowest-numbered bit of each mask element in src into a long value
  // in dst, at most the first 64 lane elements.
  void sve_vmask_tolong(Register dst, PRegister src, BasicType bt, int lane_cnt,
                        FloatRegister vtmp1, FloatRegister vtmp2);

  // Unpack the mask, a long value in src, into predicate register dst based on the
  // corresponding data type. Note that dst can support at most 64 lanes.
  void sve_vmask_fromlong(PRegister dst, Register src, BasicType bt, int lane_cnt,
                          FloatRegister vtmp1, FloatRegister vtmp2);

  // SIMD&FP comparison
  // The NEON variants write a FloatRegister result; the SVE variant writes a
  // predicate register (pd) and takes a predicate operand (pg).
  void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
                    FloatRegister src2, Condition cond, bool isQ);

  void neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
                         Condition cond, bool isQ);

  void sve_compare(PRegister pd, BasicType bt, PRegister pg,
                   FloatRegister zn, FloatRegister zm, Condition cond);

  // NOTE(review): by its name, yields the index of the last true lane of the
  // predicate src in dst; the result for an all-false mask is not visible
  // here -- confirm in the .cpp.
  void sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp);

  // Vector cast
  // The extend variants accept an optional is_unsigned flag (default false).
  void neon_vector_extend(FloatRegister dst, BasicType dst_bt, unsigned dst_vlen_in_bytes,
                          FloatRegister src, BasicType src_bt, bool is_unsigned = false);

  void neon_vector_narrow(FloatRegister dst, BasicType dst_bt,
                          FloatRegister src, BasicType src_bt, unsigned src_vlen_in_bytes);

  void sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size,
                         FloatRegister src, SIMD_RegVariant src_size, bool is_unsigned = false);

  void sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst_size,
                         FloatRegister src, SIMD_RegVariant src_size, FloatRegister tmp);

  // Predicate (mask) casts between element sizes given in bytes. The narrow
  // variant additionally takes a predicate temporary.
  // NOTE(review): "src_element_lenght_in_bytes" looks like a misspelling of
  // "length" in both declarations; left as-is so the header keeps matching
  // whatever the definitions use.
  void sve_vmaskcast_extend(PRegister dst, PRegister src,
                            uint dst_element_length_in_bytes, uint src_element_lenght_in_bytes);

  void sve_vmaskcast_narrow(PRegister dst, PRegister src, PRegister ptmp,
                            uint dst_element_length_in_bytes, uint src_element_lenght_in_bytes);

  // Vector reduction
  // Each reduction takes a scalar input (isrc / fsrc / src1) together with a
  // vector input (vsrc / src2).
  // NOTE(review): the "opc" argument of the logical, minmax and SVE variants
  // presumably selects the reduction operation; confirm in the .cpp.
  void neon_reduce_add_integral(Register dst, BasicType bt,
                                Register isrc, FloatRegister vsrc,
                                unsigned vector_length_in_bytes, FloatRegister vtmp);

  void neon_reduce_mul_integral(Register dst, BasicType bt,
                                Register isrc, FloatRegister vsrc,
                                unsigned vector_length_in_bytes,
                                FloatRegister vtmp1, FloatRegister vtmp2);

  void neon_reduce_mul_fp(FloatRegister dst, BasicType bt,
                          FloatRegister fsrc, FloatRegister vsrc,
                          unsigned vector_length_in_bytes, FloatRegister vtmp);

  void neon_reduce_logical(int opc, Register dst, BasicType bt, Register isrc,
                           FloatRegister vsrc, unsigned vector_length_in_bytes);

  void neon_reduce_minmax_integral(int opc, Register dst, BasicType bt,
                                   Register isrc, FloatRegister vsrc,
                                   unsigned vector_length_in_bytes, FloatRegister vtmp);

  void sve_reduce_integral(int opc, Register dst, BasicType bt, Register src1,
                           FloatRegister src2, PRegister pg, FloatRegister tmp);

  // Set elements of the dst predicate to true for lanes in the range of
  // [0, lane_cnt), or to false otherwise. The input "lane_cnt" should be
  // smaller than or equal to the supported max vector length of the basic
  // type. Clobbers: rscratch1 and the rFlagsReg.
  void sve_gen_mask_imm(PRegister dst, BasicType bt, uint32_t lane_cnt);

  // Extract a scalar element from an sve vector at position 'idx'.
  // The input elements in src are expected to be of integral type.
  void sve_extract_integral(Register dst, BasicType bt, FloatRegister src,
                            int idx, FloatRegister vtmp);

  // java.lang.Math::round intrinsics
  // The NEON variant is parameterized by a SIMD_Arrangement and uses three
  // FloatRegister temporaries; the SVE variant is parameterized by a
  // SIMD_RegVariant and uses two FloatRegister temporaries plus a predicate
  // temporary.
  void vector_round_neon(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
                         FloatRegister tmp2, FloatRegister tmp3,
                         SIMD_Arrangement T);
  void vector_round_sve(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
                        FloatRegister tmp2, PRegister pgtmp,
                        SIMD_RegVariant T);

  // Pack active elements of src, under the control of mask, into the
  // lowest-numbered elements of dst. Any remaining elements of dst will
  // be filled with zero.
  void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
                         FloatRegister vtmp1, FloatRegister vtmp2,
                         FloatRegister vtmp3, FloatRegister vtmp4,
                         PRegister ptmp, PRegister pgtmp);

  // NOTE(review): presumably the same contract as sve_compress_byte for
  // short-sized elements; it needs fewer temporaries (two vector, one
  // predicate). Confirm in the .cpp.
  void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
                          FloatRegister vtmp1, FloatRegister vtmp2,
                          PRegister pgtmp);

  // Bit / byte reversal on NEON vectors; both take the element BasicType and
  // an isQ flag.
  // NOTE(review): isQ presumably selects the 128-bit (Q) register form over
  // the 64-bit form; confirm in the .cpp.
  void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);

  void neon_reverse_bytes(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);

  // java.lang.Math::signum intrinsics
  // Callers supply registers named "zero" and "one".
  // NOTE(review): presumably these must already hold the constants 0.0 and
  // 1.0 on entry; confirm in the .cpp. The SVE variant additionally needs a
  // vector temporary and a predicate temporary.
  void vector_signum_neon(FloatRegister dst, FloatRegister src, FloatRegister zero,
                          FloatRegister one, SIMD_Arrangement T);

  void vector_signum_sve(FloatRegister dst, FloatRegister src, FloatRegister zero,
                         FloatRegister one, FloatRegister vtmp, PRegister pgtmp, SIMD_RegVariant T);

#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP