/*
 * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP

// C2_MacroAssembler contains high-level macros for C2

 private:
  // Return true if the phase output is in the scratch emit size mode.
  virtual bool in_scratch_emit_size() override;

  void neon_reduce_logical_helper(int opc, bool sf, Register Rd, Register Rn, Register Rm,
                                  enum shift_kind kind = Assembler::LSL, unsigned shift = 0);

 public:
  void entry_barrier();

  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
  void fast_lock(Register object, Register box, Register tmp, Register tmp2, Register tmp3);
  void fast_unlock(Register object, Register box, Register tmp, Register tmp2);
  // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
  void fast_lock_lightweight(Register object, Register box, Register t1, Register t2, Register t3);
  void fast_unlock_lightweight(Register object, Register box, Register t1, Register t2, Register t3);

  void string_compare(Register str1, Register str2,
                      Register cnt1, Register cnt2, Register result,
                      Register tmp1, Register tmp2, FloatRegister vtmp1,
                      FloatRegister vtmp2, FloatRegister vtmp3,
                      PRegister pgtmp1, PRegister pgtmp2, int ae);

  void string_indexof(Register str1, Register str2,
                      Register cnt1, Register cnt2,
                      Register tmp1, Register tmp2,
                      Register tmp3, Register tmp4,
                      Register tmp5, Register tmp6,
                      int int_cnt1, Register result, int ae);

  void string_indexof_char(Register str1, Register cnt1,
                           Register ch, Register result,
                           Register tmp1, Register tmp2, Register tmp3);

  void stringL_indexof_char(Register str1, Register cnt1,
                            Register ch, Register result,
                            Register tmp1, Register tmp2, Register tmp3);

  void string_indexof_char_sve(Register str1, Register cnt1,
                               Register ch, Register result,
                               FloatRegister ztmp1, FloatRegister ztmp2,
                               PRegister pgtmp, PRegister ptmp, bool isL);

  // Compress the least significant bit of each byte to the rightmost and clear
  // the higher garbage bits.
  void bytemask_compress(Register dst);

  // Pack the lowest-numbered bit of each mask element in src into a long value
  // in dst, at most the first 64 lane elements.
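  // Illustrative example (not from the implementation): for bt == T_BYTE with
  // lane_cnt == 4 and mask lanes 0, 1 and 3 active, lane i maps to bit i of
  // dst, so dst would receive 0b1011 (0xB).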
  void sve_vmask_tolong(Register dst, PRegister src, BasicType bt, int lane_cnt,
                        FloatRegister vtmp1, FloatRegister vtmp2);

  // Unpack the mask, a long value in src, into predicate register dst based on the
  // corresponding data type. Note that dst can support at most 64 lanes.
  void sve_vmask_fromlong(PRegister dst, Register src, BasicType bt, int lane_cnt,
                          FloatRegister vtmp1, FloatRegister vtmp2);

  // SIMD&FP comparison
  void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
                    FloatRegister src2, Condition cond, bool isQ);

  void neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
                         Condition cond, bool isQ);

  void sve_compare(PRegister pd, BasicType bt, PRegister pg,
                   FloatRegister zn, FloatRegister zm, Condition cond);

  void sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp);

  // Vector cast
  void neon_vector_extend(FloatRegister dst, BasicType dst_bt, unsigned dst_vlen_in_bytes,
                          FloatRegister src, BasicType src_bt, bool is_unsigned = false);

  void neon_vector_narrow(FloatRegister dst, BasicType dst_bt,
                          FloatRegister src, BasicType src_bt, unsigned src_vlen_in_bytes);

  void sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size,
                         FloatRegister src, SIMD_RegVariant src_size, bool is_unsigned = false);

  void sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst_size,
                         FloatRegister src, SIMD_RegVariant src_size, FloatRegister tmp);

  void sve_vmaskcast_extend(PRegister dst, PRegister src,
                            uint dst_element_length_in_bytes, uint src_element_length_in_bytes);

  void sve_vmaskcast_narrow(PRegister dst, PRegister src, PRegister ptmp,
                            uint dst_element_length_in_bytes, uint src_element_length_in_bytes);

  // Vector reduction
  void neon_reduce_add_integral(Register dst, BasicType bt,
                                Register isrc, FloatRegister vsrc,
                                unsigned vector_length_in_bytes, FloatRegister vtmp);

  void neon_reduce_mul_integral(Register dst, BasicType bt,
                                Register isrc, FloatRegister vsrc,
                                unsigned vector_length_in_bytes,
                                FloatRegister vtmp1, FloatRegister vtmp2);

  void neon_reduce_mul_fp(FloatRegister dst, BasicType bt,
                          FloatRegister fsrc, FloatRegister vsrc,
                          unsigned vector_length_in_bytes, FloatRegister vtmp);

  void neon_reduce_logical(int opc, Register dst, BasicType bt, Register isrc,
                           FloatRegister vsrc, unsigned vector_length_in_bytes);

  void neon_reduce_minmax_integral(int opc, Register dst, BasicType bt,
                                   Register isrc, FloatRegister vsrc,
                                   unsigned vector_length_in_bytes, FloatRegister vtmp);

  void sve_reduce_integral(int opc, Register dst, BasicType bt, Register src1,
                           FloatRegister src2, PRegister pg, FloatRegister tmp);

  // Set elements of the dst predicate to true for lanes in the range of
  // [0, lane_cnt), or to false otherwise. The input "lane_cnt" should be
  // smaller than or equal to the supported max vector length of the basic
  // type. Clobbers: rscratch1 and the rFlagsReg.
  void sve_gen_mask_imm(PRegister dst, BasicType bt, uint32_t lane_cnt);

  // Extract a scalar element from an SVE vector at position 'idx'.
  // The input elements in src are expected to be of integral type.
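  // Illustrative example (not from the implementation): with bt == T_INT and
  // idx == 2, dst receives the 32-bit element held in lane 2 of src.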
  void sve_extract_integral(Register dst, BasicType bt, FloatRegister src,
                            int idx, FloatRegister vtmp);

  // java.lang.Math::round intrinsics
  void vector_round_neon(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
                         FloatRegister tmp2, FloatRegister tmp3,
                         SIMD_Arrangement T);
  void vector_round_sve(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
                        FloatRegister tmp2, PRegister pgtmp,
                        SIMD_RegVariant T);

  // Pack active elements of src, under the control of mask, into the
  // lowest-numbered elements of dst. Any remaining elements of dst will
  // be filled with zero.
  void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
                         FloatRegister vtmp1, FloatRegister vtmp2,
                         FloatRegister vtmp3, FloatRegister vtmp4,
                         PRegister ptmp, PRegister pgtmp);

  void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
                          FloatRegister vtmp1, FloatRegister vtmp2,
                          PRegister pgtmp);

  void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);

  void neon_reverse_bytes(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);

  // java.lang.Math::signum intrinsics
  void vector_signum_neon(FloatRegister dst, FloatRegister src, FloatRegister zero,
                          FloatRegister one, SIMD_Arrangement T);

  void vector_signum_sve(FloatRegister dst, FloatRegister src, FloatRegister zero,
                         FloatRegister one, FloatRegister vtmp, PRegister pgtmp, SIMD_RegVariant T);

#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP