1 /*
2 * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
26 #define CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
27
28 // C2_MacroAssembler contains high-level macros for C2
29
30 private:
31 // Return true if the phase output is in the scratch emit size mode.
32 virtual bool in_scratch_emit_size() override;
33
34 void neon_reduce_logical_helper(int opc, bool sf, Register Rd, Register Rn, Register Rm,
35 enum shift_kind kind = Assembler::LSL, unsigned shift = 0);
36
37 public:
38 // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
39 void fast_lock(Register object, Register box, Register tmp, Register tmp2, Register tmp3);
40 void fast_unlock(Register object, Register box, Register tmp, Register tmp2);
41 // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
42 void fast_lock_lightweight(Register object, Register t1, Register t2, Register t3);
43 void fast_unlock_lightweight(Register object, Register t1, Register t2, Register t3);
44
45 void string_compare(Register str1, Register str2,
46 Register cnt1, Register cnt2, Register result,
47 Register tmp1, Register tmp2, FloatRegister vtmp1,
48 FloatRegister vtmp2, FloatRegister vtmp3,
49 PRegister pgtmp1, PRegister pgtmp2, int ae);
50
51 void string_indexof(Register str1, Register str2,
52 Register cnt1, Register cnt2,
53 Register tmp1, Register tmp2,
54 Register tmp3, Register tmp4,
55 Register tmp5, Register tmp6,
56 int int_cnt1, Register result, int ae);
57
58 void string_indexof_char(Register str1, Register cnt1,
59 Register ch, Register result,
60 Register tmp1, Register tmp2, Register tmp3);
61
62 void stringL_indexof_char(Register str1, Register cnt1,
63 Register ch, Register result,
64 Register tmp1, Register tmp2, Register tmp3);
65
66 void string_indexof_char_sve(Register str1, Register cnt1,
67 Register ch, Register result,
68 FloatRegister ztmp1, FloatRegister ztmp2,
69 PRegister pgtmp, PRegister ptmp, bool isL);
70
71 // Compress the least significant bit of each byte to the rightmost and clear
72 // the higher garbage bits.
73 void bytemask_compress(Register dst);
74
75 // Pack the lowest-numbered bit of each mask element in src into a long value
76 // in dst, at most the first 64 lane elements.
77 void sve_vmask_tolong(Register dst, PRegister src, BasicType bt, int lane_cnt,
78 FloatRegister vtmp1, FloatRegister vtmp2);
79
80 // Unpack the mask, a long value in src, into predicate register dst based on the
81 // corresponding data type. Note that dst can support at most 64 lanes.
82 void sve_vmask_fromlong(PRegister dst, Register src, BasicType bt, int lane_cnt,
83 FloatRegister vtmp1, FloatRegister vtmp2);
84
85 // SIMD&FP comparison
86 void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
87 FloatRegister src2, Condition cond, bool isQ);
88
89 void neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
90 Condition cond, bool isQ);
91
92 void sve_compare(PRegister pd, BasicType bt, PRegister pg,
93 FloatRegister zn, FloatRegister zm, Condition cond);
94
95 void sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp);
96
97 // Vector cast
98 void neon_vector_extend(FloatRegister dst, BasicType dst_bt, unsigned dst_vlen_in_bytes,
99 FloatRegister src, BasicType src_bt);
100
101 void neon_vector_narrow(FloatRegister dst, BasicType dst_bt,
102 FloatRegister src, BasicType src_bt, unsigned src_vlen_in_bytes);
103
104 void sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size,
105 FloatRegister src, SIMD_RegVariant src_size);
106
107 void sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst_size,
108 FloatRegister src, SIMD_RegVariant src_size, FloatRegister tmp);
109
110 void sve_vmaskcast_extend(PRegister dst, PRegister src,
111 uint dst_element_length_in_bytes, uint src_element_lenght_in_bytes);
112
113 void sve_vmaskcast_narrow(PRegister dst, PRegister src, PRegister ptmp,
114 uint dst_element_length_in_bytes, uint src_element_lenght_in_bytes);
115
116 // Vector reduction
117 void neon_reduce_add_integral(Register dst, BasicType bt,
118 Register isrc, FloatRegister vsrc,
119 unsigned vector_length_in_bytes, FloatRegister vtmp);
120
121 void neon_reduce_mul_integral(Register dst, BasicType bt,
122 Register isrc, FloatRegister vsrc,
123 unsigned vector_length_in_bytes,
124 FloatRegister vtmp1, FloatRegister vtmp2);
125
126 void neon_reduce_mul_fp(FloatRegister dst, BasicType bt,
127 FloatRegister fsrc, FloatRegister vsrc,
128 unsigned vector_length_in_bytes, FloatRegister vtmp);
129
130 void neon_reduce_logical(int opc, Register dst, BasicType bt, Register isrc,
131 FloatRegister vsrc, unsigned vector_length_in_bytes);
132
133 void neon_reduce_minmax_integral(int opc, Register dst, BasicType bt,
134 Register isrc, FloatRegister vsrc,
135 unsigned vector_length_in_bytes, FloatRegister vtmp);
136
137 void sve_reduce_integral(int opc, Register dst, BasicType bt, Register src1,
138 FloatRegister src2, PRegister pg, FloatRegister tmp);
139
140 // Set elements of the dst predicate to true for lanes in the range of
141 // [0, lane_cnt), or to false otherwise. The input "lane_cnt" should be
142 // smaller than or equal to the supported max vector length of the basic
143 // type. Clobbers: rscratch1 and the rFlagsReg.
144 void sve_gen_mask_imm(PRegister dst, BasicType bt, uint32_t lane_cnt);
145
146 // Extract a scalar element from an sve vector at position 'idx'.
147 // The input elements in src are expected to be of integral type.
148 void sve_extract_integral(Register dst, BasicType bt, FloatRegister src,
149 int idx, FloatRegister vtmp);
150
151 // java.lang.Math::round intrinsics
152 void vector_round_neon(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
153 FloatRegister tmp2, FloatRegister tmp3,
154 SIMD_Arrangement T);
155 void vector_round_sve(FloatRegister dst, FloatRegister src, FloatRegister tmp1,
156 FloatRegister tmp2, PRegister pgtmp,
157 SIMD_RegVariant T);
158
159 // Pack active elements of src, under the control of mask, into the
160 // lowest-numbered elements of dst. Any remaining elements of dst will
161 // be filled with zero.
162 void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
163 FloatRegister vtmp1, FloatRegister vtmp2,
164 FloatRegister vtmp3, FloatRegister vtmp4,
165 PRegister ptmp, PRegister pgtmp);
166
167 void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
168 FloatRegister vtmp1, FloatRegister vtmp2,
169 PRegister pgtmp);
170
171 void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);
172
173 void neon_reverse_bytes(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);
174
175 // java.lang.Math::signum intrinsics
176 void vector_signum_neon(FloatRegister dst, FloatRegister src, FloatRegister zero,
177 FloatRegister one, SIMD_Arrangement T);
178
179 void vector_signum_sve(FloatRegister dst, FloatRegister src, FloatRegister zero,
180 FloatRegister one, FloatRegister vtmp, PRegister pgtmp, SIMD_RegVariant T);
181
182 void load_nklass_compact(Register dst, Register obj, Register index, int scale, int disp);
183
184 #endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP