1 /*
2 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef CPU_X86_C2_MACROASSEMBLER_X86_HPP
26 #define CPU_X86_C2_MACROASSEMBLER_X86_HPP
27
28 // C2_MacroAssembler contains high-level macros for C2
29
30 public:
// Takes a vector length in bytes (vlen_in_bytes) and returns the corresponding
// Assembler::AvxVectorLen value.
31 Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
32
33 // Special instructions for EVEX; both helpers take a KRegister `mask` operand.
// NOTE(review): the names suggest setvectmask sets up `mask` from `src` (with `dst`
// as a general-purpose register operand) and restorevectmask resets it — confirm
// against the implementation.
34 void setvectmask(Register dst, Register src, KRegister mask);
35 void restorevectmask(KRegister mask);
36
37 // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
38 // See full description in macroAssembler_x86.cpp.
// fast_lock parameters, by name: the object and box registers, general-purpose
// temporaries (tmp, scr, cx1, cx2), the thread register, counter / method-data
// pointers, and the use_rtm / profile_rtm flags.
// fast_unlock takes obj, box, one temporary and use_rtm only.
39 void fast_lock(Register obj, Register box, Register tmp,
40 Register scr, Register cx1, Register cx2, Register thread,
41 BiasedLockingCounters* counters,
42 RTMLockingCounters* rtm_counters,
43 RTMLockingCounters* stack_rtm_counters,
44 Metadata* method_data,
45 bool use_rtm, bool profile_rtm);
46 void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
47
// Lock / unlock variants suffixed "_lightweight". Unlike fast_lock / fast_unlock,
// these take no counters, method data or RTM flags; fast_unlock_lightweight also
// takes no box register.
// NOTE(review): the parameters named rax_reg / reg_rax presumably must be the rax
// register, and the two declarations spell that name differently — confirm and
// consider unifying.
48 void fast_lock_lightweight(Register obj, Register box, Register rax_reg,
49 Register t, Register thread);
50 void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread);
51
// RTM helpers: only declared when INCLUDE_RTM_OPT evaluates to non-zero.
// Several of them take Label& parameters (brLabel, retryLabel, DONE_LABEL,
// IsInflated), i.e. labels supplied by the caller.
52 #if INCLUDE_RTM_OPT
53 void rtm_counters_update(Register abort_status, Register rtm_counters);
54 void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
55 void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
56 RTMLockingCounters* rtm_counters,
57 Metadata* method_data);
58 void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
59 RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
60 void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
61 void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
// Stack-locking RTM path; takes two caller-provided labels (DONE_LABEL, IsInflated).
62 void rtm_stack_locking(Register obj, Register tmp, Register scr,
63 Register retry_on_abort_count,
64 RTMLockingCounters* stack_rtm_counters,
65 Metadata* method_data, bool profile_rtm,
66 Label& DONE_LABEL, Label& IsInflated);
// Inflated-locking RTM path; takes separate retry-on-busy and retry-on-abort
// count registers and a single caller-provided label (DONE_LABEL).
67 void rtm_inflated_locking(Register obj, Register box, Register tmp,
68 Register scr, Register retry_on_busy_count,
69 Register retry_on_abort_count,
70 RTMLockingCounters* rtm_counters,
71 Metadata* method_data, bool profile_rtm,
72 Label& DONE_LABEL);
73 #endif
74
75 // Generic instructions support for use in .ad files C2 code generation
// vabsnegd / vabsnegf: each is declared twice, once without and once with an
// explicit vector_len argument. All overloads take an opcode, dst/src XMM
// registers and a general-purpose register `scr`.
// NOTE(review): names suggest abs/neg on double (d) and float (f) values, with
// `scr` as a scratch register — confirm against the implementation.
76 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr);
77 void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
78 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
79 void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
80
// pminmax: two-operand form (dst, src) for element type elem_bt; `tmp` may be
// omitted and then defaults to xnoreg.
// vpminmax: three-operand form (dst, src1, src2) with an explicit vlen_enc.
// NOTE(review): presumably `opcode` selects min vs. max — confirm.
81 void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
82 XMMRegister tmp = xnoreg);
83 void vpminmax(int opcode, BasicType elem_bt,
84 XMMRegister dst, XMMRegister src1, XMMRegister src2,
85 int vlen_enc);
86
// Floating-point min/max helpers. The two declarations have the same parameter
// list except for the first temporary: vminmax_fp takes an XMMRegister `tmp`,
// evminmax_fp takes a KRegister `ktmp`.
// NOTE(review): the "ev" prefix presumably denotes the EVEX variant — confirm.
87 void vminmax_fp(int opcode, BasicType elem_bt,
88 XMMRegister dst, XMMRegister a, XMMRegister b,
89 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
90 int vlen_enc);
91 void evminmax_fp(int opcode, BasicType elem_bt,
92 XMMRegister dst, XMMRegister a, XMMRegister b,
93 KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
94 int vlen_enc);
95
// signum_fp: takes an opcode, a dst XMM register, two XMM registers named `zero`
// and `one`, and a general-purpose `scratch` register.
// NOTE(review): presumably `zero` / `one` must already hold the constants 0.0 and
// 1.0 of the matching precision — confirm against callers.
96 void signum_fp(int opcode, XMMRegister dst,
97 XMMRegister zero, XMMRegister one,
98 Register scratch);
99
// Vector element-extension helpers. vextendbw has an extra overload without
// vector_len; vextendbd and vextendwd always take vector_len.
// NOTE(review): presumably `sign` selects sign- vs. zero-extension, and the
// suffixes mean byte->word (bw), byte->dword (bd), word->dword (wd) — confirm.
100 void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
101 void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
102 void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
103 void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
104
// Vector shift helpers, grouped by suffix d / w / q.
// For each of vshiftd, vshiftw, vshiftq there is a two-operand overload
// (dst, shift) and a three-operand overload (dst, src, shift, vlen_enc), with the
// shift count held in an XMM register.
// The *_imm variants (declared for d and q only) take the shift count as an int;
// their three-operand form names the source `nds` and the length `vector_len`.
// NOTE(review): presumably d/w/q = dword/word/qword elements and `opcode` selects
// the shift kind — confirm.
105 void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
106 void vshiftd_imm(int opcode, XMMRegister dst, int shift);
107 void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
108 void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
109 void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
110 void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
111 void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
112 void vshiftq_imm(int opcode, XMMRegister dst, int shift);
113 void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
114 void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
115
// Vector rotate helpers for element type `etype`: vprotate_imm takes the count as
// an int, vprotate_var takes it in an XMM register.
// NOTE(review): presumably `opcode` selects the rotate direction — confirm.
116 void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
117 void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
118
// "varshift" helpers: shift counts are supplied in an XMM register `shift`.
// varshiftq accepts an optional vtmp (defaults to xnoreg); varshiftbw and
// evarshiftb additionally require a vector temporary and a general-purpose
// scratch register.
// NOTE(review): presumably these are per-element variable shifts — confirm.
119 void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
120 void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
121 void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
122 void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
123 void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
124
// Element insert and gather/scatter helpers, parameterized by BasicType `typ`.
// insert / vinsert: take a general-purpose register `val` and an int index `idx`;
// vinsert additionally takes a separate XMM `src`.
// vgather takes its mask in an XMM register; evgather / evscatter take it in a
// KRegister. All three address memory through a `base` register plus an XMM `idx`.
125 void insert(BasicType typ, XMMRegister dst, Register val, int idx);
126 void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
127 void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
128 void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
129 void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);
130
// evmovdqu: two overloads that both take a KRegister kmask and a vector_len.
// The first has an XMM destination and an Address source; the second has an
// Address destination and an XMM source.
131 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len);
132 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len);
133
134 // Extract: helpers that read an element out of a vector register.
// extract and the first get_elem overload write to a general-purpose register;
// the second get_elem overload writes to an XMM register and accepts optional
// temporaries (tmp defaults to noreg, vtmp to xnoreg).
// get_lane is the only helper here that returns a value (an XMMRegister).
// NOTE(review): movsxl presumably sign-extends `dst` according to `typ` — confirm.
135 void extract(BasicType typ, Register dst, XMMRegister src, int idx);
136 XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
137 void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
138 void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg);
139 void movsxl(BasicType typ, Register dst);
140
141 // Vector test: takes two XMM sources and has no destination register parameter.
// vtmp1 / vtmp2 default to xnoreg and mask defaults to knoreg, so callers may
// omit them.
// NOTE(review): `bt` is an int here rather than a BasicType, and the unit of
// `vlen` is not visible in this header — confirm both against the implementation.
142 void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
143 XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);
144
145 // Blend: KRegister-based compare and blend helpers.
// evpcmp has two overloads: the second operand is either an AddressLiteral (with
// a scratch register that defaults to rscratch1) or an XMM register. Both take
// two mask registers, kdmask and ksmask.
// NOTE(review): presumably kdmask is the destination mask and ksmask the source
// (predicate) mask — confirm.
146 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
147 void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
// evpblend: takes a kmask, two XMM sources and a bool `merge` flag.
148 void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
149
// Both helpers take the vector length in bytes (vlen_in_bytes).
// load_vector_mask: takes XMM dst/src, the element type and an is_legacy flag.
// load_iota_indices: takes an XMM dst and a general-purpose scratch register.
// NOTE(review): "iota" presumably means the index sequence 0, 1, 2, ... — confirm.
150 void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy);
151 void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
152
153 // Vector compare: both helpers take a ComparisonPredicate and the vector length
// in bytes. vpcmpu needs two vector temporaries; vpcmpu32 needs three. Both also
// take a general-purpose scratch register.
// NOTE(review): the "u" suffix presumably denotes unsigned comparison — confirm.
154 void vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
155 XMMRegister vtmp1, XMMRegister vtmp2, Register scratch);
156 void vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
157 XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch);
158
159 // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
160
161 // dst = src1 reduce(op, src2) using vtmp as temps
// (src1 is a general-purpose register, src2 the vector operand.)
162 void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// reduceL and genmask are only declared when _LP64 is defined.
163 #ifdef _LP64
164 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
165 void genmask(KRegister dst, Register len, Register temp);
166 #endif // _LP64
167
168 // dst = reduce(op, src2) using vtmp as temps
// reduce_fp: vtmp2 is optional and defaults to xnoreg.
169 void reduce_fp(int opcode, int vlen,
170 XMMRegister dst, XMMRegister src,
171 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
// Byte / short reductions share the parameter list of reduceI.
172 void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
173 void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
174 void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
// Float / double min-max reductions: xmm_1 is optional (defaults to xnoreg).
// NOTE(review): is_dst_valid presumably tells whether dst already holds a value
// that must take part in the reduction — confirm.
175 void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
176 XMMRegister dst, XMMRegister src,
177 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
178 void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
179 XMMRegister dst, XMMRegister src,
180 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
// Everything from here up to the next access specifier is private.
181 private:
// Float / double reductions taking an explicit vlen, XMM dst/src and two vector
// temporaries.
// NOTE(review): presumably these dispatch on vlen to the fixed-size helpers
// (reduce2F ... reduce8D) declared further down — confirm.
182 void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
183 void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
184
185 // Int Reduction: fixed-size helpers; none of them takes a vlen parameter.
// NOTE(review): the number in each name is presumably the element count — confirm.
186 void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
187 void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
188 void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
189 void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
190
191 // Byte Reduction: fixed-size helpers in two families, reduceNB and mulreduceNB,
// with identical parameter lists.
// NOTE(review): mulreduce* presumably handles the multiply reduction separately
// from the other opcodes — confirm.
192 void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
193 void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
194 void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
195 void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
196 void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
197 void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
198 void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
199 void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
200
201 // Short Reduction: fixed-size helpers with the same parameter list as the int
// and byte helpers (general-purpose dst/src1, vector src2, two vector temps).
202 void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
203 void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
204 void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
205 void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
206
207 // Long Reduction: fixed-size helpers, only declared when _LP64 is defined
// (matching the guard around the public reduceL entry point).
208 #ifdef _LP64
209 void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
210 void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
211 void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
212 #endif // _LP64
213
214 // Float Reduction: dst and src are both XMM registers. reduce2F / reduce4F take
// a single vector temp; reduce8F / reduce16F take two.
215 void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
216 void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
217 void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
218 void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
219
220 // Double Reduction: reduce2D takes a single vector temp; reduce4D / reduce8D
// take two.
221 void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
222 void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
223 void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
224
225 // Base reduction instruction: reduce_operation_128 is a two-operand form
// (dst, src); reduce_operation_256 is a three-operand form (dst, src1, src2).
// NOTE(review): 128 / 256 presumably refer to the vector width in bits — confirm.
226 void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
227 void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
228
229 public:
// vector_mask_operation: only declared when _LP64 is defined. Two overloads that
// both write to a general-purpose dst and read an XMM `mask`:
//  - the first takes one vector temp (xtmp) plus a KRegister temp (ktmp);
//  - the second takes two vector temps (xtmp, xtmp1) and no KRegister.
230 #ifdef _LP64
231 void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, Register tmp,
232 KRegister ktmp, int masklen, int vec_enc);
233
234 void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, XMMRegister xtmp1,
235 Register tmp, int masklen, int vec_enc);
236 #endif
// Index-of-character helpers with identical parameter lists: a string register
// (str1), a count register (cnt1), the character (ch), a result register, three
// vector temporaries and one general-purpose temporary.
// NOTE(review): the "L" in stringL_indexof_char presumably denotes the Latin-1
// (byte) string variant, with the unprefixed one for UTF-16 — confirm.
237 void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
238 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
239
240 void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
241 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
242
243 // IndexOf strings.
244 // Small strings are loaded through stack if they cross page boundary.
// Takes both strings and their counts in registers, plus int_cnt2 as an int.
// NOTE(review): int_cnt2 is presumably the compile-time-constant substring length
// (when known) and `ae` an argument-encoding selector — confirm.
245 void string_indexof(Register str1, Register str2,
246 Register cnt1, Register cnt2,
247 int int_cnt2, Register result,
248 XMMRegister vec, Register tmp,
249 int ae);
250
251 // IndexOf for constant substrings with size >= 8 elements
252 // which don't need to be loaded through stack.
// Same parameter list as string_indexof.
253 void string_indexofC8(Register str1, Register str2,
254 Register cnt1, Register cnt2,
255 int int_cnt2, Register result,
256 XMMRegister vec, Register tmp,
257 int ae);
258
259 // Smallest code: we don't need to load through stack,
260 // check string tail.
// NOTE(review): the two comment lines above are not attached to any declaration
// in this header; they may be left over from a removed or moved declaration —
// confirm and relocate or drop.
261
262 // Helper function for string_compare.
// Takes two element registers (elem1, elem2), the two string registers, three
// scale factors (scale, scale1, scale2), an index register and `ae`.
263 void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
264 Address::ScaleFactor scale, Address::ScaleFactor scale1,
265 Address::ScaleFactor scale2, Register index, int ae);
266 // Compare strings.
// Takes both strings and their counts in registers and a result register, plus
// one vector temporary. `mask` is optional and defaults to knoreg.
// NOTE(review): `ae` is presumably an argument-encoding selector — confirm.
267 void string_compare(Register str1, Register str2,
268 Register cnt1, Register cnt2, Register result,
269 XMMRegister vec1, int ae, KRegister mask = knoreg);
270
271 // Search for a non-ASCII character (negative byte value) in a byte array;
272 // return true if it has any and false otherwise.
// Takes the array and length registers, a result register, one general-purpose
// and two vector temporaries. mask1 / mask2 are optional and default to knoreg.
273 void has_negatives(Register ary1, Register len,
274 Register result, Register tmp1,
275 XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
276
277 // Compare char[] or byte[] arrays.
// is_char selects between the two element kinds named above; `mask` is optional
// and defaults to knoreg.
// NOTE(review): is_array_equ presumably distinguishes a full array-equals (with
// array header / length handling) from comparing raw ranges — confirm.
278 void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
279 Register limit, Register result, Register chr,
280 XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);
281
// rearrange_bytes: takes dst, `shuffle` and src vector registers, three vector
// temporaries, one general-purpose temporary, one KRegister temporary and the
// vector length encoding.
// NOTE(review): presumably permutes the bytes of src according to shuffle — confirm.
282 void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1,
283 XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc);
284
// load_nklass_compact_c2: takes a dst register plus the components of a memory
// operand (obj base register, index register, scale factor, displacement).
// NOTE(review): presumably loads the narrow klass from a compact object header
// located via that operand — confirm against the implementation.
285 void load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp);
286
287 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP