1 /*
2 * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "classfile/classLoaderData.hpp"
27 #include "gc/shared/barrierSet.hpp"
28 #include "gc/shared/barrierSetAssembler.hpp"
29 #include "gc/shared/barrierSetNMethod.hpp"
30 #include "gc/shared/barrierSetRuntime.hpp"
31 #include "gc/shared/collectedHeap.hpp"
32 #include "interpreter/interp_masm.hpp"
33 #include "memory/universe.hpp"
34 #include "runtime/javaThread.hpp"
35 #include "runtime/jniHandles.hpp"
36 #include "runtime/sharedRuntime.hpp"
37 #include "runtime/stubRoutines.hpp"
38 #ifdef COMPILER2
39 #include "gc/shared/c2/barrierSetC2.hpp"
40 #endif // COMPILER2
41
// Assembler shorthand: "__ insn(...)" expands to "masm->insn(...)", so a
// MacroAssembler* named 'masm' must be in scope wherever it is used.
#define __ masm->
43
44 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
45 Register dst, Address src, Register tmp1, Register tmp2) {
46 // RA is live. It must be saved around calls.
47
48 bool in_heap = (decorators & IN_HEAP) != 0;
49 bool in_native = (decorators & IN_NATIVE) != 0;
50 bool is_not_null = (decorators & IS_NOT_NULL) != 0;
51 switch (type) {
52 case T_OBJECT: // fall through
53 case T_ARRAY: {
54 if (in_heap) {
55 if (UseCompressedOops) {
56 __ lwu(dst, src);
57 if (is_not_null) {
58 __ decode_heap_oop_not_null(dst);
59 } else {
60 __ decode_heap_oop(dst);
61 }
62 } else {
63 __ ld(dst, src);
64 }
65 } else {
66 assert(in_native, "why else?");
67 __ ld(dst, src);
68 }
69 break;
70 }
71 case T_BOOLEAN: __ load_unsigned_byte (dst, src); break;
72 case T_BYTE: __ load_signed_byte (dst, src); break;
73 case T_CHAR: __ load_unsigned_short(dst, src); break;
74 case T_SHORT: __ load_signed_short (dst, src); break;
75 case T_INT: __ lw (dst, src); break;
76 case T_LONG: __ ld (dst, src); break;
77 case T_ADDRESS: __ ld (dst, src); break;
78 case T_FLOAT: __ flw (f10, src); break;
79 case T_DOUBLE: __ fld (f10, src); break;
80 default: Unimplemented();
81 }
82 }
83
84 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
85 Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
86 bool in_heap = (decorators & IN_HEAP) != 0;
87 bool in_native = (decorators & IN_NATIVE) != 0;
88 bool is_not_null = (decorators & IS_NOT_NULL) != 0;
89
90 switch (type) {
91 case T_OBJECT: // fall through
92 case T_ARRAY: {
93 if (in_heap) {
94 if (val == noreg) {
95 assert(!is_not_null, "inconsistent access");
96 if (UseCompressedOops) {
97 __ sw(zr, dst);
98 } else {
99 __ sd(zr, dst);
100 }
101 } else {
102 if (UseCompressedOops) {
103 assert(!dst.uses(val), "not enough registers");
104 if (is_not_null) {
105 __ encode_heap_oop_not_null(val);
106 } else {
107 __ encode_heap_oop(val);
108 }
109 __ sw(val, dst);
110 } else {
111 __ sd(val, dst);
112 }
113 }
114 } else {
115 assert(in_native, "why else?");
116 assert(val != noreg, "not supported");
117 __ sd(val, dst);
118 }
119 break;
120 }
121 case T_BOOLEAN:
122 __ andi(val, val, 0x1); // boolean is true if LSB is 1
123 __ sb(val, dst);
124 break;
125 case T_BYTE: __ sb(val, dst); break;
126 case T_CHAR: __ sh(val, dst); break;
127 case T_SHORT: __ sh(val, dst); break;
128 case T_INT: __ sw(val, dst); break;
129 case T_LONG: __ sd(val, dst); break;
130 case T_ADDRESS: __ sd(val, dst); break;
131 case T_FLOAT: __ fsw(f10, dst); break;
132 case T_DOUBLE: __ fsd(f10, dst); break;
133 default: Unimplemented();
134 }
135
136 }
137
138 void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
139 Register src, Register dst, Register inline_layout_info) {
140 // flat_field_copy implementation is fairly complex, and there are not any
141 // "short-cuts" to be made from asm. What there is, appears to have the same
142 // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds
143 // of hand-rolled instructions...
144 if (decorators & IS_DEST_UNINITIALIZED) {
145 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, inline_layout_info);
146 } else {
147 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, inline_layout_info);
148 }
149 }
150
151 void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
152 DecoratorSet decorators,
153 BasicType type,
154 size_t bytes,
155 Register dst,
156 Address src,
157 Register tmp) {
158 if (bytes == 1) {
159 __ lbu(dst, src);
160 } else if (bytes == 2) {
161 __ lhu(dst, src);
162 } else if (bytes == 4) {
163 __ lwu(dst, src);
164 } else if (bytes == 8) {
165 __ ld(dst, src);
166 } else {
167 // Not the right size
168 ShouldNotReachHere();
169 }
170 if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
171 __ decode_heap_oop(dst);
172 }
173 }
174
175 void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
176 DecoratorSet decorators,
177 BasicType type,
178 size_t bytes,
179 Address dst,
180 Register src,
181 Register tmp1,
182 Register tmp2,
183 Register tmp3) {
184 if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
185 __ encode_heap_oop(src);
186 }
187
188 if (bytes == 1) {
189 __ sb(src, dst);
190 } else if (bytes == 2) {
191 __ sh(src, dst);
192 } else if (bytes == 4) {
193 __ sw(src, dst);
194 } else if (bytes == 8) {
195 __ sd(src, dst);
196 } else {
197 // Not the right size
198 ShouldNotReachHere();
199 }
200 }
201
202 void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
203 Register obj, Register tmp, Label& slowpath) {
204 // If mask changes we need to ensure that the inverse is still encodable as an immediate
205 STATIC_ASSERT(JNIHandles::tag_mask == 3);
206 __ andi(obj, obj, ~JNIHandles::tag_mask);
207 __ ld(obj, Address(obj, 0)); // *obj
208 }
209
210 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
211 void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
212 Register var_size_in_bytes,
213 int con_size_in_bytes,
214 Register tmp1,
215 Register tmp2,
216 Label& slow_case,
217 bool is_far) {
218 assert_different_registers(obj, tmp2);
219 assert_different_registers(obj, var_size_in_bytes);
220 Register end = tmp2;
221
222 __ ld(obj, Address(xthread, JavaThread::tlab_top_offset()));
223 if (var_size_in_bytes == noreg) {
224 __ la(end, Address(obj, con_size_in_bytes));
225 } else {
226 __ add(end, obj, var_size_in_bytes);
227 }
228 __ ld(t0, Address(xthread, JavaThread::tlab_end_offset()));
229 __ bgtu(end, t0, slow_case, is_far);
230
231 // update the tlab top pointer
232 __ sd(end, Address(xthread, JavaThread::tlab_top_offset()));
233
234 // recover var_size_in_bytes if necessary
235 if (var_size_in_bytes == end) {
236 __ sub(var_size_in_bytes, var_size_in_bytes, obj);
237 }
238 }
239
// Global patching epoch. Its address is embedded in the entry barrier emitted
// by nmethod_entry_barrier() for conc_instruction_and_data_patch and is also
// handed out through patching_epoch_addr().
static volatile uint32_t _patching_epoch = 0;
241
242 address BarrierSetAssembler::patching_epoch_addr() {
243 return (address)&_patching_epoch;
244 }
245
246 void BarrierSetAssembler::increment_patching_epoch() {
247 AtomicAccess::inc(&_patching_epoch);
248 }
249
250 void BarrierSetAssembler::clear_patching_epoch() {
251 _patching_epoch = 0;
252 }
253
254 void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
255 BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
256 Assembler::IncompressibleScope scope(masm); // Fixed length: see entry_barrier_offset()
257
258 Label local_guard, skip_barrier;
259 NMethodPatchingType patching_type = nmethod_patching_type();
260
261 if (slow_path == nullptr) {
262 guard = &local_guard;
263
264 // RISCV atomic operations require that the memory address be naturally aligned.
265 __ align(4);
266 }
267
268 __ lwu(t0, *guard);
269
270 switch (patching_type) {
271 case NMethodPatchingType::stw_instruction_and_data_patch:
272 {
273 // With STW patching, no data or instructions are updated concurrently,
274 // which means there isn't really any need for any fencing for neither
275 // data nor instruction modification happening concurrently. The
276 // instruction patching is synchronized with global icache_flush() by
277 // the write hart on riscv. So here we can do a plain conditional
278 // branch with no fencing.
279 Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
280 __ lwu(t1, thread_disarmed_addr);
281 break;
282 }
283 case NMethodPatchingType::conc_instruction_and_data_patch:
284 {
285 // If we patch code we need both a cmodx fence and a loadload
286 // fence. It's not super cheap, so we use a global epoch mechanism
287 // to hide them in a slow path.
288 // The high level idea of the global epoch mechanism is to detect
289 // when any thread has performed the required fencing, after the
290 // last nmethod was disarmed. This implies that the required
291 // fencing has been performed for all preceding nmethod disarms
292 // as well. Therefore, we do not need any further fencing.
293
294 __ la(t1, ExternalAddress((address)&_patching_epoch));
295 if (!UseZtso) {
296 // Embed a synthetic data dependency between the load of the guard and
297 // the load of the epoch. This guarantees that these loads occur in
298 // order, while allowing other independent instructions to be reordered.
299 // Note: This may be slower than using a membar(load|load) (fence r,r).
300 // Because processors will not start the second load until the first comes back.
301 // This means you can't overlap the two loads,
302 // which is stronger than needed for ordering (stronger than TSO).
303 __ srli(ra, t0, 32);
304 __ orr(t1, t1, ra);
305 }
306 // Read the global epoch value.
307 __ lwu(t1, t1);
308 // Combine the guard value (low order) with the epoch value (high order).
309 __ slli(t1, t1, 32);
310 __ orr(t0, t0, t1);
311 // Compare the global values with the thread-local values
312 Address thread_disarmed_and_epoch_addr(xthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
313 __ ld(t1, thread_disarmed_and_epoch_addr);
314 break;
315 }
316 default:
317 ShouldNotReachHere();
318 }
319
320 Label& barrier_target = slow_path == nullptr ? skip_barrier : *slow_path;
321 if (slow_path == nullptr) {
322 __ beq(t0, t1, barrier_target, /* is_far */ true);
323 } else {
324 __ bne(t0, t1, barrier_target, /* is_far */ true);
325 }
326
327 if (slow_path == nullptr) {
328 __ rt_call(StubRoutines::method_entry_barrier());
329 __ j(skip_barrier);
330
331 __ bind(local_guard);
332
333 MacroAssembler::assert_alignment(__ pc());
334 __ emit_int32(0); // nmethod guard value. Skipped over in common case.
335 } else {
336 __ bind(*continuation);
337 }
338
339 __ bind(skip_barrier);
340 }
341
342 void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
343 Label bad_call;
344 __ beqz(xmethod, bad_call);
345
346 // Pointer chase to the method holder to find out if the method is concurrently unloading.
347 Label method_live;
348 __ load_method_holder_cld(t0, xmethod);
349
350 // Is it a strong CLD?
351 __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_ref_count_offset()));
352 __ bnez(t1, method_live);
353
354 // Is it a weak but alive CLD?
355 __ push_reg(RegSet::of(x28), sp);
356
357 __ ld(x28, Address(t0, ClassLoaderData::holder_offset()));
358
359 __ resolve_weak_handle(x28, t0, t1);
360 __ mv(t0, x28);
361
362 __ pop_reg(RegSet::of(x28), sp);
363
364 __ bnez(t0, method_live);
365
366 __ bind(bad_call);
367
368 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
369 __ bind(method_live);
370 }
371
372 void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
373 // Check if the oop is in the right area of memory
374 __ mv(tmp2, (intptr_t) Universe::verify_oop_mask());
375 __ andr(tmp1, obj, tmp2);
376 __ mv(tmp2, (intptr_t) Universe::verify_oop_bits());
377
378 // Compare tmp1 and tmp2.
379 __ bne(tmp1, tmp2, error);
380
381 // Make sure klass is 'reasonable', which is not zero.
382 __ load_klass(obj, obj, tmp1); // get klass
383 __ beqz(obj, error); // if klass is null it is broken
384 }
385
386 void BarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj,
387 Register tmp, Label& slow_path) {
388 // Load the oop from the weak handle without barriers.
389 __ ld(obj, Address(weak_handle));
390 }
391
392 #ifdef COMPILER2
393
394 OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
395 if (!OptoReg::is_reg(opto_reg)) {
396 return OptoReg::Bad;
397 }
398
399 const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
400 if (vm_reg->is_FloatRegister()) {
401 return opto_reg & ~1;
402 }
403
404 return opto_reg;
405 }
// The code below reaches the assembler through the '_masm' member instead of
// a 'masm' parameter, so the "__" shorthand is redefined accordingly.
#undef __
#define __ _masm->
408
409 void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
410 // Record registers that needs to be saved/restored
411 RegMaskIterator rmi(stub->preserve_set());
412 while (rmi.has_next()) {
413 const OptoReg::Name opto_reg = rmi.next();
414 if (OptoReg::is_reg(opto_reg)) {
415 const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
416 if (vm_reg->is_Register()) {
417 _gp_regs += RegSet::of(vm_reg->as_Register());
418 } else if (vm_reg->is_FloatRegister()) {
419 _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
420 } else if (vm_reg->is_VectorRegister()) {
421 const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegister::max_slots_per_register - 1));
422 _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister());
423 } else {
424 fatal("Unknown register type");
425 }
426 }
427 }
428
429 // Remove C-ABI SOE registers and tmp regs
430 _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2, x5) + RegSet::of(x8, x9);
431 }
432
433 SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
434 : _masm(masm),
435 _gp_regs(),
436 _fp_regs(),
437 _vp_regs() {
438 // Figure out what registers to save/restore
439 initialize(stub);
440
441 // Save registers
442 __ push_reg(_gp_regs, sp);
443 __ push_fp(_fp_regs, sp);
444 __ push_v(_vp_regs, sp);
445 }
446
447 SaveLiveRegisters::~SaveLiveRegisters() {
448 // Restore registers
449 __ pop_v(_vp_regs, sp);
450 __ pop_fp(_fp_regs, sp);
451 __ pop_reg(_gp_regs, sp);
452 }
453
454 #endif // COMPILER2