1 /*
  2  * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  *
 24  */
 25 
 26 #include "classfile/classLoaderData.hpp"
 27 #include "gc/shared/barrierSet.hpp"
 28 #include "gc/shared/barrierSetAssembler.hpp"
 29 #include "gc/shared/barrierSetNMethod.hpp"
 30 #include "gc/shared/collectedHeap.hpp"
 31 #include "interpreter/interp_masm.hpp"
 32 #include "memory/universe.hpp"
 33 #include "runtime/javaThread.hpp"
 34 #include "runtime/jniHandles.hpp"
 35 #include "runtime/sharedRuntime.hpp"
 36 #include "runtime/stubRoutines.hpp"
 37 #ifdef COMPILER2
 38 #include "gc/shared/c2/barrierSetC2.hpp"
 39 #endif // COMPILER2
 40 
 41 #define __ masm->
 42 
 43 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 44                                   Register dst, Address src, Register tmp1, Register tmp2) {
 45   // RA is live. It must be saved around calls.
 46 
 47   bool in_heap = (decorators & IN_HEAP) != 0;
 48   bool in_native = (decorators & IN_NATIVE) != 0;
 49   bool is_not_null = (decorators & IS_NOT_NULL) != 0;
 50   switch (type) {
 51     case T_OBJECT:  // fall through
 52     case T_ARRAY: {
 53       if (in_heap) {
 54         if (UseCompressedOops) {
 55           __ lwu(dst, src);
 56           if (is_not_null) {
 57             __ decode_heap_oop_not_null(dst);
 58           } else {
 59             __ decode_heap_oop(dst);
 60           }
 61         } else {
 62           __ ld(dst, src);
 63         }
 64       } else {
 65         assert(in_native, "why else?");
 66         __ ld(dst, src);
 67       }
 68       break;
 69     }
 70     case T_BOOLEAN: __ load_unsigned_byte (dst, src); break;
 71     case T_BYTE:    __ load_signed_byte   (dst, src); break;
 72     case T_CHAR:    __ load_unsigned_short(dst, src); break;
 73     case T_SHORT:   __ load_signed_short  (dst, src); break;
 74     case T_INT:     __ lw                 (dst, src); break;
 75     case T_LONG:    __ ld                 (dst, src); break;
 76     case T_ADDRESS: __ ld                 (dst, src); break;
 77     case T_FLOAT:   __ flw                (f10, src); break;
 78     case T_DOUBLE:  __ fld                (f10, src); break;
 79     default: Unimplemented();
 80   }
 81 }
 82 
 83 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 84                                    Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
 85   bool in_heap = (decorators & IN_HEAP) != 0;
 86   bool in_native = (decorators & IN_NATIVE) != 0;
 87   switch (type) {
 88     case T_OBJECT: // fall through
 89     case T_ARRAY: {
 90       val = val == noreg ? zr : val;
 91       if (in_heap) {
 92         if (UseCompressedOops) {
 93           assert(!dst.uses(val), "not enough registers");
 94           if (val != zr) {
 95             __ encode_heap_oop(val);
 96           }
 97           __ sw(val, dst);
 98         } else {
 99           __ sd(val, dst);
100         }
101       } else {
102         assert(in_native, "why else?");
103         __ sd(val, dst);
104       }
105       break;
106     }
107     case T_BOOLEAN:
108       __ andi(val, val, 0x1);  // boolean is true if LSB is 1
109       __ sb(val, dst);
110       break;
111     case T_BYTE:    __ sb(val, dst); break;
112     case T_CHAR:    __ sh(val, dst); break;
113     case T_SHORT:   __ sh(val, dst); break;
114     case T_INT:     __ sw(val, dst); break;
115     case T_LONG:    __ sd(val, dst); break;
116     case T_ADDRESS: __ sd(val, dst); break;
117     case T_FLOAT:   __ fsw(f10,  dst); break;
118     case T_DOUBLE:  __ fsd(f10,  dst); break;
119     default: Unimplemented();
120   }
121 
122 }
123 
124 void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
125                                        DecoratorSet decorators,
126                                        BasicType type,
127                                        size_t bytes,
128                                        Register dst,
129                                        Address src,
130                                        Register tmp) {
131   if (bytes == 1) {
132     __ lbu(dst, src);
133   } else if (bytes == 2) {
134     __ lhu(dst, src);
135   } else if (bytes == 4) {
136     __ lwu(dst, src);
137   } else if (bytes == 8) {
138     __ ld(dst, src);
139   } else {
140     // Not the right size
141     ShouldNotReachHere();
142   }
143   if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
144     __ decode_heap_oop(dst);
145   }
146 }
147 
148 void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
149                                         DecoratorSet decorators,
150                                         BasicType type,
151                                         size_t bytes,
152                                         Address dst,
153                                         Register src,
154                                         Register tmp1,
155                                         Register tmp2,
156                                         Register tmp3) {
157   if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
158     __ encode_heap_oop(src);
159   }
160 
161   if (bytes == 1) {
162     __ sb(src, dst);
163   } else if (bytes == 2) {
164     __ sh(src, dst);
165   } else if (bytes == 4) {
166     __ sw(src, dst);
167   } else if (bytes == 8) {
168     __ sd(src, dst);
169   } else {
170     // Not the right size
171     ShouldNotReachHere();
172   }
173 }
174 
175 void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
176                                                         Register obj, Register tmp, Label& slowpath) {
177   // If mask changes we need to ensure that the inverse is still encodable as an immediate
178   STATIC_ASSERT(JNIHandles::tag_mask == 3);
179   __ andi(obj, obj, ~JNIHandles::tag_mask);
180   __ ld(obj, Address(obj, 0));             // *obj
181 }
182 
183 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
184 void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
185                                         Register var_size_in_bytes,
186                                         int con_size_in_bytes,
187                                         Register tmp1,
188                                         Register tmp2,
189                                         Label& slow_case,
190                                         bool is_far) {
191   assert_different_registers(obj, tmp2);
192   assert_different_registers(obj, var_size_in_bytes);
193   Register end = tmp2;
194 
195   __ ld(obj, Address(xthread, JavaThread::tlab_top_offset()));
196   if (var_size_in_bytes == noreg) {
197     __ la(end, Address(obj, con_size_in_bytes));
198   } else {
199     __ add(end, obj, var_size_in_bytes);
200   }
201   __ ld(t0, Address(xthread, JavaThread::tlab_end_offset()));
202   __ bgtu(end, t0, slow_case, is_far);
203 
204   // update the tlab top pointer
205   __ sd(end, Address(xthread, JavaThread::tlab_top_offset()));
206 
207   // recover var_size_in_bytes if necessary
208   if (var_size_in_bytes == end) {
209     __ sub(var_size_in_bytes, var_size_in_bytes, obj);
210   }
211 }
212 
// Global patching epoch, starting at zero. The nmethod entry barrier emitted
// for NMethodPatchingType::conc_instruction_and_data_patch loads this word
// and compares it, combined with the nmethod guard value, against a
// per-thread value.
static volatile uint32_t _patching_epoch = 0;
214 
215 address BarrierSetAssembler::patching_epoch_addr() {
216   return (address)&_patching_epoch;
217 }
218 
// Atomically increments the global patching epoch by one.
void BarrierSetAssembler::increment_patching_epoch() {
  AtomicAccess::inc(&_patching_epoch);
}
222 
// Resets the global patching epoch to zero. This is a plain store to the
// volatile variable, not an atomic read-modify-write.
void BarrierSetAssembler::clear_patching_epoch() {
  _patching_epoch = 0;
}
226 
227 void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
228   BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
229   Assembler::IncompressibleScope scope(masm); // Fixed length: see entry_barrier_offset()
230 
231   Label local_guard, skip_barrier;
232   NMethodPatchingType patching_type = nmethod_patching_type();
233 
234   if (slow_path == nullptr) {
235     guard = &local_guard;
236 
237     // RISCV atomic operations require that the memory address be naturally aligned.
238     __ align(4);
239   }
240 
241   __ lwu(t0, *guard);
242 
243   switch (patching_type) {
244     case NMethodPatchingType::stw_instruction_and_data_patch:
245       {
246         // With STW patching, no data or instructions are updated concurrently,
247         // which means there isn't really any need for any fencing for neither
248         // data nor instruction modification happening concurrently. The
249         // instruction patching is synchronized with global icache_flush() by
250         // the write hart on riscv. So here we can do a plain conditional
251         // branch with no fencing.
252         Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
253         __ lwu(t1, thread_disarmed_addr);
254         break;
255       }
256     case NMethodPatchingType::conc_instruction_and_data_patch:
257       {
258         // If we patch code we need both a cmodx fence and a loadload
259         // fence. It's not super cheap, so we use a global epoch mechanism
260         // to hide them in a slow path.
261         // The high level idea of the global epoch mechanism is to detect
262         // when any thread has performed the required fencing, after the
263         // last nmethod was disarmed. This implies that the required
264         // fencing has been performed for all preceding nmethod disarms
265         // as well. Therefore, we do not need any further fencing.
266 
267         __ la(t1, ExternalAddress((address)&_patching_epoch));
268         if (!UseZtso) {
269           // Embed a synthetic data dependency between the load of the guard and
270           // the load of the epoch. This guarantees that these loads occur in
271           // order, while allowing other independent instructions to be reordered.
272           // Note: This may be slower than using a membar(load|load) (fence r,r).
273           // Because processors will not start the second load until the first comes back.
274           // This means you can't overlap the two loads,
275           // which is stronger than needed for ordering (stronger than TSO).
276           __ srli(ra, t0, 32);
277           __ orr(t1, t1, ra);
278         }
279         // Read the global epoch value.
280         __ lwu(t1, t1);
281         // Combine the guard value (low order) with the epoch value (high order).
282         __ slli(t1, t1, 32);
283         __ orr(t0, t0, t1);
284         // Compare the global values with the thread-local values
285         Address thread_disarmed_and_epoch_addr(xthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
286         __ ld(t1, thread_disarmed_and_epoch_addr);
287         break;
288       }
289     default:
290       ShouldNotReachHere();
291   }
292 
293   Label& barrier_target = slow_path == nullptr ? skip_barrier : *slow_path;
294   if (slow_path == nullptr) {
295     __ beq(t0, t1, barrier_target, true /* is_far */);
296   } else {
297     __ bne(t0, t1, barrier_target, true /* is_far */);
298   }
299 
300   if (slow_path == nullptr) {
301     __ rt_call(StubRoutines::method_entry_barrier());
302     __ j(skip_barrier);
303 
304     __ bind(local_guard);
305 
306     MacroAssembler::assert_alignment(__ pc());
307     __ emit_int32(0); // nmethod guard value. Skipped over in common case.
308   } else {
309     __ bind(*continuation);
310   }
311 
312   __ bind(skip_barrier);
313 }
314 
315 void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
316   Label bad_call;
317   __ beqz(xmethod, bad_call);
318 
319   // Pointer chase to the method holder to find out if the method is concurrently unloading.
320   Label method_live;
321   __ load_method_holder_cld(t0, xmethod);
322 
323   // Is it a strong CLD?
324   __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_ref_count_offset()));
325   __ bnez(t1, method_live);
326 
327   // Is it a weak but alive CLD?
328   __ push_reg(RegSet::of(x28), sp);
329 
330   __ ld(x28, Address(t0, ClassLoaderData::holder_offset()));
331 
332   __ resolve_weak_handle(x28, t0, t1);
333   __ mv(t0, x28);
334 
335   __ pop_reg(RegSet::of(x28), sp);
336 
337   __ bnez(t0, method_live);
338 
339   __ bind(bad_call);
340 
341   __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
342   __ bind(method_live);
343 }
344 
345 void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
346   // Check if the oop is in the right area of memory
347   __ mv(tmp2, (intptr_t) Universe::verify_oop_mask());
348   __ andr(tmp1, obj, tmp2);
349   __ mv(tmp2, (intptr_t) Universe::verify_oop_bits());
350 
351   // Compare tmp1 and tmp2.
352   __ bne(tmp1, tmp2, error);
353 
354   // Make sure klass is 'reasonable', which is not zero.
355   __ load_klass(obj, obj, tmp1); // get klass
356   __ beqz(obj, error);           // if klass is null it is broken
357 }
358 
359 void BarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj,
360                                                           Register tmp, Label& slow_path) {
361   // Load the oop from the weak handle without barriers.
362   __ ld(obj, Address(weak_handle));
363 }
364 
365 #ifdef COMPILER2
366 
367 OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
368   if (!OptoReg::is_reg(opto_reg)) {
369     return OptoReg::Bad;
370   }
371 
372   const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
373   if (vm_reg->is_FloatRegister()) {
374     return opto_reg & ~1;
375   }
376 
377   return opto_reg;
378 }
379 #undef __
380 #define __ _masm->
381 
382 void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
383   // Record registers that needs to be saved/restored
384   RegMaskIterator rmi(stub->preserve_set());
385   while (rmi.has_next()) {
386     const OptoReg::Name opto_reg = rmi.next();
387     if (OptoReg::is_reg(opto_reg)) {
388       const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
389       if (vm_reg->is_Register()) {
390         _gp_regs += RegSet::of(vm_reg->as_Register());
391       } else if (vm_reg->is_FloatRegister()) {
392         _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
393       } else if (vm_reg->is_VectorRegister()) {
394         const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegister::max_slots_per_register - 1));
395         _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister());
396       } else {
397         fatal("Unknown register type");
398       }
399     }
400   }
401 
402   // Remove C-ABI SOE registers and tmp regs
403   _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2, x5) + RegSet::of(x8, x9);
404 }
405 
// Determines which registers the given stub needs preserved and emits, through
// masm, the code that pushes them: general-purpose registers first, then
// floating-point registers, then vector registers. The destructor emits the
// matching pops in reverse order.
SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_regs(),
    _fp_regs(),
    _vp_regs() {
  // Figure out what registers to save/restore
  initialize(stub);

  // Save registers (the push order must be mirrored by the destructor)
  __ push_reg(_gp_regs, sp);
  __ push_fp(_fp_regs, sp);
  __ push_v(_vp_regs, sp);
}
419 
// Emits the code that pops the registers recorded in this object: vector
// registers first, then floating-point registers, then general-purpose
// registers.
SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore registers, in the reverse of the order the constructor pushed them
  __ pop_v(_vp_regs, sp);
  __ pop_fp(_fp_regs, sp);
  __ pop_reg(_gp_regs, sp);
}
426 
427 #endif // COMPILER2