1 /*
  2  * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  *
 24  */
 25 
 26 #include "classfile/classLoaderData.hpp"
 27 #include "gc/shared/barrierSet.hpp"
 28 #include "gc/shared/barrierSetAssembler.hpp"
 29 #include "gc/shared/barrierSetNMethod.hpp"
 30 #include "gc/shared/barrierSetRuntime.hpp"
 31 #include "gc/shared/collectedHeap.hpp"
 32 #include "interpreter/interp_masm.hpp"
 33 #include "memory/universe.hpp"
 34 #include "runtime/javaThread.hpp"
 35 #include "runtime/jniHandles.hpp"
 36 #include "runtime/sharedRuntime.hpp"
 37 #include "runtime/stubRoutines.hpp"
 38 #ifdef COMPILER2
 39 #include "gc/shared/c2/barrierSetC2.hpp"
 40 #endif // COMPILER2
 41 
 42 #define __ masm->
 43 
 44 void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 45                                   Register dst, Address src, Register tmp1, Register tmp2) {
 46   // RA is live. It must be saved around calls.
 47 
 48   bool in_heap = (decorators & IN_HEAP) != 0;
 49   bool in_native = (decorators & IN_NATIVE) != 0;
 50   bool is_not_null = (decorators & IS_NOT_NULL) != 0;
 51   switch (type) {
 52     case T_OBJECT:  // fall through
 53     case T_ARRAY: {
 54       if (in_heap) {
 55         if (UseCompressedOops) {
 56           __ lwu(dst, src);
 57           if (is_not_null) {
 58             __ decode_heap_oop_not_null(dst);
 59           } else {
 60             __ decode_heap_oop(dst);
 61           }
 62         } else {
 63           __ ld(dst, src);
 64         }
 65       } else {
 66         assert(in_native, "why else?");
 67         __ ld(dst, src);
 68       }
 69       break;
 70     }
 71     case T_BOOLEAN: __ load_unsigned_byte (dst, src); break;
 72     case T_BYTE:    __ load_signed_byte   (dst, src); break;
 73     case T_CHAR:    __ load_unsigned_short(dst, src); break;
 74     case T_SHORT:   __ load_signed_short  (dst, src); break;
 75     case T_INT:     __ lw                 (dst, src); break;
 76     case T_LONG:    __ ld                 (dst, src); break;
 77     case T_ADDRESS: __ ld                 (dst, src); break;
 78     case T_FLOAT:   __ flw                (f10, src); break;
 79     case T_DOUBLE:  __ fld                (f10, src); break;
 80     default: Unimplemented();
 81   }
 82 }
 83 
 84 void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
 85                                    Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
 86   bool in_heap = (decorators & IN_HEAP) != 0;
 87   bool in_native = (decorators & IN_NATIVE) != 0;
 88   bool is_not_null = (decorators & IS_NOT_NULL) != 0;
 89 
 90   switch (type) {
 91     case T_OBJECT: // fall through
 92     case T_ARRAY: {
 93       if (in_heap) {
 94         if (val == noreg) {
 95           assert(!is_not_null, "inconsistent access");
 96           if (UseCompressedOops) {
 97             __ sw(zr, dst);
 98           } else {
 99             __ sd(zr, dst);
100           }
101         } else {
102           if (UseCompressedOops) {
103             assert(!dst.uses(val), "not enough registers");
104             if (is_not_null) {
105               __ encode_heap_oop_not_null(val);
106             } else {
107               __ encode_heap_oop(val);
108             }
109             __ sw(val, dst);
110           } else {
111             __ sd(val, dst);
112           }
113         }
114       } else {
115         assert(in_native, "why else?");
116         assert(val != noreg, "not supported");
117         __ sd(val, dst);
118       }
119       break;
120     }
121     case T_BOOLEAN:
122       __ andi(val, val, 0x1);  // boolean is true if LSB is 1
123       __ sb(val, dst);
124       break;
125     case T_BYTE:    __ sb(val, dst); break;
126     case T_CHAR:    __ sh(val, dst); break;
127     case T_SHORT:   __ sh(val, dst); break;
128     case T_INT:     __ sw(val, dst); break;
129     case T_LONG:    __ sd(val, dst); break;
130     case T_ADDRESS: __ sd(val, dst); break;
131     case T_FLOAT:   __ fsw(f10,  dst); break;
132     case T_DOUBLE:  __ fsd(f10,  dst); break;
133     default: Unimplemented();
134   }
135 
136 }
137 
138 void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
139                                           Register src, Register dst, Register inline_layout_info) {
140   // flat_field_copy implementation is fairly complex, and there are not any
141   // "short-cuts" to be made from asm. What there is, appears to have the same
142   // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds
143   // of hand-rolled instructions...
144   if (decorators & IS_DEST_UNINITIALIZED) {
145     __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, inline_layout_info);
146   } else {
147     __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, inline_layout_info);
148   }
149 }
150 
151 void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
152                                        DecoratorSet decorators,
153                                        BasicType type,
154                                        size_t bytes,
155                                        Register dst,
156                                        Address src,
157                                        Register tmp) {
158   if (bytes == 1) {
159     __ lbu(dst, src);
160   } else if (bytes == 2) {
161     __ lhu(dst, src);
162   } else if (bytes == 4) {
163     __ lwu(dst, src);
164   } else if (bytes == 8) {
165     __ ld(dst, src);
166   } else {
167     // Not the right size
168     ShouldNotReachHere();
169   }
170   if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
171     __ decode_heap_oop(dst);
172   }
173 }
174 
175 void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
176                                         DecoratorSet decorators,
177                                         BasicType type,
178                                         size_t bytes,
179                                         Address dst,
180                                         Register src,
181                                         Register tmp1,
182                                         Register tmp2,
183                                         Register tmp3) {
184   if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
185     __ encode_heap_oop(src);
186   }
187 
188   if (bytes == 1) {
189     __ sb(src, dst);
190   } else if (bytes == 2) {
191     __ sh(src, dst);
192   } else if (bytes == 4) {
193     __ sw(src, dst);
194   } else if (bytes == 8) {
195     __ sd(src, dst);
196   } else {
197     // Not the right size
198     ShouldNotReachHere();
199   }
200 }
201 
202 void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
203                                                         Register obj, Register tmp, Label& slowpath) {
204   // If mask changes we need to ensure that the inverse is still encodable as an immediate
205   STATIC_ASSERT(JNIHandles::tag_mask == 3);
206   __ andi(obj, obj, ~JNIHandles::tag_mask);
207   __ ld(obj, Address(obj, 0));             // *obj
208 }
209 
210 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
211 void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
212                                         Register var_size_in_bytes,
213                                         int con_size_in_bytes,
214                                         Register tmp1,
215                                         Register tmp2,
216                                         Label& slow_case,
217                                         bool is_far) {
218   assert_different_registers(obj, tmp2);
219   assert_different_registers(obj, var_size_in_bytes);
220   Register end = tmp2;
221 
222   __ ld(obj, Address(xthread, JavaThread::tlab_top_offset()));
223   if (var_size_in_bytes == noreg) {
224     __ la(end, Address(obj, con_size_in_bytes));
225   } else {
226     __ add(end, obj, var_size_in_bytes);
227   }
228   __ ld(t0, Address(xthread, JavaThread::tlab_end_offset()));
229   __ bgtu(end, t0, slow_case, is_far);
230 
231   // update the tlab top pointer
232   __ sd(end, Address(xthread, JavaThread::tlab_top_offset()));
233 
234   // recover var_size_in_bytes if necessary
235   if (var_size_in_bytes == end) {
236     __ sub(var_size_in_bytes, var_size_in_bytes, obj);
237   }
238 }
239 
240 static volatile uint32_t _patching_epoch = 0;
241 
242 address BarrierSetAssembler::patching_epoch_addr() {
243   return (address)&_patching_epoch;
244 }
245 
246 void BarrierSetAssembler::increment_patching_epoch() {
247   AtomicAccess::inc(&_patching_epoch);
248 }
249 
250 void BarrierSetAssembler::clear_patching_epoch() {
251   _patching_epoch = 0;
252 }
253 
254 void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
255   BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
256   Assembler::IncompressibleScope scope(masm); // Fixed length: see entry_barrier_offset()
257 
258   Label local_guard, skip_barrier;
259   NMethodPatchingType patching_type = nmethod_patching_type();
260 
261   if (slow_path == nullptr) {
262     guard = &local_guard;
263 
264     // RISCV atomic operations require that the memory address be naturally aligned.
265     __ align(4);
266   }
267 
268   __ lwu(t0, *guard);
269 
270   switch (patching_type) {
271     case NMethodPatchingType::stw_instruction_and_data_patch:
272       {
273         // With STW patching, no data or instructions are updated concurrently,
274         // which means there isn't really any need for any fencing for neither
275         // data nor instruction modification happening concurrently. The
276         // instruction patching is synchronized with global icache_flush() by
277         // the write hart on riscv. So here we can do a plain conditional
278         // branch with no fencing.
279         Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
280         __ lwu(t1, thread_disarmed_addr);
281         break;
282       }
283     case NMethodPatchingType::conc_instruction_and_data_patch:
284       {
285         // If we patch code we need both a cmodx fence and a loadload
286         // fence. It's not super cheap, so we use a global epoch mechanism
287         // to hide them in a slow path.
288         // The high level idea of the global epoch mechanism is to detect
289         // when any thread has performed the required fencing, after the
290         // last nmethod was disarmed. This implies that the required
291         // fencing has been performed for all preceding nmethod disarms
292         // as well. Therefore, we do not need any further fencing.
293 
294         __ la(t1, ExternalAddress((address)&_patching_epoch));
295         if (!UseZtso) {
296           // Embed a synthetic data dependency between the load of the guard and
297           // the load of the epoch. This guarantees that these loads occur in
298           // order, while allowing other independent instructions to be reordered.
299           // Note: This may be slower than using a membar(load|load) (fence r,r).
300           // Because processors will not start the second load until the first comes back.
301           // This means you can't overlap the two loads,
302           // which is stronger than needed for ordering (stronger than TSO).
303           __ srli(ra, t0, 32);
304           __ orr(t1, t1, ra);
305         }
306         // Read the global epoch value.
307         __ lwu(t1, t1);
308         // Combine the guard value (low order) with the epoch value (high order).
309         __ slli(t1, t1, 32);
310         __ orr(t0, t0, t1);
311         // Compare the global values with the thread-local values
312         Address thread_disarmed_and_epoch_addr(xthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
313         __ ld(t1, thread_disarmed_and_epoch_addr);
314         break;
315       }
316     default:
317       ShouldNotReachHere();
318   }
319 
320   Label& barrier_target = slow_path == nullptr ? skip_barrier : *slow_path;
321   if (slow_path == nullptr) {
322     __ beq(t0, t1, barrier_target, /* is_far */ true);
323   } else {
324     __ bne(t0, t1, barrier_target, /* is_far */ true);
325   }
326 
327   if (slow_path == nullptr) {
328     __ rt_call(StubRoutines::method_entry_barrier());
329     __ j(skip_barrier);
330 
331     __ bind(local_guard);
332 
333     MacroAssembler::assert_alignment(__ pc());
334     __ emit_int32(0); // nmethod guard value. Skipped over in common case.
335   } else {
336     __ bind(*continuation);
337   }
338 
339   __ bind(skip_barrier);
340 }
341 
342 void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
343   Label bad_call;
344   __ beqz(xmethod, bad_call);
345 
346   // Pointer chase to the method holder to find out if the method is concurrently unloading.
347   Label method_live;
348   __ load_method_holder_cld(t0, xmethod);
349 
350   // Is it a strong CLD?
351   __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_ref_count_offset()));
352   __ bnez(t1, method_live);
353 
354   // Is it a weak but alive CLD?
355   __ push_reg(RegSet::of(x28), sp);
356 
357   __ ld(x28, Address(t0, ClassLoaderData::holder_offset()));
358 
359   __ resolve_weak_handle(x28, t0, t1);
360   __ mv(t0, x28);
361 
362   __ pop_reg(RegSet::of(x28), sp);
363 
364   __ bnez(t0, method_live);
365 
366   __ bind(bad_call);
367 
368   __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
369   __ bind(method_live);
370 }
371 
372 void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
373   // Check if the oop is in the right area of memory
374   __ mv(tmp2, (intptr_t) Universe::verify_oop_mask());
375   __ andr(tmp1, obj, tmp2);
376   __ mv(tmp2, (intptr_t) Universe::verify_oop_bits());
377 
378   // Compare tmp1 and tmp2.
379   __ bne(tmp1, tmp2, error);
380 
381   // Make sure klass is 'reasonable', which is not zero.
382   __ load_klass(obj, obj, tmp1); // get klass
383   __ beqz(obj, error);           // if klass is null it is broken
384 }
385 
386 void BarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj,
387                                                           Register tmp, Label& slow_path) {
388   // Load the oop from the weak handle without barriers.
389   __ ld(obj, Address(weak_handle));
390 }
391 
392 #ifdef COMPILER2
393 
394 OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
395   if (!OptoReg::is_reg(opto_reg)) {
396     return OptoReg::Bad;
397   }
398 
399   const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
400   if (vm_reg->is_FloatRegister()) {
401     return opto_reg & ~1;
402   }
403 
404   return opto_reg;
405 }
406 #undef __
407 #define __ _masm->
408 
409 void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
410   // Record registers that needs to be saved/restored
411   RegMaskIterator rmi(stub->preserve_set());
412   while (rmi.has_next()) {
413     const OptoReg::Name opto_reg = rmi.next();
414     if (OptoReg::is_reg(opto_reg)) {
415       const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
416       if (vm_reg->is_Register()) {
417         _gp_regs += RegSet::of(vm_reg->as_Register());
418       } else if (vm_reg->is_FloatRegister()) {
419         _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
420       } else if (vm_reg->is_VectorRegister()) {
421         const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegister::max_slots_per_register - 1));
422         _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister());
423       } else {
424         fatal("Unknown register type");
425       }
426     }
427   }
428 
429   // Remove C-ABI SOE registers and tmp regs
430   _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2, x5) + RegSet::of(x8, x9);
431 }
432 
433 SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
434   : _masm(masm),
435     _gp_regs(),
436     _fp_regs(),
437     _vp_regs() {
438   // Figure out what registers to save/restore
439   initialize(stub);
440 
441   // Save registers
442   __ push_reg(_gp_regs, sp);
443   __ push_fp(_fp_regs, sp);
444   __ push_v(_vp_regs, sp);
445 }
446 
447 SaveLiveRegisters::~SaveLiveRegisters() {
448   // Restore registers
449   __ pop_v(_vp_regs, sp);
450   __ pop_fp(_fp_regs, sp);
451   __ pop_reg(_gp_regs, sp);
452 }
453 
454 #endif // COMPILER2