/*
 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/classLoaderData.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef COMPILER2
#include "code/vmreg.inline.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#endif // COMPILER2


#define __ masm->
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp2) {

  // LR is live.  It must be saved around calls.

  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
      if (UseCompressedOops) {
        __ ldrw(dst, src);
        if (is_not_null) {
          __ decode_heap_oop_not_null(dst);
        } else {
          __ decode_heap_oop(dst);
        }
      } else {
        __ ldr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ ldr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ load_unsigned_byte (dst, src); break;
  case T_BYTE:    __ load_signed_byte   (dst, src); break;
  case T_CHAR:    __ load_unsigned_short(dst, src); break;
  case T_SHORT:   __ load_signed_short  (dst, src); break;
  case T_INT:     __ ldrw               (dst, src); break;
  case T_LONG:    __ ldr                (dst, src); break;
  case T_ADDRESS: __ ldr                (dst, src); break;
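  // Floating-point loads populate v0 directly; dst is ignored for these cases.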
  case T_FLOAT:   __ ldrs               (v0, src);  break;
  case T_DOUBLE:  __ ldrd               (v0, src);  break;
  default: Unimplemented();
  }
}

void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
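    // noreg means a null oop store; substitute zr, which always reads as zero.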
    val = val == noreg ? zr : val;
    if (in_heap) {
      if (UseCompressedOops) {
        assert(!dst.uses(val), "not enough registers");
        if (val != zr) {
          __ encode_heap_oop(val);
        }
        __ strw(val, dst);
      } else {
        __ str(val, dst);
      }
    } else {
      assert(in_native, "why else?");
      __ str(val, dst);
    }
    break;
  }
  case T_BOOLEAN:
    __ andw(val, val, 0x1);  // boolean is true if LSB is 1
    __ strb(val, dst);
    break;
  case T_BYTE:    __ strb(val, dst); break;
  case T_CHAR:    __ strh(val, dst); break;
  case T_SHORT:   __ strh(val, dst); break;
  case T_INT:     __ strw(val, dst); break;
  case T_LONG:    __ str (val, dst); break;
  case T_ADDRESS: __ str (val, dst); break;
  case T_FLOAT:   __ strs(v0,  dst); break;
  case T_DOUBLE:  __ strd(v0,  dst); break;
  default: Unimplemented();
  }
}

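// Bulk-copy helpers used by the arraycopy stubs. For a checkcast oop
// arraycopy with compressed oops, the narrow oop is decoded after the
// load (and re-encoded before the store) so that the copy stub can
// examine the element in between, e.g. for the type check.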
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       Register dst1,
                                       Register dst2,
                                       Address src,
                                       Register tmp) {
  if (bytes == 1) {
    assert(dst2 == noreg, "invariant");
    __ ldrb(dst1, src);
  } else if (bytes == 2) {
    assert(dst2 == noreg, "invariant");
    __ ldrh(dst1, src);
  } else if (bytes == 4) {
    assert(dst2 == noreg, "invariant");
    __ ldrw(dst1, src);
  } else if (bytes == 8) {
    assert(dst2 == noreg, "invariant");
    __ ldr(dst1, src);
  } else if (bytes == 16) {
    assert(dst2 != noreg, "invariant");
    assert(dst2 != dst1, "invariant");
    __ ldp(dst1, dst2, src);
  } else {
    // Not the right size
    ShouldNotReachHere();
  }
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ decode_heap_oop(dst1);
  }
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        Register src1,
                                        Register src2,
                                        Register tmp1,
                                        Register tmp2,
                                        Register tmp3) {
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ encode_heap_oop(src1);
  }
  if (bytes == 1) {
    assert(src2 == noreg, "invariant");
    __ strb(src1, dst);
  } else if (bytes == 2) {
    assert(src2 == noreg, "invariant");
    __ strh(src1, dst);
  } else if (bytes == 4) {
    assert(src2 == noreg, "invariant");
    __ strw(src1, dst);
  } else if (bytes == 8) {
    assert(src2 == noreg, "invariant");
    __ str(src1, dst);
  } else if (bytes == 16) {
    assert(src2 != noreg, "invariant");
    assert(src2 != src1, "invariant");
    __ stp(src1, src2, dst);
  } else {
    // Not the right size
    ShouldNotReachHere();
  }
}

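// 32-byte vector variants of the bulk-copy helpers: these move a pair of
// 128-bit q registers at a time for SIMD-accelerated copy stubs.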
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       FloatRegister dst1,
                                       FloatRegister dst2,
                                       Address src,
                                       Register tmp1,
                                       Register tmp2,
                                       FloatRegister vec_tmp) {
  if (bytes == 32) {
    __ ldpq(dst1, dst2, src);
  } else {
    ShouldNotReachHere();
  }
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        FloatRegister src1,
                                        FloatRegister src2,
                                        Register tmp1,
                                        Register tmp2,
                                        Register tmp3,
                                        FloatRegister vec_tmp1,
                                        FloatRegister vec_tmp2,
                                        FloatRegister vec_tmp3) {
  if (bytes == 32) {
    __ stpq(src1, src2, dst);
  } else {
    ShouldNotReachHere();
  }
}

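// Resolve a jobject in native code: clear the handle tag bits and load the
// object through the handle. The generic implementation never takes the
// slow path, so `slowpath` is unused here.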
void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                        Register obj, Register tmp, Label& slowpath) {
  // If the mask changes, we need to ensure that the inverse is still encodable as an immediate.
  STATIC_ASSERT(JNIHandles::tag_mask == 0b11);
  __ andr(obj, obj, ~JNIHandles::tag_mask);
  __ ldr(obj, Address(obj, 0));             // *obj
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
                                        Register var_size_in_bytes,
                                        int con_size_in_bytes,
                                        Register t1,
                                        Register t2,
                                        Label& slow_case) {
  assert_different_registers(obj, t2);
  assert_different_registers(obj, var_size_in_bytes);
  Register end = t2;

  // verify_tlab();

  __ ldr(obj, Address(rthread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    __ lea(end, Address(obj, con_size_in_bytes));
  } else {
    __ lea(end, Address(obj, var_size_in_bytes));
  }
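  // Compare the new top against the TLAB end; if it is unsigned-higher,
  // the allocation does not fit and we must take the slow path.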
  __ ldr(rscratch1, Address(rthread, JavaThread::tlab_end_offset()));
  __ cmp(end, rscratch1);
  __ br(Assembler::HI, slow_case);

  // update the tlab top pointer
  __ str(end, Address(rthread, JavaThread::tlab_top_offset()));

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    __ sub(var_size_in_bytes, var_size_in_bytes, obj);
  }
  // verify_tlab();
}

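// Global epoch counter used by the concurrent instruction patching scheme
// in nmethod_entry_barrier below, to let threads elide fencing that has
// already been performed since the last nmethod disarm.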
static volatile uint32_t _patching_epoch = 0;

address BarrierSetAssembler::patching_epoch_addr() {
  return (address)&_patching_epoch;
}

void BarrierSetAssembler::increment_patching_epoch() {
  Atomic::inc(&_patching_epoch);
}

void BarrierSetAssembler::clear_patching_epoch() {
  _patching_epoch = 0;
}

void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();

  if (bs_nm == nullptr) {
    return;
  }

  Label local_guard;
  Label skip_barrier;
  NMethodPatchingType patching_type = nmethod_patching_type();

  if (slow_path == nullptr) {
    guard = &local_guard;
  }

  // If the slow path is out of line in a stub, we flip the condition
  Assembler::Condition condition = slow_path == nullptr ? Assembler::EQ : Assembler::NE;
  Label& barrier_target = slow_path == nullptr ? skip_barrier : *slow_path;

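  // Load the nmethod's guard value.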
  __ ldrw(rscratch1, *guard);

  if (patching_type == NMethodPatchingType::stw_instruction_and_data_patch) {
    // With STW patching, no data or instructions are updated concurrently,
    // so no fencing is needed here for either data or instruction
    // modifications. The instruction patching is handled with ISB fences
    // on the way back from the safepoint to Java, so here we can use a
    // plain conditional branch with no fencing.
    Address thread_disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
    __ ldrw(rscratch2, thread_disarmed_addr);
    __ cmp(rscratch1, rscratch2);
  } else if (patching_type == NMethodPatchingType::conc_instruction_and_data_patch) {
    // If we patch code we need both a code patching fence and a loadload
    // fence. Those are not cheap, so we use a global epoch mechanism
    // to hide them in a slow path.
    // The high-level idea of the global epoch mechanism is to detect
    // when any thread has performed the required fencing after the
    // last nmethod was disarmed. This implies that the required
    // fencing has been performed for all preceding nmethod disarms
    // as well. Therefore, we do not need any further fencing.
    __ lea(rscratch2, ExternalAddress((address)&_patching_epoch));
    // Embed an artificial data dependency to order the guard load
    // before the epoch load.
    __ orr(rscratch2, rscratch2, rscratch1, Assembler::LSR, 32);
    // Read the global epoch value.
    __ ldrw(rscratch2, rscratch2);
    // Combine the guard value (low order) with the epoch value (high order).
    __ orr(rscratch1, rscratch1, rscratch2, Assembler::LSL, 32);
    // Compare the global values with the thread-local values.
    Address thread_disarmed_and_epoch_addr(rthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
    __ ldr(rscratch2, thread_disarmed_and_epoch_addr);
    __ cmp(rscratch1, rscratch2);
  } else {
    assert(patching_type == NMethodPatchingType::conc_data_patch, "must be");
    // Subsequent loads of oops must occur after the load of the guard value.
    // BarrierSetNMethod::disarm sets the guard with release semantics.
    __ membar(__ LoadLoad);
    Address thread_disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
    __ ldrw(rscratch2, thread_disarmed_addr);
    __ cmpw(rscratch1, rscratch2);
  }
  __ br(condition, barrier_target);

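  // With no out-of-line slow path, the barrier stub call is emitted inline,
  // followed by the guard word that the branch above skips in the common case.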
  if (slow_path == nullptr) {
    __ lea(rscratch1, RuntimeAddress(StubRoutines::method_entry_barrier()));
    __ blr(rscratch1);
    __ b(skip_barrier);

    __ bind(local_guard);

    __ emit_int32(0);   // nmethod guard value. Skipped over in the common case.
  } else {
    __ bind(*continuation);
  }

  __ bind(skip_barrier);
}

void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod();
  if (bs == nullptr) {
    return;
  }

  Label bad_call;
  __ cbz(rmethod, bad_call);

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(rscratch1, rmethod);

  // Is it a strong CLD?
  __ ldrw(rscratch2, Address(rscratch1, ClassLoaderData::keep_alive_ref_count_offset()));
  __ cbnz(rscratch2, method_live);

  // Is it a weak but alive CLD?
  __ push(RegSet::of(r10), sp);
  __ ldr(r10, Address(rscratch1, ClassLoaderData::holder_offset()));

  __ resolve_weak_handle(r10, rscratch1, rscratch2);
  __ mov(rscratch1, r10);
  __ pop(RegSet::of(r10), sp);
  __ cbnz(rscratch1, method_live);

  __ bind(bad_call);

  __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);
}

void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
  // Check if the oop is in the right area of memory
  __ mov(tmp2, (intptr_t) Universe::verify_oop_mask());
  __ andr(tmp1, obj, tmp2);
  __ mov(tmp2, (intptr_t) Universe::verify_oop_bits());

  // Compare tmp1 and tmp2.  We don't use a compare
  // instruction here because the flags register is live.
  __ eor(tmp1, tmp1, tmp2);
  __ cbnz(tmp1, error);

  // Make sure the klass is 'reasonable', i.e. non-null.
  __ load_klass(obj, obj); // get klass
  __ cbz(obj, error);      // if klass is null it is broken
}

#ifdef COMPILER2

OptoReg::Name BarrierSetAssembler::encode_float_vector_register_size(const Node* node, OptoReg::Name opto_reg) {
  switch (node->ideal_reg()) {
    case Op_RegF:
      // No need to refine: the original encoding already distinguishes this size.
      assert(opto_reg % 4 == 0, "Float register should only occupy a single slot");
      break;
    // Use different encoding values of the same fp/vector register to help distinguish
    // different sizes. For example, for V16 the OptoReg names and their corresponding
    // slot values are "V16": 64, "V16_H": 65, "V16_J": 66, "V16_K": 67.
    case Op_RegD:
    case Op_VecD:
      opto_reg &= ~3;
      opto_reg |= 1;
      break;
    case Op_VecX:
      opto_reg &= ~3;
      opto_reg |= 2;
      break;
    case Op_VecA:
      opto_reg &= ~3;
      opto_reg |= 3;
      break;
    default:
      assert(false, "unexpected ideal register");
      ShouldNotReachHere();
  }
  return opto_reg;
}

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  if (!OptoReg::is_reg(opto_reg)) {
    return OptoReg::Bad;
  }

  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
  if (vm_reg->is_FloatRegister()) {
    opto_reg = encode_float_vector_register_size(node, opto_reg);
  }

  return opto_reg;
}

#undef __
#define __ _masm->

void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
  int index = -1;
  GrowableArray<RegisterData> registers;
  VMReg prev_vm_reg = VMRegImpl::Bad();

  RegMaskIterator rmi(stub->preserve_set());
  while (rmi.has_next()) {
    OptoReg::Name opto_reg = rmi.next();
    VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

    if (vm_reg->is_Register()) {
      // A GPR may have one or two slots in the regmask.
      // Determine whether the current vm_reg is the same physical register as the previous one.
      if (is_same_register(vm_reg, prev_vm_reg)) {
        registers.at(index)._slots++;
      } else {
        RegisterData reg_data = { vm_reg, 1 };
        index = registers.append(reg_data);
      }
    } else if (vm_reg->is_FloatRegister()) {
      // The OptoRegs in stub->preserve_set() carry a size encoding, so a
      // float/NEON/SVE register occupies only one slot in the regmask.
      // Decode the encoding to recover the actual size.
      VMReg vm_reg_base = vm_reg->as_FloatRegister()->as_VMReg();
      int slots = decode_float_vector_register_size(opto_reg);
      RegisterData reg_data = { vm_reg_base, slots };
      index = registers.append(reg_data);
    } else if (vm_reg->is_PRegister()) {
      // A PRegister has only one slot in the regmask.
      RegisterData reg_data = { vm_reg, 1 };
      index = registers.append(reg_data);
    } else {
      assert(false, "Unknown register type");
      ShouldNotReachHere();
    }
    prev_vm_reg = vm_reg;
  }

  // Record registers that need to be saved/restored.
  for (GrowableArrayIterator<RegisterData> it = registers.begin(); it != registers.end(); ++it) {
    RegisterData reg_data = *it;
    VMReg vm_reg = reg_data._reg;
    int slots = reg_data._slots;
    if (vm_reg->is_Register()) {
      assert(slots == 1 || slots == 2, "Unexpected register save size");
      _gp_regs += RegSet::of(vm_reg->as_Register());
    } else if (vm_reg->is_FloatRegister()) {
      if (slots == 1 || slots == 2) {
        _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
      } else if (slots == 4) {
        _neon_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
      } else {
        assert(slots == Matcher::scalable_vector_reg_size(T_FLOAT), "Unexpected register save size");
        _sve_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
      }
    } else {
      assert(vm_reg->is_PRegister() && slots == 1, "Unknown register type");
      _p_regs += PRegSet::of(vm_reg->as_PRegister());
    }
  }

  // Remove C-ABI callee-saved (SOE) registers and scratch regs.
  _gp_regs -= RegSet::range(r19, r30) + RegSet::of(r8, r9);

  // Remove C-ABI callee-saved (SOE) fp registers.
  _fp_regs -= FloatRegSet::range(v8, v15);
}

enum RC SaveLiveRegisters::rc_class(VMReg reg) {
  if (reg->is_reg()) {
    if (reg->is_Register()) {
      return rc_int;
    } else if (reg->is_FloatRegister()) {
      return rc_float;
    } else if (reg->is_PRegister()) {
      return rc_predicate;
    }
  }
  if (reg->is_stack()) {
    return rc_stack;
  }
  return rc_bad;
}

bool SaveLiveRegisters::is_same_register(VMReg reg1, VMReg reg2) {
  if (reg1 == reg2) {
    return true;
  }
  if (rc_class(reg1) == rc_class(reg2)) {
    if (reg1->is_Register()) {
      return reg1->as_Register() == reg2->as_Register();
    } else if (reg1->is_FloatRegister()) {
      return reg1->as_FloatRegister() == reg2->as_FloatRegister();
    } else if (reg1->is_PRegister()) {
      return reg1->as_PRegister() == reg2->as_PRegister();
    }
  }
  return false;
}

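// Inverse of encode_float_vector_register_size: the low two bits of the
// refined OptoReg encode the register width in 32-bit slots.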
int SaveLiveRegisters::decode_float_vector_register_size(OptoReg::Name opto_reg) {
  switch (opto_reg & 3) {
    case 0:
      return 1;
    case 1:
      return 2;
    case 2:
      return 4;
    case 3:
      return Matcher::scalable_vector_reg_size(T_FLOAT);
    default:
      ShouldNotReachHere();
      return 0;
  }
}

SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_regs(),
    _fp_regs(),
    _neon_regs(),
    _sve_regs(),
    _p_regs() {

  // Figure out what registers to save/restore
  initialize(stub);

  // Save registers
  __ push(_gp_regs, sp);
  __ push_fp(_fp_regs, sp, MacroAssembler::PushPopFp);
  __ push_fp(_neon_regs, sp, MacroAssembler::PushPopNeon);
  __ push_fp(_sve_regs, sp, MacroAssembler::PushPopSVE);
  __ push_p(_p_regs, sp);
}

SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore registers in the reverse order of saving
  __ pop_p(_p_regs, sp);
  __ pop_fp(_sve_regs, sp, MacroAssembler::PushPopSVE);
  __ pop_fp(_neon_regs, sp, MacroAssembler::PushPopNeon);
  __ pop_fp(_fp_regs, sp, MacroAssembler::PushPopFp);

  // An external runtime call may clobber the SVE ptrue register.
  __ reinitialize_ptrue();

  __ pop(_gp_regs, sp);
}

#endif // COMPILER2