/*
 * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "asm/macroAssembler.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
#include "utilities/debug.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

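// Minimal arena-allocated hash table with separately chained entries.
// TableSize must be a power of two, since slots are selected with
// (key & TableMask). Entries are allocated from the arena and never removed.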
template<typename K, typename V, size_t TableSize>
class ZArenaHashtable : public ResourceObj {
  class ZArenaHashtableEntry : public ResourceObj {
  public:
    ZArenaHashtableEntry* _next;
    K _key;
    V _value;
  };

  static const size_t TableMask = TableSize - 1;

  Arena* _arena;
  ZArenaHashtableEntry* _table[TableSize];

public:
  class Iterator {
    ZArenaHashtable* _table;
    ZArenaHashtableEntry* _current_entry;
    size_t _current_index;

  public:
    Iterator(ZArenaHashtable* table)
      : _table(table),
        _current_entry(table->_table[0]),
        _current_index(0) {
      if (_current_entry == nullptr) {
        next();
      }
    }

    bool has_next() { return _current_entry != nullptr; }
    K key()         { return _current_entry->_key; }
    V value()       { return _current_entry->_value; }

    void next() {
      if (_current_entry != nullptr) {
        _current_entry = _current_entry->_next;
      }
      while (_current_entry == nullptr && ++_current_index < TableSize) {
        _current_entry = _table->_table[_current_index];
      }
    }
  };

  ZArenaHashtable(Arena* arena)
    : _arena(arena),
      _table() {
    Copy::zero_to_bytes(&_table, sizeof(_table));
  }

  void add(K key, V value) {
    ZArenaHashtableEntry* entry = new (_arena) ZArenaHashtableEntry();
    entry->_key = key;
    entry->_value = value;
    entry->_next = _table[key & TableMask];
    _table[key & TableMask] = entry;
  }

  V* get(K key) const {
    for (ZArenaHashtableEntry* e = _table[key & TableMask]; e != nullptr; e = e->_next) {
      if (e->_key == key) {
        return &(e->_value);
      }
    }
    return nullptr;
  }

  Iterator iterator() {
    return Iterator(this);
  }
};

typedef ZArenaHashtable<intptr_t, bool, 4> ZOffsetTable;

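// Per-compilation state for ZGC barriers: the barrier stubs to be emitted
// after the main code, the number of trampoline stubs (for platforms that
// reach the stubs through trampolines), and the code offset at which the
// stubs start.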
class ZBarrierSetC2State : public BarrierSetC2State {
private:
  GrowableArray<ZBarrierStubC2*>* _stubs;
  int                             _trampoline_stubs_count;
  int                             _stubs_start_offset;

public:
  ZBarrierSetC2State(Arena* arena)
    : BarrierSetC2State(arena),
      _stubs(new (arena) GrowableArray<ZBarrierStubC2*>(arena, 8, 0, nullptr)),
      _trampoline_stubs_count(0),
      _stubs_start_offset(0) {}

  GrowableArray<ZBarrierStubC2*>* stubs() {
    return _stubs;
  }

  bool needs_liveness_data(const MachNode* mach) const {
    // Don't need liveness data for nodes whose barriers have been elided
    return mach->barrier_data() != ZBarrierElided;
  }

  bool needs_livein_data() const {
    return true;
  }

  void inc_trampoline_stubs_count() {
    assert(_trampoline_stubs_count != INT_MAX, "Overflow");
    ++_trampoline_stubs_count;
  }

  int trampoline_stubs_count() {
    return _trampoline_stubs_count;
  }

  void set_stubs_start_offset(int offset) {
    _stubs_start_offset = offset;
  }

  int stubs_start_offset() {
    return _stubs_start_offset;
  }
};

static ZBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

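// Stubs and trampoline counts are only recorded during the final emission
// pass. During scratch emission (used only to measure code size) nothing is
// registered, so stubs are not accounted for twice.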
void ZBarrierStubC2::register_stub(ZBarrierStubC2* stub) {
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }
}

void ZBarrierStubC2::inc_trampoline_stubs_count() {
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->inc_trampoline_stubs_count();
  }
}

int ZBarrierStubC2::trampoline_stubs_count() {
  return barrier_set_state()->trampoline_stubs_count();
}

int ZBarrierStubC2::stubs_start_offset() {
  return barrier_set_state()->stubs_start_offset();
}

ZBarrierStubC2::ZBarrierStubC2(const MachNode* node) : BarrierStubC2(node) {}

ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref) {
  AARCH64_ONLY(fatal("Should use ZLoadBarrierStubC2Aarch64::create"));
  ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref);
  register_stub(stub);

  return stub;
}

ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref)
  : ZBarrierStubC2(node),
    _ref_addr(ref_addr),
    _ref(ref) {
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
  // The runtime call updates the value of ref, so we should not spill and
  // reload its outdated value.
  dont_preserve(ref);
}

Address ZLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZLoadBarrierStubC2::ref() const {
  return _ref;
}

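// Selects the runtime entry for the slow path based on the reference strength
// (strong, weak, or phantom) and keep-alive semantics recorded in the node's
// barrier data.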
address ZLoadBarrierStubC2::slow_path() const {
  const uint8_t barrier_data = _node->barrier_data();
  DecoratorSet decorators = DECORATORS_NONE;
  if (barrier_data & ZBarrierStrong) {
    decorators |= ON_STRONG_OOP_REF;
  }
  if (barrier_data & ZBarrierWeak) {
    decorators |= ON_WEAK_OOP_REF;
  }
  if (barrier_data & ZBarrierPhantom) {
    decorators |= ON_PHANTOM_OOP_REF;
  }
  if (barrier_data & ZBarrierNoKeepalive) {
    decorators |= AS_NO_KEEPALIVE;
  }
  return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}

void ZLoadBarrierStubC2::emit_code(MacroAssembler& masm) {
  ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, static_cast<ZLoadBarrierStubC2*>(this));
}

ZStoreBarrierStubC2* ZStoreBarrierStubC2::create(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic, bool is_nokeepalive) {
  AARCH64_ONLY(fatal("Should use ZStoreBarrierStubC2Aarch64::create"));
  ZStoreBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZStoreBarrierStubC2(node, ref_addr, new_zaddress, new_zpointer, is_native, is_atomic, is_nokeepalive);
  register_stub(stub);

  return stub;
}

ZStoreBarrierStubC2::ZStoreBarrierStubC2(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer,
                                         bool is_native, bool is_atomic, bool is_nokeepalive)
  : ZBarrierStubC2(node),
    _ref_addr(ref_addr),
    _new_zaddress(new_zaddress),
    _new_zpointer(new_zpointer),
    _is_native(is_native),
    _is_atomic(is_atomic),
    _is_nokeepalive(is_nokeepalive) {}

Address ZStoreBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZStoreBarrierStubC2::new_zaddress() const {
  return _new_zaddress;
}

Register ZStoreBarrierStubC2::new_zpointer() const {
  return _new_zpointer;
}

bool ZStoreBarrierStubC2::is_native() const {
  return _is_native;
}

bool ZStoreBarrierStubC2::is_atomic() const {
  return _is_atomic;
}

bool ZStoreBarrierStubC2::is_nokeepalive() const {
  return _is_nokeepalive;
}

void ZStoreBarrierStubC2::emit_code(MacroAssembler& masm) {
  ZBarrierSet::assembler()->generate_c2_store_barrier_stub(&masm, static_cast<ZStoreBarrierStubC2*>(this));
}

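// Estimates how many fast-path Ideal nodes the barrier expands to. A load only
// needs to uncolor the loaded pointer (counted as one node), while stores and
// atomic accesses also need to color the stored value (counted as two).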
uint ZBarrierSetC2::estimated_barrier_size(const Node* node) const {
  uint8_t barrier_data = MemNode::barrier_data(node);
  assert(barrier_data != 0, "should be a barrier node");
  uint uncolor_or_color_size = node->is_Load() ? 1 : 2;
  if ((barrier_data & ZBarrierElided) != 0) {
    return uncolor_or_color_size;
  }
  // A compare and branch corresponds to approximately four fast-path Ideal
  // nodes (Cmp, Bool, If, If projection). The slow path (If projection and
  // runtime call) is excluded since the corresponding code is laid out
  // separately and does not directly affect performance.
  return uncolor_or_color_size + 4;
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) ZBarrierSetC2State(comp_arena);
}

void ZBarrierSetC2::late_barrier_analysis() const {
  compute_liveness_at_stubs();
  analyze_dominating_barriers();
}

void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ZBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  barrier_set_state()->set_stubs_start_offset(masm.offset());

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::max_inst_gcstub_size()) && cb.blob() == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    stubs->at(i)->emit_code(masm);
  }

  masm.flush();
}

int ZBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->output()->scratch_buffer_blob();
  GrowableArray<ZBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), checked_cast<CodeBuffer::csize_t>((address)C->output()->scratch_locs_memory() - blob->content_begin()));
    MacroAssembler masm(&cb);
    stubs->at(i)->emit_code(masm);
    size += cb.insts_size();
  }

  return size;
}

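// Translate the access decorators into ZGC barrier data bits on the access.
// Accesses that initialize a tightly coupled allocation need no barrier and
// are marked as elided up front.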
static void set_barrier_data(C2Access& access) {
  if (!ZBarrierSet::barrier_needed(access.decorators(), access.type())) {
    return;
  }

  if (access.decorators() & C2_TIGHTLY_COUPLED_ALLOC) {
    access.set_barrier_data(ZBarrierElided);
    return;
  }

  uint8_t barrier_data = 0;

  if (access.decorators() & ON_PHANTOM_OOP_REF) {
    barrier_data |= ZBarrierPhantom;
  } else if (access.decorators() & ON_WEAK_OOP_REF) {
    barrier_data |= ZBarrierWeak;
  } else {
    barrier_data |= ZBarrierStrong;
  }

  if (access.decorators() & IN_NATIVE) {
    barrier_data |= ZBarrierNative;
  }

  if (access.decorators() & AS_NO_KEEPALIVE) {
    barrier_data |= ZBarrierNoKeepalive;
  }

  access.set_barrier_data(barrier_data);
}

Node* ZBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  set_barrier_data(access);
  return BarrierSetC2::store_at_resolved(access, val);
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

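// No arraycopy barriers are needed at parse time. During the optimization
// phase only instance clones need them; at expansion time only copies with
// oop elements (T_OBJECT or T_ARRAY) do.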
bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, bool is_clone_instance,
                                                    ArrayCopyPhase phase) const {
  if (phase == ArrayCopyPhase::Parsing) {
    return false;
  }
  if (phase == ArrayCopyPhase::Optimization) {
    return is_clone_instance;
  }
  // else ArrayCopyPhase::Expansion
  return type == T_OBJECT || type == T_ARRAY;
}

#define XTOP LP64_ONLY(COMMA phase->top())

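// Expand an array clone into a leaf call to an arraycopy stub. Object arrays
// are copied as oops so that the stub applies the required load barriers;
// primitive arrays are copied as raw longs.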
void ZBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  const TypeAryPtr* const ary_ptr = src->get_ptr_type()->isa_aryptr();

  if (ac->is_clone_array() && ary_ptr != nullptr) {
    BasicType bt = ary_ptr->elem()->array_element_basic_type();
    if (is_reference_type(bt)) {
      // Clone object array
      bt = T_OBJECT;
    } else {
      // Clone primitive array
      bt = T_LONG;
    }

    Node* const ctrl = ac->in(TypeFunc::Control);
    Node* const mem = ac->in(TypeFunc::Memory);
    Node* const src = ac->in(ArrayCopyNode::Src);
    Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
    Node* const dest = ac->in(ArrayCopyNode::Dest);
    Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
    Node* length = ac->in(ArrayCopyNode::Length);

    if (bt == T_OBJECT) {
      // BarrierSetC2::clone sets the offsets via BarrierSetC2::arraycopy_payload_base_offset
      // which 8-byte aligns them to allow for word size copies. Make sure the offsets point
      // to the first element in the array when cloning object arrays. Otherwise, load
      // barriers are applied to parts of the header. Also adjust the length accordingly.
      assert(src_offset == dest_offset, "should be equal");
      const jlong offset = src_offset->get_long();
      if (offset != arrayOopDesc::base_offset_in_bytes(T_OBJECT)) {
        assert(!UseCompressedClassPointers || UseCompactObjectHeaders, "should only happen without compressed class pointers");
        assert((arrayOopDesc::base_offset_in_bytes(T_OBJECT) - offset) == BytesPerLong, "unexpected offset");
        length = phase->transform_later(new SubLNode(length, phase->longcon(1))); // Size is in longs
        src_offset = phase->longcon(arrayOopDesc::base_offset_in_bytes(T_OBJECT));
        dest_offset = src_offset;
      }
    }
    Node* const payload_src = phase->basic_plus_adr(src, src_offset);
    Node* const payload_dst = phase->basic_plus_adr(dest, dest_offset);

    const char*   copyfunc_name = "arraycopy";
    const address copyfunc_addr = phase->basictype2arraycopy(bt, nullptr, nullptr, true, copyfunc_name, true);

    const TypePtr* const raw_adr_type = TypeRawPtr::BOTTOM;
    const TypeFunc* const call_type = OptoRuntime::fast_arraycopy_Type();

    Node* const call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
    phase->transform_later(call);

    phase->igvn().replace_node(ac, call);
    return;
  }

  // Clone instance or array where 'src' is only known to be an object (ary_ptr
  // is null). This can happen in bytecode generated dynamically to implement
  // reflective array clones.
  clone_in_runtime(phase, ac, ZBarrierSetRuntime::clone_addr(), "ZBarrierSetRuntime::clone");
}

#undef XTOP

void ZBarrierSetC2::elide_dominated_barrier(MachNode* mach) const {
  mach->set_barrier_data(ZBarrierElided);
}

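// Elide barriers for accesses that are dominated by a compatible access to, or
// allocation of, the same memory location:
// - loads can be elided after a dominating allocation, strong load, store, or
//   atomic access
// - stores can be elided after a dominating allocation, store, or atomic access
// - atomics can be elided only after a dominating store or atomic access (see
//   the comment on allocations below)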
void ZBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();

  Node_List loads;
  Node_List load_dominators;

  Node_List stores;
  Node_List store_dominators;

  Node_List atomics;
  Node_List atomic_dominators;

  // Step 1 - Find accesses and allocations, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      Node* const node = block->get_node(j);
      if (node->is_Phi()) {
        if (is_allocation(node)) {
          load_dominators.push(node);
          store_dominators.push(node);
          // An allocation can't be considered to "dominate" an atomic operation.
          // A CAS, for example, requires the memory location to be store-good.
          // A dominating store or atomic instruction guarantees that, but an
          // allocation does not: the freshly initialized memory location could
          // be raw null, which isn't store-good.
        }
        continue;
      } else if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
        if ((mach->barrier_data() & ZBarrierStrong) != 0 &&
            (mach->barrier_data() & ZBarrierNoKeepalive) == 0) {
          loads.push(mach);
          load_dominators.push(mach);
        }
        break;
      case Op_StoreP:
        if (mach->barrier_data() != 0) {
          stores.push(mach);
          load_dominators.push(mach);
          store_dominators.push(mach);
          atomic_dominators.push(mach);
        }
        break;
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if (mach->barrier_data() != 0) {
          atomics.push(mach);
          load_dominators.push(mach);
          store_dominators.push(mach);
          atomic_dominators.push(mach);
        }
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses or allocations for each access
  elide_dominated_barriers(loads, load_dominators);
  elide_dominated_barriers(stores, store_dominators);
  elide_dominated_barriers(atomics, atomic_dominators);
}

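// Called during macro expansion when an access is removed together with its
// non-escaping allocation; the barrier is no longer needed and is marked as
// elided.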
void ZBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
  eliminate_gc_barrier_data(node);
}

void ZBarrierSetC2::eliminate_gc_barrier_data(Node* node) const {
  if (node->is_LoadStore()) {
    LoadStoreNode* loadstore = node->as_LoadStore();
    loadstore->set_barrier_data(ZBarrierElided);
  } else if (node->is_Mem()) {
    MemNode* mem = node->as_Mem();
    mem->set_barrier_data(ZBarrierElided);
  }
}

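// Print the barrier data bits attached to a Mach node, for debug output of
// the generated code.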
#ifndef PRODUCT
void ZBarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const {
  if ((mach->barrier_data() & ZBarrierStrong) != 0) {
    st->print("strong ");
  }
  if ((mach->barrier_data() & ZBarrierWeak) != 0) {
    st->print("weak ");
  }
  if ((mach->barrier_data() & ZBarrierPhantom) != 0) {
    st->print("phantom ");
  }
  if ((mach->barrier_data() & ZBarrierNoKeepalive) != 0) {
    st->print("nokeepalive ");
  }
  if ((mach->barrier_data() & ZBarrierNative) != 0) {
    st->print("native ");
  }
  if ((mach->barrier_data() & ZBarrierElided) != 0) {
    st->print("elided ");
  }
}
#endif // !PRODUCT