/*
 * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
#include "utilities/debug.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

template<typename K, typename V, size_t _table_size>
class ZArenaHashtable : public ResourceObj {
  class ZArenaHashtableEntry : public ResourceObj {
  public:
    ZArenaHashtableEntry* _next;
    K _key;
    V _value;
  };

  static const size_t _table_mask = _table_size - 1;

  Arena* _arena;
  ZArenaHashtableEntry* _table[_table_size];

public:
  class Iterator {
    ZArenaHashtable* _table;
    ZArenaHashtableEntry* _current_entry;
    size_t _current_index;

  public:
    Iterator(ZArenaHashtable* table)
      : _table(table),
        _current_entry(table->_table[0]),
        _current_index(0) {
      if (_current_entry == nullptr) {
        next();
      }
    }

    bool has_next() { return _current_entry != nullptr; }
    K key()         { return _current_entry->_key; }
    V value()       { return _current_entry->_value; }

    void next() {
      if (_current_entry != nullptr) {
        _current_entry = _current_entry->_next;
      }
      while (_current_entry == nullptr && ++_current_index < _table_size) {
        _current_entry = _table->_table[_current_index];
      }
    }
  };

  ZArenaHashtable(Arena* arena)
    : _arena(arena),
      _table() {
    Copy::zero_to_bytes(&_table, sizeof(_table));
  }

  void add(K key, V value) {
    ZArenaHashtableEntry* entry = new (_arena) ZArenaHashtableEntry();
    entry->_key = key;
    entry->_value = value;
    entry->_next = _table[key & _table_mask];
    _table[key & _table_mask] = entry;
  }

  V* get(K key) const {
    for (ZArenaHashtableEntry* e = _table[key & _table_mask]; e != nullptr; e = e->_next) {
      if (e->_key == key) {
        return &(e->_value);
      }
    }
    return nullptr;
  }

  Iterator iterator() {
    return Iterator(this);
  }
};

typedef ZArenaHashtable<intptr_t, bool, 4> ZOffsetTable;
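
// Illustrative usage sketch (not part of the production code in this file):
// how a ZArenaHashtable such as ZOffsetTable might be filled and walked. The
// arena source and the key/value constants below are made up for the example;
// note that the table size must be a power of two so that _table_mask selects
// a bucket from the low bits of the key.
//
//   ZOffsetTable table(Thread::current()->resource_area());
//   table.add(16, true);
//   if (bool* value = table.get(16)) {
//     // value points into the arena-allocated entry
//   }
//   for (ZOffsetTable::Iterator it = table.iterator(); it.has_next(); it.next()) {
//     tty->print_cr("offset " INTPTR_FORMAT ": %d", it.key(), it.value());
//   }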

class ZBarrierSetC2State : public ArenaObj {
private:
  GrowableArray<ZBarrierStubC2*>* _stubs;
  Node_Array                      _live;
  int                             _trampoline_stubs_count;
  int                             _stubs_start_offset;

public:
  ZBarrierSetC2State(Arena* arena)
    : _stubs(new (arena) GrowableArray<ZBarrierStubC2*>(arena, 8, 0, nullptr)),
      _live(arena),
      _trampoline_stubs_count(0),
      _stubs_start_offset(0) {}

  GrowableArray<ZBarrierStubC2*>* stubs() {
    return _stubs;
  }

  RegMask* live(const Node* node) {
    if (!node->is_Mach()) {
      // Don't need liveness for non-MachNodes
      return nullptr;
    }

    const MachNode* const mach = node->as_Mach();
    if (mach->barrier_data() == ZBarrierElided) {
      // Don't need liveness data for nodes without barriers
      return nullptr;
    }

    RegMask* live = (RegMask*)_live[node->_idx];
    if (live == nullptr) {
      live = new (Compile::current()->comp_arena()->AmallocWords(sizeof(RegMask))) RegMask();
      _live.map(node->_idx, (Node*)live);
    }

    return live;
  }

  void inc_trampoline_stubs_count() {
    assert(_trampoline_stubs_count != INT_MAX, "Overflow");
    ++_trampoline_stubs_count;
  }

  int trampoline_stubs_count() {
    return _trampoline_stubs_count;
  }

  void set_stubs_start_offset(int offset) {
    _stubs_start_offset = offset;
  }

  int stubs_start_offset() {
    return _stubs_start_offset;
  }
};

static ZBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

void ZBarrierStubC2::register_stub(ZBarrierStubC2* stub) {
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }
}

void ZBarrierStubC2::inc_trampoline_stubs_count() {
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->inc_trampoline_stubs_count();
  }
}

int ZBarrierStubC2::trampoline_stubs_count() {
  return barrier_set_state()->trampoline_stubs_count();
}

int ZBarrierStubC2::stubs_start_offset() {
  return barrier_set_state()->stubs_start_offset();
}

ZBarrierStubC2::ZBarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation() {}

Register ZBarrierStubC2::result() const {
  return noreg;
}

RegMask& ZBarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

Label* ZBarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* ZBarrierStubC2::continuation() {
  return &_continuation;
}

ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref) {
  AARCH64_ONLY(fatal("Should use ZLoadBarrierStubC2Aarch64::create"));
  ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref);
  register_stub(stub);

  return stub;
}

ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref)
  : ZBarrierStubC2(node),
    _ref_addr(ref_addr),
    _ref(ref) {
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
}

Address ZLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZLoadBarrierStubC2::ref() const {
  return _ref;
}

Register ZLoadBarrierStubC2::result() const {
  return ref();
}

address ZLoadBarrierStubC2::slow_path() const {
  const uint8_t barrier_data = _node->barrier_data();
  DecoratorSet decorators = DECORATORS_NONE;
  if (barrier_data & ZBarrierStrong) {
    decorators |= ON_STRONG_OOP_REF;
  }
  if (barrier_data & ZBarrierWeak) {
    decorators |= ON_WEAK_OOP_REF;
  }
  if (barrier_data & ZBarrierPhantom) {
    decorators |= ON_PHANTOM_OOP_REF;
  }
  if (barrier_data & ZBarrierNoKeepalive) {
    decorators |= AS_NO_KEEPALIVE;
  }
  return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}
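
// Worked example (illustrative only): a reference load tagged with
// (ZBarrierWeak | ZBarrierNoKeepalive) resolves to the runtime entry for
// (ON_WEAK_OOP_REF | AS_NO_KEEPALIVE), while a plain strong load resolves to
// the ON_STRONG_OOP_REF entry. Note that ZBarrierNative is not consulted when
// selecting the slow-path address above.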

void ZLoadBarrierStubC2::emit_code(MacroAssembler& masm) {
  ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, static_cast<ZLoadBarrierStubC2*>(this));
}

ZStoreBarrierStubC2* ZStoreBarrierStubC2::create(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic) {
  AARCH64_ONLY(fatal("Should use ZStoreBarrierStubC2Aarch64::create"));
  ZStoreBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZStoreBarrierStubC2(node, ref_addr, new_zaddress, new_zpointer, is_native, is_atomic);
  register_stub(stub);

  return stub;
}

ZStoreBarrierStubC2::ZStoreBarrierStubC2(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic)
  : ZBarrierStubC2(node),
    _ref_addr(ref_addr),
    _new_zaddress(new_zaddress),
    _new_zpointer(new_zpointer),
    _is_native(is_native),
    _is_atomic(is_atomic) {}

Address ZStoreBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZStoreBarrierStubC2::new_zaddress() const {
  return _new_zaddress;
}

Register ZStoreBarrierStubC2::new_zpointer() const {
  return _new_zpointer;
}

bool ZStoreBarrierStubC2::is_native() const {
  return _is_native;
}

bool ZStoreBarrierStubC2::is_atomic() const {
  return _is_atomic;
}

Register ZStoreBarrierStubC2::result() const {
  return noreg;
}

void ZStoreBarrierStubC2::emit_code(MacroAssembler& masm) {
  ZBarrierSet::assembler()->generate_c2_store_barrier_stub(&masm, static_cast<ZStoreBarrierStubC2*>(this));
}

uint ZBarrierSetC2::estimated_barrier_size(const Node* node) const {
  uint8_t barrier_data = MemNode::barrier_data(node);
  assert(barrier_data != 0, "should be a barrier node");
  uint uncolor_or_color_size = node->is_Load() ? 1 : 2;
  if ((barrier_data & ZBarrierElided) != 0) {
    return uncolor_or_color_size;
  }
  // A compare and branch corresponds to approximately four fast-path Ideal
  // nodes (Cmp, Bool, If, If projection). The slow path (If projection and
  // runtime call) is excluded since the corresponding code is laid out
  // separately and does not directly affect performance.
  return uncolor_or_color_size + 4;
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) ZBarrierSetC2State(comp_arena);
}

void ZBarrierSetC2::late_barrier_analysis() const {
  compute_liveness_at_stubs();
  analyze_dominating_barriers();
}

void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ZBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  barrier_set_state()->set_stubs_start_offset(masm.offset());

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    stubs->at(i)->emit_code(masm);
  }

  masm.flush();
}

int ZBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->output()->scratch_buffer_blob();
  GrowableArray<ZBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), (address)C->output()->scratch_locs_memory() - blob->content_begin());
    MacroAssembler masm(&cb);
    stubs->at(i)->emit_code(masm);
    size += cb.insts_size();
  }

  return size;
}

static void set_barrier_data(C2Access& access) {
  if (!ZBarrierSet::barrier_needed(access.decorators(), access.type())) {
    return;
  }

  if (access.decorators() & C2_TIGHTLY_COUPLED_ALLOC) {
    access.set_barrier_data(ZBarrierElided);
    return;
  }

  uint8_t barrier_data = 0;

  if (access.decorators() & ON_PHANTOM_OOP_REF) {
    barrier_data |= ZBarrierPhantom;
  } else if (access.decorators() & ON_WEAK_OOP_REF) {
    barrier_data |= ZBarrierWeak;
  } else {
    barrier_data |= ZBarrierStrong;
  }

  if (access.decorators() & IN_NATIVE) {
    barrier_data |= ZBarrierNative;
  }

  if (access.decorators() & AS_NO_KEEPALIVE) {
    barrier_data |= ZBarrierNoKeepalive;
  }

  access.set_barrier_data(barrier_data);
}

Node* ZBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  set_barrier_data(access);
  return BarrierSetC2::store_at_resolved(access, val);
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, bool is_clone_instance,
                                                    ArrayCopyPhase phase) const {
  if (phase == ArrayCopyPhase::Parsing) {
    return false;
  }
  if (phase == ArrayCopyPhase::Optimization) {
    return is_clone_instance;
  }
  // else ArrayCopyPhase::Expansion
  return type == T_OBJECT || type == T_ARRAY;
}

// This TypeFunc assumes a 64-bit system
static const TypeFunc* clone_type() {
  // Create input type (domain)
  const Type** const domain_fields = TypeTuple::fields(4);
  domain_fields[TypeFunc::Parms + 0] = TypeInstPtr::NOTNULL;  // src
  domain_fields[TypeFunc::Parms + 1] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[TypeFunc::Parms + 2] = TypeLong::LONG;        // size lower
  domain_fields[TypeFunc::Parms + 3] = Type::HALF;            // size upper
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + 4, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}
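
// Put differently (sketch, assuming the usual 64-bit two-slot convention for
// jlong arguments): the domain above describes a call with no result of the
// shape
//   clone(oop src, oop dst, jlong size_in_words)
// where the size argument occupies two parameter slots (TypeLong::LONG plus
// Type::HALF), matching the leaf call emitted by clone_at_expansion() below.
// The name size_in_words follows the comment in the instance-clone path; it
// is not a name used by the runtime itself.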

#define XTOP LP64_ONLY(COMMA phase->top())
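
// A brief note on XTOP (based on how LP64_ONLY and COMMA expand): on 64-bit
// platforms a jlong argument takes two slots in a C2 call, so the calls below
// append phase->top() as the high half of the trailing long argument; on
// 32-bit platforms the macro expands to nothing.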

void ZBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  const TypeAryPtr* const ary_ptr = src->get_ptr_type()->isa_aryptr();

  if (ac->is_clone_array() && ary_ptr != nullptr) {
    BasicType bt = ary_ptr->elem()->array_element_basic_type();
    if (is_reference_type(bt)) {
      // Clone object array
      bt = T_OBJECT;
    } else {
      // Clone primitive array
      bt = T_LONG;
    }

    Node* const ctrl = ac->in(TypeFunc::Control);
    Node* const mem = ac->in(TypeFunc::Memory);
    Node* const src = ac->in(ArrayCopyNode::Src);
    Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
    Node* const dest = ac->in(ArrayCopyNode::Dest);
    Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
    Node* length = ac->in(ArrayCopyNode::Length);

    if (bt == T_OBJECT) {
      // BarrierSetC2::clone sets the offsets via BarrierSetC2::arraycopy_payload_base_offset
      // which 8-byte aligns them to allow for word size copies. Make sure the offsets point
      // to the first element in the array when cloning object arrays. Otherwise, load
      // barriers are applied to parts of the header. Also adjust the length accordingly.
      assert(src_offset == dest_offset, "should be equal");
      const jlong offset = src_offset->get_long();
      if (offset != arrayOopDesc::base_offset_in_bytes(T_OBJECT)) {
        assert(!UseCompressedClassPointers || UseCompactObjectHeaders, "should only happen without compressed class pointers");
        assert((arrayOopDesc::base_offset_in_bytes(T_OBJECT) - offset) == BytesPerLong, "unexpected offset");
        length = phase->transform_later(new SubLNode(length, phase->longcon(1))); // Size is in longs
        src_offset = phase->longcon(arrayOopDesc::base_offset_in_bytes(T_OBJECT));
        dest_offset = src_offset;
      }
    }
    Node* const payload_src = phase->basic_plus_adr(src, src_offset);
    Node* const payload_dst = phase->basic_plus_adr(dest, dest_offset);

    const char*   copyfunc_name = "arraycopy";
    const address copyfunc_addr = phase->basictype2arraycopy(bt, nullptr, nullptr, true, copyfunc_name, true);

    const TypePtr* const raw_adr_type = TypeRawPtr::BOTTOM;
    const TypeFunc* const call_type = OptoRuntime::fast_arraycopy_Type();

    Node* const call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
    phase->transform_later(call);

    phase->igvn().replace_node(ac, call);
    return;
  }

  // Clone instance
  Node* const ctrl       = ac->in(TypeFunc::Control);
  Node* const mem        = ac->in(TypeFunc::Memory);
  Node* const dst        = ac->in(ArrayCopyNode::Dest);
  Node* const size       = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->is_long(), "Should be long");

  // The native clone we are calling here expects the instance size in words.
  // Add header/offset size to payload size to get instance size.
  Node* const base_offset = phase->longcon(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddLNode(size, base_offset));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           ZBarrierSetRuntime::clone_addr(),
                                           "ZBarrierSetRuntime::clone",
                                           TypeRawPtr::BOTTOM,
                                           src,
                                           dst,
                                           full_size,
                                           phase->top());
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

#undef XTOP

// == Dominating barrier elision ==

static bool block_has_safepoint(const Block* block, uint from, uint to) {
  for (uint i = from; i < to; i++) {
    if (block->get_node(i)->is_MachSafePoint()) {
      // Safepoint found
      return true;
    }
  }

  // Safepoint not found
  return false;
}

static bool block_has_safepoint(const Block* block) {
  return block_has_safepoint(block, 0, block->number_of_nodes());
}

static uint block_index(const Block* block, const Node* node) {
  for (uint j = 0; j < block->number_of_nodes(); ++j) {
    if (block->get_node(j) == node) {
      return j;
    }
  }
  ShouldNotReachHere();
  return 0;
}

// Look through various node aliases
static const Node* look_through_node(const Node* node) {
  while (node != nullptr) {
    const Node* new_node = node;
    if (node->is_Mach()) {
      const MachNode* const node_mach = node->as_Mach();
      if (node_mach->ideal_Opcode() == Op_CheckCastPP) {
        new_node = node->in(1);
      }
      if (node_mach->is_SpillCopy()) {
        new_node = node->in(1);
      }
    }
    if (new_node == node || new_node == nullptr) {
      break;
    } else {
      node = new_node;
    }
  }

  return node;
}

// Whether the given offset is undefined.
static bool is_undefined(intptr_t offset) {
  return offset == Type::OffsetTop;
}

// Whether the given offset is unknown.
static bool is_unknown(intptr_t offset) {
  return offset == Type::OffsetBot;
}

// Whether the given offset is concrete (defined and compile-time known).
static bool is_concrete(intptr_t offset) {
  return !is_undefined(offset) && !is_unknown(offset);
}
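
// Examples (illustrative): a field access such as LoadP [obj + 16] has the
// concrete offset 16; an element access whose index is only known at run time
// typically folds its offset to Type::OffsetBot and is therefore "unknown";
// Type::OffsetTop marks an offset the type system considers undefined.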

// Compute base + offset components of the memory address accessed by mach.
// Return a node representing the base address, or null if the base cannot be
// found or the offset is undefined or a concrete negative value. If a non-null
// base is returned, the offset is a concrete, nonnegative value or unknown.
static const Node* get_base_and_offset(const MachNode* mach, intptr_t& offset) {
  const TypePtr* adr_type = nullptr;
  offset = 0;
  const Node* base = mach->get_base_and_disp(offset, adr_type);

  if (base == nullptr || base == NodeSentinel) {
    return nullptr;
  }

  if (offset == 0 && base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_AddP) {
    // The memory address is computed by 'base' and fed to 'mach' via an
    // indirect memory operand (indicated by offset == 0). The ultimate base and
    // offset can be fetched directly from the inputs and Ideal type of 'base'.
    offset = base->bottom_type()->isa_oopptr()->offset();
    // Even if 'base' is not an Ideal AddP node anymore, Matcher::ReduceInst()
    // guarantees that the base address is still available at the same slot.
    base = base->in(AddPNode::Base);
    assert(base != nullptr, "");
  }

  if (is_undefined(offset) || (is_concrete(offset) && offset < 0)) {
    return nullptr;
  }

  return look_through_node(base);
}

// Whether a phi node corresponds to an array allocation.
// This test is incomplete: in some edge cases, it might return false even
// though the node does correspond to an array allocation.
static bool is_array_allocation(const Node* phi) {
  precond(phi->is_Phi());
  // Check whether phi has a successor cast (CheckCastPP) to Java array pointer,
  // possibly below spill copies and other cast nodes. Limit the exploration to
  // a single path from the phi node consisting of these node types.
  const Node* current = phi;
  while (true) {
    const Node* next = nullptr;
    for (DUIterator_Fast imax, i = current->fast_outs(imax); i < imax; i++) {
      if (!current->fast_out(i)->isa_Mach()) {
        continue;
      }
      const MachNode* succ = current->fast_out(i)->as_Mach();
      if (succ->ideal_Opcode() == Op_CheckCastPP) {
        if (succ->get_ptr_type()->isa_aryptr()) {
          // Cast to Java array pointer: phi corresponds to an array allocation.
          return true;
        }
        // Other cast: record as candidate for further exploration.
        next = succ;
      } else if (succ->is_SpillCopy() && next == nullptr) {
        // Spill copy, and no better candidate found: record as candidate.
        next = succ;
      }
    }
    if (next == nullptr) {
      // No evidence found that phi corresponds to an array allocation, and no
      // candidates available to continue exploring.
      return false;
    }
    // Continue exploring from the best candidate found.
    current = next;
  }
  ShouldNotReachHere();
}

// Match the phi node that connects a TLAB allocation fast path with its slow path.
static bool is_allocation(const Node* node) {
  if (node->req() != 3) {
    return false;
  }
  const Node* const fast_node = node->in(2);
  if (!fast_node->is_Mach()) {
    return false;
  }
  const MachNode* const fast_mach = fast_node->as_Mach();
  if (fast_mach->ideal_Opcode() != Op_LoadP) {
    return false;
  }
  const TypePtr* const adr_type = nullptr;
  intptr_t offset;
  const Node* const base = get_base_and_offset(fast_mach, offset);
  if (base == nullptr || !base->is_Mach() || !is_concrete(offset)) {
    return false;
  }
  const MachNode* const base_mach = base->as_Mach();
  if (base_mach->ideal_Opcode() != Op_ThreadLocal) {
    return false;
  }
  return offset == in_bytes(Thread::tlab_top_offset());
}
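
// The shape matched above is, roughly (illustrative):
//
//   Phi(Region, slow_path_result, LoadP [ThreadLocal + tlab_top_offset])
//
// where input 2 is the fast-path result (the object address read from the
// thread-local TLAB top) and input 1 is whatever the slow-path call produced.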

static void elide_mach_barrier(MachNode* mach) {
  mach->set_barrier_data(ZBarrierElided);
}

void ZBarrierSetC2::analyze_dominating_barriers_impl(Node_List& accesses, Node_List& access_dominators) const {
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();

  for (uint i = 0; i < accesses.size(); i++) {
    MachNode* const access = accesses.at(i)->as_Mach();
    intptr_t access_offset;
    const Node* const access_obj = get_base_and_offset(access, access_offset);
    Block* const access_block = cfg->get_block_for_node(access);
    const uint access_index = block_index(access_block, access);

    if (access_obj == nullptr) {
      // No information available
      continue;
    }

    for (uint j = 0; j < access_dominators.size(); j++) {
      const Node* const mem = access_dominators.at(j);
      if (mem->is_Phi()) {
        // Allocation node
        if (mem != access_obj) {
          continue;
        }
        if (is_unknown(access_offset) && !is_array_allocation(mem)) {
          // The accessed address has an unknown offset, but the allocated
          // object cannot be determined to be an array. Avoid eliding in this
          // case, to be on the safe side.
          continue;
        }
        assert((is_concrete(access_offset) && access_offset >= 0) || (is_unknown(access_offset) && is_array_allocation(mem)),
               "candidate allocation-dominated access offsets must be either concrete and nonnegative, or unknown (for array allocations only)");
      } else {
        // Access node
        const MachNode* const mem_mach = mem->as_Mach();
        intptr_t mem_offset;
        const Node* const mem_obj = get_base_and_offset(mem_mach, mem_offset);

        if (mem_obj == nullptr ||
            !is_concrete(access_offset) ||
            !is_concrete(mem_offset)) {
          // No information available
          continue;
        }

        if (mem_obj != access_obj || mem_offset != access_offset) {
          // Not the same addresses, not a candidate
          continue;
        }
        assert(is_concrete(access_offset) && access_offset >= 0,
               "candidate non-allocation-dominated access offsets must be concrete and nonnegative");
      }

      Block* mem_block = cfg->get_block_for_node(mem);
      const uint mem_index = block_index(mem_block, mem);

      if (access_block == mem_block) {
        // Earlier accesses in the same block
        if (mem_index < access_index && !block_has_safepoint(mem_block, mem_index + 1, access_index)) {
          elide_mach_barrier(access);
        }
      } else if (mem_block->dominates(access_block)) {
        // Dominating block? Look around for safepoints
        ResourceMark rm;
        Block_List stack;
        VectorSet visited;
        stack.push(access_block);
        bool safepoint_found = block_has_safepoint(access_block);
        while (!safepoint_found && stack.size() > 0) {
          const Block* const block = stack.pop();
          if (visited.test_set(block->_pre_order)) {
            continue;
          }
          if (block_has_safepoint(block)) {
            safepoint_found = true;
            break;
          }
          if (block == mem_block) {
            continue;
          }

          // Push predecessor blocks
          for (uint p = 1; p < block->num_preds(); ++p) {
            Block* const pred = cfg->get_block_for_node(block->pred(p));
            stack.push(pred);
          }
        }

        if (!safepoint_found) {
          elide_mach_barrier(access);
        }
      }
    }
  }
}

void ZBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();

  Node_List loads;
  Node_List load_dominators;

  Node_List stores;
  Node_List store_dominators;

  Node_List atomics;
  Node_List atomic_dominators;

  // Step 1 - Find accesses and allocations, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      Node* const node = block->get_node(j);
      if (node->is_Phi()) {
        if (is_allocation(node)) {
          load_dominators.push(node);
          store_dominators.push(node);
          // An allocation can't be considered to "dominate" an atomic operation.
          // For example, a CAS requires the memory location to be store-good.
          // When you have a dominating store or atomic instruction, that is
          // indeed ensured to be the case. However, as for allocations, the
          // initialized memory location could be raw null, which isn't store-good.
        }
        continue;
      } else if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
        if ((mach->barrier_data() & ZBarrierStrong) != 0 &&
            (mach->barrier_data() & ZBarrierNoKeepalive) == 0) {
          loads.push(mach);
          load_dominators.push(mach);
        }
        break;
      case Op_StoreP:
        if (mach->barrier_data() != 0) {
          stores.push(mach);
          load_dominators.push(mach);
          store_dominators.push(mach);
          atomic_dominators.push(mach);
        }
        break;
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if (mach->barrier_data() != 0) {
          atomics.push(mach);
          load_dominators.push(mach);
          store_dominators.push(mach);
          atomic_dominators.push(mach);
        }
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses or allocations for each access
  analyze_dominating_barriers_impl(loads, load_dominators);
  analyze_dominating_barriers_impl(stores, store_dominators);
  analyze_dominating_barriers_impl(atomics, atomic_dominators);
}

// == Reduced spilling optimization ==
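//
// The pass below is a standard backward liveness dataflow over the final CFG.
// As a sketch, the recurrence it iterates to a fixed point is:
//
//   live_out(B) = union over successors S of live_in(S)
//   live_in(B)  = (live_out(B) - defs(B)) + uses(B)
//
// evaluated node by node while walking each block bottom-up. Whenever a node
// carries a (non-elided) ZGC barrier, the register liveness observed at that
// point is recorded, so its stub only has to preserve registers that are
// actually live across the slow-path call.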

void ZBarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  ZBarrierSetAssembler* const bs = ZBarrierSet::assembler();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks liveness, update it
      RegMask* const regs = barrier_set_state()->live(node);
      if (regs != nullptr) {
        regs->OR(new_live);
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}

void ZBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
  eliminate_gc_barrier_data(node);
}

void ZBarrierSetC2::eliminate_gc_barrier_data(Node* node) const {
  if (node->is_LoadStore()) {
    LoadStoreNode* loadstore = node->as_LoadStore();
    loadstore->set_barrier_data(ZBarrierElided);
  } else if (node->is_Mem()) {
    MemNode* mem = node->as_Mem();
    mem->set_barrier_data(ZBarrierElided);
  }
}

#ifndef PRODUCT
void ZBarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const {
  if ((mach->barrier_data() & ZBarrierStrong) != 0) {
    st->print("strong ");
  }
  if ((mach->barrier_data() & ZBarrierWeak) != 0) {
    st->print("weak ");
  }
  if ((mach->barrier_data() & ZBarrierPhantom) != 0) {
    st->print("phantom ");
  }
  if ((mach->barrier_data() & ZBarrierNoKeepalive) != 0) {
    st->print("nokeepalive ");
  }
  if ((mach->barrier_data() & ZBarrierNative) != 0) {
    st->print("native ");
  }
  if ((mach->barrier_data() & ZBarrierElided) != 0) {
    st->print("elided ");
  }
}
#endif // !PRODUCT