1 /*
  2  * Copyright (c) 2018, 2023, Red Hat, Inc. All rights reserved.
  3  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  *
 24  */
 25 
 26 #include "classfile/javaClasses.inline.hpp"
 27 #include "gc/shared/barrierSet.hpp"
 28 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
 29 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
 30 #include "gc/shenandoah/shenandoahForwarding.hpp"
 31 #include "gc/shenandoah/shenandoahHeap.hpp"
 32 #include "gc/shenandoah/shenandoahRuntime.hpp"
 33 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
 34 #include "opto/arraycopynode.hpp"
 35 #include "opto/escape.hpp"
 36 #include "opto/graphKit.hpp"
 37 #include "opto/idealKit.hpp"
 38 #include "opto/macro.hpp"
 39 #include "opto/narrowptrnode.hpp"
 40 #include "opto/output.hpp"
 41 #include "opto/rootnode.hpp"
 42 #include "opto/runtime.hpp"
 43 
 44 ShenandoahBarrierSetC2* ShenandoahBarrierSetC2::bsc2() {
 45   return reinterpret_cast<ShenandoahBarrierSetC2*>(BarrierSet::barrier_set()->barrier_set_c2());
 46 }
 47 
// Per-compilation barrier state. Holds the list of barrier stubs created
// during code emission; everything is allocated in the compilation arena.
ShenandoahBarrierSetC2State::ShenandoahBarrierSetC2State(Arena* comp_arena) :
    BarrierSetC2State(comp_arena),
    _stubs(new (comp_arena) GrowableArray<ShenandoahBarrierStubC2*>(comp_arena, 8,  0, nullptr)),
    _stubs_start_offset(0) {
}
 53 
 54 static void set_barrier_data(C2Access& access, bool load, bool store) {
 55   if (!access.is_oop()) {
 56     return;
 57   }
 58 
 59   DecoratorSet decorators = access.decorators();
 60   bool tightly_coupled = (decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0;
 61   bool in_heap = (decorators & IN_HEAP) != 0;
 62   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
 63   bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
 64 
 65   if (tightly_coupled) {
 66     access.set_barrier_data(ShenandoahBitElided);
 67     return;
 68   }
 69 
 70   uint8_t barrier_data = 0;
 71 
 72   if (load) {
 73     if (ShenandoahLoadRefBarrier) {
 74       if (on_phantom) {
 75         barrier_data |= ShenandoahBitPhantom;
 76       } else if (on_weak) {
 77         barrier_data |= ShenandoahBitWeak;
 78       } else {
 79         barrier_data |= ShenandoahBitStrong;
 80       }
 81     }
 82   }
 83 
 84   if (store) {
 85     if (ShenandoahSATBBarrier) {
 86       barrier_data |= ShenandoahBitKeepAlive;
 87     }
 88     if (ShenandoahCardBarrier && in_heap) {
 89       barrier_data |= ShenandoahBitCardMark;
 90     }
 91   }
 92 
 93   if (!in_heap) {
 94     barrier_data |= ShenandoahBitNative;
 95   }
 96 
 97   access.set_barrier_data(barrier_data);
 98 }
 99 
100 Node* ShenandoahBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
101   // 1: Non-reference load, no additional barrier is needed
102   if (!access.is_oop()) {
103     return BarrierSetC2::load_at_resolved(access, val_type);
104   }
105 
106   // 2. Set barrier data for load
107   set_barrier_data(access, /* load = */ true, /* store = */ false);
108 
109   // 3. Correction: If we are reading the value of the referent field of
110   // a Reference object, we need to record the referent resurrection.
111   DecoratorSet decorators = access.decorators();
112   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
113   bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
114   bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
115   bool needs_keepalive = ((on_weak || on_phantom) && !no_keepalive);
116   if (needs_keepalive) {
117     uint8_t barriers = access.barrier_data() | (ShenandoahSATBBarrier ? ShenandoahBitKeepAlive : 0);
118     access.set_barrier_data(barriers);
119   }
120 
121   return BarrierSetC2::load_at_resolved(access, val_type);
122 }
123 
124 Node* ShenandoahBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
125   // 1: Non-reference store, no additional barrier is needed
126   if (!access.is_oop()) {
127     return BarrierSetC2::store_at_resolved(access, val);
128   }
129 
130   // 2. Set barrier data for store
131   set_barrier_data(access, /* load = */ false, /* store = */ true);
132 
133   // 3. Correction: avoid keep-alive barriers that should not do keep-alive.
134   DecoratorSet decorators = access.decorators();
135   bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
136   if (no_keepalive) {
137     access.set_barrier_data(access.barrier_data() & ~ShenandoahBitKeepAlive);
138   }
139 
140   return BarrierSetC2::store_at_resolved(access, val);
141 }
142 
143 Node* ShenandoahBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
144                                                              Node* new_val, const Type* value_type) const {
145   set_barrier_data(access, /* load = */ true, /* store = */ true);
146   return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
147 }
148 
149 Node* ShenandoahBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
150                                                               Node* new_val, const Type* value_type) const {
151   set_barrier_data(access, /* load = */ true, /* store = */ true);
152   return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
153 }
154 
155 Node* ShenandoahBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* val, const Type* value_type) const {
156   set_barrier_data(access, /* load = */ true, /* store = */ true);
157   return BarrierSetC2::atomic_xchg_at_resolved(access, val, value_type);
158 }
159 
// Refine the barrier bits on a store once the stored value's type is known
// more precisely: storing null elides the card-mark post-barrier, while a
// provably non-null value is recorded so barrier emission can skip the
// null check.
void ShenandoahBarrierSetC2::refine_store(const Node* n) {
  MemNode* store = n->as_Store();
  const Node* newval = n->in(MemNode::ValueIn);
  assert(newval != nullptr, "");
  const Type* newval_bottom = newval->bottom_type();
  // Callers only pass StoreP/StoreN, so the value is presumably always a
  // (narrow) pointer and make_ptr() is non-null — TODO confirm for all callers.
  TypePtr::PTR newval_type = newval_bottom->make_ptr()->ptr();
  uint8_t barrier_data = store->barrier_data();
  if (!newval_bottom->isa_oopptr() &&
      !newval_bottom->isa_narrowoop() &&
      newval_type != TypePtr::Null) {
    // newval is neither an OOP nor null, so there is no barrier to refine.
    assert(barrier_data == 0, "non-OOP stores should have no barrier data");
    return;
  }
  if (barrier_data == 0) {
    // No barrier to refine.
    return;
  }
  if (newval_type == TypePtr::Null) {
    barrier_data &= ~ShenandoahBitNotNull;
    // Simply elide post-barrier if writing null.
    barrier_data &= ~ShenandoahBitCardMark;
  } else if (newval_type == TypePtr::NotNull) {
    barrier_data |= ShenandoahBitNotNull;
  }
  store->set_barrier_data(barrier_data);
}
187 
188 bool ShenandoahBarrierSetC2::can_remove_load_barrier(Node* n) {
189   // Check if all outs feed into nodes that do not expose the oops to the rest
190   // of the runtime system. In this case, we can elide the LRB barrier. We bail
191   // out with false at the first sight of trouble.
192 
193   ResourceMark rm;
194   VectorSet visited;
195   Node_List worklist;
196   worklist.push(n);
197 
198   while (worklist.size() > 0) {
199     Node* n = worklist.pop();
200     if (visited.test_set(n->_idx)) {
201       continue;
202     }
203 
204     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
205       Node* out = n->fast_out(i);
206       switch (out->Opcode()) {
207         case Op_CmpN: {
208           if (out->in(1) == n &&
209               out->in(2)->Opcode() == Op_ConN &&
210               out->in(2)->get_narrowcon() == 0) {
211             // Null check, no oop is exposed.
212             break;
213           } else {
214             return false;
215           }
216         }
217         case Op_CmpP: {
218           if (out->in(1) == n &&
219               out->in(2)->Opcode() == Op_ConP &&
220               out->in(2)->get_ptr() == 0) {
221             // Null check, no oop is exposed.
222             break;
223           } else {
224             return false;
225           }
226         }
227         case Op_DecodeN:
228         case Op_CastPP: {
229           // Check if any other outs are escaping.
230           worklist.push(out);
231           break;
232         }
233         case Op_CallStaticJava: {
234           if (out->as_CallStaticJava()->is_uncommon_trap()) {
235             // Local feeds into uncommon trap. Deopt machinery handles barriers itself.
236             break;
237           } else {
238             return false;
239           }
240         }
241 
242         default: {
243           // Paranoidly distrust any other nodes.
244           // TODO: Check if there are other patterns that benefit from this elision.
245           return false;
246         }
247       }
248     }
249   }
250 
251   // Nothing troublesome found.
252   return true;
253 }
254 
255 void ShenandoahBarrierSetC2::refine_load(Node* n) {
256   MemNode* load = n->as_Load();
257 
258   uint8_t barrier_data = load->barrier_data();
259 
260   // Do not touch weak LRBs at all: they are responsible for shielding from
261   // Reference.referent resurrection.
262   if ((barrier_data & (ShenandoahBitWeak | ShenandoahBitPhantom)) != 0) {
263     return;
264   }
265 
266   if (can_remove_load_barrier(n)) {
267     barrier_data &= ~ShenandoahBitStrong;
268     barrier_data |= ShenandoahBitElided;
269   }
270 
271   load->set_barrier_data(barrier_data);
272 }
273 
// Final pass over the whole graph (reached from root via input edges) after
// post-expansion optimizations: refine barrier bits on oop loads/stores once
// more, then strip nodes whose remaining bits contain no "real" barrier
// flags, so matcher rules keyed on "barrier_data() != 0" do not fire for
// irrelevant fluff.
void ShenandoahBarrierSetC2::final_refinement(Compile* C) const {
  ResourceMark rm;
  VectorSet visited;
  Node_List worklist;
  worklist.push(C->root());
  while (worklist.size() > 0) {
    Node* n = worklist.pop();
    if (visited.test_set(n->_idx)) {
      continue;
    }

    // Do another pass to catch new opportunities after post-expansion optimizations.
    switch(n->Opcode()) {
      case Op_StoreP:
      case Op_StoreN: {
        refine_store(n);
        break;
      }
      case Op_LoadN:
      case Op_LoadP: {
        refine_load(n);
        break;
      }
    }

    // If there are no real barrier flags on the node, strip away additional fluff.
    // Matcher does not care about this, and we would like to avoid invoking "barrier_data() != 0"
    // rules when the only flags are the irrelevant fluff.
    // Note: LoadStore is checked first; it is also is_Mem(), but must be
    // handled via its own accessor.
    if (n->is_LoadStore()) {
      LoadStoreNode* load_store = n->as_LoadStore();
      uint8_t barrier_data = load_store->barrier_data();
      if ((barrier_data & ShenandoahBitsReal) == 0) {
        load_store->set_barrier_data(0);
      }
    } else if (n->is_Mem()) {
      MemNode* mem = n->as_Mem();
      uint8_t barrier_data = mem->barrier_data();
      if ((barrier_data & ShenandoahBitsReal) == 0) {
        mem->set_barrier_data(0);
      }
    }

    // Continue the traversal over all inputs.
    for (uint j = 0; j < n->req(); j++) {
      Node* in = n->in(j);
      if (in != nullptr) {
        worklist.push(in);
      }
    }
  }
}
324 
325 bool ShenandoahBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
326   ResourceMark rm;
327   VectorSet visited;
328   Node_List worklist;
329   worklist.push(C->root());
330   while (worklist.size() > 0) {
331     Node* n = worklist.pop();
332     if (visited.test_set(n->_idx)) {
333       continue;
334     }
335     switch(n->Opcode()) {
336       case Op_StoreP:
337       case Op_StoreN: {
338         refine_store(n);
339         break;
340       }
341       case Op_LoadN:
342       case Op_LoadP: {
343         refine_load(n);
344         break;
345       }
346     }
347 
348     for (uint j = 0; j < n->req(); j++) {
349       Node* in = n->in(j);
350       if (in != nullptr) {
351         worklist.push(in);
352       }
353     }
354   }
355   return false;
356 }
357 
358 // Support for macro expanded GC barriers
359 void ShenandoahBarrierSetC2::eliminate_gc_barrier_data(Node* node) const {
360   if (node->is_LoadStore()) {
361     LoadStoreNode* loadstore = node->as_LoadStore();
362     loadstore->set_barrier_data(0);
363   } else if (node->is_Mem()) {
364     MemNode* mem = node->as_Mem();
365     mem->set_barrier_data(0);
366   }
367 }
368 
// Eliminate the barrier for an access into a removed allocation: there are
// no separate barrier nodes to delete, only barrier data to clear.
void ShenandoahBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
  eliminate_gc_barrier_data(node);
}
372 
// A dominating access already performed the needed barriers for this
// location: clear the barrier bits so no barrier code is emitted.
void ShenandoahBarrierSetC2::elide_dominated_barrier(MachNode* mach) const {
  mach->set_barrier_data(0);
}
376 
// Scan the CFG for barrier-carrying accesses and for the accesses (and
// allocations) that can act as their dominators, then elide barriers that a
// dominator has already performed for the same location. Loads, stores and
// atomics accept different dominator sets, as explained at each case below.
void ShenandoahBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();

  // Candidate accesses whose barriers may be elided, and the accesses that
  // may dominate them.
  Node_List loads, stores, atomics;
  Node_List load_dominators, store_dominators, atomic_dominators;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      Node* const node = block->get_node(j);

      // Everything that happens in allocations does not need barriers.
      if (node->is_Phi() && is_allocation(node)) {
        load_dominators.push(node);
        store_dominators.push(node);
        atomic_dominators.push(node);
        continue;
      }

      if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {

        // Dominating loads have already passed through LRB and their load
        // locations got fixed. Subsequent barriers are no longer required.
        // The only exception are weak loads that have to go through LRB
        // to deal with dying referents.
        case Op_LoadP:
        case Op_LoadN: {
          if ((mach->barrier_data() & ShenandoahBitStrong) != 0) {
            loads.push(mach);
            load_dominators.push(mach);
          }
          break;
        }

        // Dominating stores have recorded the old value in SATB, and made the
        // card table update for a location. Subsequent barriers are no longer
        // required.
        case Op_StoreP:
        case Op_StoreN: {
          if (mach->barrier_data() != 0) {
            stores.push(mach);
            load_dominators.push(mach);
            store_dominators.push(mach);
            atomic_dominators.push(mach);
          }
          break;
        }

        // Dominating atomics have dealt with false positives, and made the card
        // table updates for a location. Even though CAS barriers are conditional,
        // they perform all needed barriers when memory access is successful.
        // Therefore, subsequent barriers are no longer required.
        case Op_CompareAndExchangeN:
        case Op_CompareAndExchangeP:
        case Op_CompareAndSwapN:
        case Op_CompareAndSwapP:
        case Op_GetAndSetP:
        case Op_GetAndSetN: {
          if (mach->barrier_data() != 0) {
            atomics.push(mach);
            load_dominators.push(mach);
            store_dominators.push(mach);
            atomic_dominators.push(mach);
          }
          break;
        }

      default:
        break;
      }
    }
  }

  // Elide barriers on candidates dominated by a suitable dominator.
  elide_dominated_barriers(loads, load_dominators);
  elide_dominated_barriers(stores, store_dominators);
  elide_dominated_barriers(atomics, atomic_dominators);
}
461 
462 uint ShenandoahBarrierSetC2::estimated_barrier_size(const Node* node) const {
463   uint8_t bd = MemNode::barrier_data(node);
464   assert(bd != 0, "Checked by caller");
465   if ((bd & ShenandoahBitElided) != 0) {
466     return 0;
467   }
468   // GC state check is ~4 fast-path nodes: Cmp, Bool, If, If-Proj.
469   return 4;
470 }
471 
472 bool ShenandoahBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const {
473   bool is_oop = is_reference_type(type);
474   if (!is_oop) {
475     return false;
476   }
477   if (ShenandoahSATBBarrier && tightly_coupled_alloc) {
478     if (phase == Optimization) {
479       return false;
480     }
481     return !is_clone;
482   }
483   return true;
484 }
485 
// Decide whether cloning an object of "src_type" needs the Shenandoah clone
// barrier, i.e. whether the copied payload may contain oops. Sets
// "is_oop_array" when the source is provably an oop array. Defaults to true
// (assume the worst) for types that cannot be classified.
bool ShenandoahBarrierSetC2::clone_needs_barrier(const TypeOopPtr* src_type, bool& is_oop_array) {
  if (!ShenandoahCloneBarrier) {
    return false;
  }

  if (src_type->isa_instptr() != nullptr) {
    // Instance: need barrier only if there is a possibility of having an oop anywhere in it.
    ciInstanceKlass* ik = src_type->is_instptr()->instance_klass();
    if ((src_type->klass_is_exact() || !ik->has_subklass()) &&
        !ik->has_injected_fields() && !ik->has_object_fields()) {
      if (!src_type->klass_is_exact()) {
        // Class is *currently* the leaf in the hierarchy.
        // Record the dependency so that we deopt if this does not hold in future.
        Compile::current()->dependencies()->assert_leaf_type(ik);
      }
      return false;
    }
  } else if (src_type->isa_aryptr() != nullptr) {
    // Array: need barrier only if array is oop-bearing.
    BasicType src_elem = src_type->isa_aryptr()->elem()->array_element_basic_type();
    if (is_reference_type(src_elem, true)) {
      is_oop_array = true;
    } else {
      return false;
    }
  }

  // Assume the worst.
  return true;
}
516 
// Parse-time setup for Object.clone intrinsics. Barrier-free clones and
// instance-like clones defer to the common BarrierSetC2 machinery; oop array
// clones are set up as an ArrayCopyNode that calls the normal (barrier-aware)
// arraycopy stub after expansion.
void ShenandoahBarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  const TypeOopPtr* src_type = kit->gvn().type(src_base)->is_oopptr();

  bool is_oop_array = false;
  if (!clone_needs_barrier(src_type, is_oop_array)) {
    // No barrier is needed? Just do what common BarrierSetC2 wants with it.
    BarrierSetC2::clone(kit, src_base, dst_base, size, is_array);
    return;
  }

  if (ShenandoahCloneRuntime || !is_array || !is_oop_array) {
    // Looks like an instance? Prepare the instance clone. This would either
    // be exploded into individual accesses or be left as runtime call.
    // Common BarrierSetC2 prepares everything for both cases.
    BarrierSetC2::clone(kit, src_base, dst_base, size, is_array);
    return;
  }

  // We are cloning the oop array. Prepare to call the normal arraycopy stub
  // after the expansion. Normal stub takes the number of actual type-sized
  // elements to copy after the base, compute the count here.
  Node* offset = kit->MakeConX(arrayOopDesc::base_offset_in_bytes(UseCompressedOops ? T_NARROWOOP : T_OBJECT));
  size = kit->gvn().transform(new SubXNode(size, offset));
  size = kit->gvn().transform(new URShiftXNode(size, kit->intcon(LogBytesPerHeapOop)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, size, true, false);
  ac->set_clone_array();
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    // The arraycopy survived as-is: wire it as a runtime call.
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), TypeRawPtr::BOTTOM);
  } else {
    // GVN folded the arraycopy into something else; adopt its memory state.
    kit->set_all_memory(n);
  }
}
551 
// Macro-expansion of the clone ArrayCopyNode prepared above. Barrier-free
// clones expand to plain HeapWord-sized copies; instance-like clones call
// the Shenandoah clone runtime; oop array clones become a leaf call to the
// regular oop arraycopy stub, which applies GC barriers via
// BarrierSetAssembler.
void ShenandoahBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const ctrl        = ac->in(TypeFunc::Control);
  Node* const mem         = ac->in(TypeFunc::Memory);
  Node* const src         = ac->in(ArrayCopyNode::Src);
  Node* const src_offset  = ac->in(ArrayCopyNode::SrcPos);
  Node* const dest        = ac->in(ArrayCopyNode::Dest);
  Node* const dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length            = ac->in(ArrayCopyNode::Length);

  const TypeOopPtr* src_type = phase->igvn().type(src)->is_oopptr();

  bool is_oop_array = false;
  if (!clone_needs_barrier(src_type, is_oop_array)) {
    // No barrier is needed? Expand to normal HeapWord-sized arraycopy.
    BarrierSetC2::clone_at_expansion(phase, ac);
    return;
  }

  if (ShenandoahCloneRuntime || !ac->is_clone_array() || !is_oop_array) {
    // Still looks like an instance? Likely a large instance or reflective
    // clone with unknown length. Go to runtime and handle it there.
    clone_in_runtime(phase, ac, CAST_FROM_FN_PTR(address, ShenandoahRuntime::clone_addr()), "ShenandoahRuntime::clone");
    return;
  }

  // We are cloning the oop array. Call into normal oop array copy stubs.
  // Those stubs would call BarrierSetAssembler to handle GC barriers.

  // This is the full clone, so offsets should equal each other and be at array base.
  assert(src_offset == dest_offset, "should be equal");
  const jlong offset = src_offset->get_long();
  const TypeAryPtr* const ary_ptr = src->get_ptr_type()->isa_aryptr();
  BasicType bt = ary_ptr->elem()->array_element_basic_type();
  assert(offset == arrayOopDesc::base_offset_in_bytes(bt), "should match");

  const char*   copyfunc_name = "arraycopy";
  const address copyfunc_addr = phase->basictype2arraycopy(T_OBJECT, nullptr, nullptr, true, copyfunc_name, true);

  Node* const call = phase->make_leaf_call(ctrl, mem,
      OptoRuntime::fast_arraycopy_Type(),
      copyfunc_addr, copyfunc_name,
      TypeRawPtr::BOTTOM,
      phase->basic_plus_adr(src, src_offset),
      phase->basic_plus_adr(dest, dest_offset),
      length,
      phase->top()
  );
  phase->transform_later(call);

  // Replace the ArrayCopy node with the stub call.
  phase->igvn().replace_node(ac, call);
}
603 
// Allocate the per-compilation Shenandoah barrier state in the compilation
// arena.
void* ShenandoahBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new(comp_arena) ShenandoahBarrierSetC2State(comp_arena);
}
607 
// Access the per-compilation barrier state stored on the current Compile.
ShenandoahBarrierSetC2State* ShenandoahBarrierSetC2::state() const {
  return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
}
611 
612 void ShenandoahBarrierSetC2::print_barrier_data(outputStream* os, uint8_t data) {
613   os->print(" Node barriers: ");
614   if ((data & ShenandoahBitStrong) != 0) {
615     data &= ~ShenandoahBitStrong;
616     os->print("strong ");
617   }
618 
619   if ((data & ShenandoahBitWeak) != 0) {
620     data &= ~ShenandoahBitWeak;
621     os->print("weak ");
622   }
623 
624   if ((data & ShenandoahBitPhantom) != 0) {
625     data &= ~ShenandoahBitPhantom;
626     os->print("phantom ");
627   }
628 
629   if ((data & ShenandoahBitElided) != 0) {
630     data &= ~ShenandoahBitElided;
631     os->print("elided ");
632   }
633 
634   if ((data & ShenandoahBitKeepAlive) != 0) {
635     data &= ~ShenandoahBitKeepAlive;
636     os->print("keepalive ");
637   }
638 
639   if ((data & ShenandoahBitCardMark) != 0) {
640     data &= ~ShenandoahBitCardMark;
641     os->print("cardmark ");
642   }
643 
644   if ((data & ShenandoahBitNotNull) != 0) {
645     data &= ~ShenandoahBitNotNull;
646     os->print("not-null ");
647   }
648   os->cr();
649 
650   if (data > 0) {
651     fatal("Unknown bit!");
652   }
653 
654   os->print_cr(" GC configuration: %sLRB %sSATB %sCAS %sClone %sCard",
655     (ShenandoahLoadRefBarrier ? "+" : "-"),
656     (ShenandoahSATBBarrier    ? "+" : "-"),
657     (ShenandoahCASBarrier     ? "+" : "-"),
658     (ShenandoahCloneBarrier   ? "+" : "-"),
659     (ShenandoahCardBarrier    ? "+" : "-")
660   );
661 }
662 
663 #ifdef ASSERT
664 void ShenandoahBarrierSetC2::verify_gc_barrier_assert(bool cond, const char* msg, uint8_t bd, Node* n) {
665   if (!cond) {
666     stringStream ss;
667     ss.print_cr("%s", msg);
668     ss.print_cr("-----------------");
669     print_barrier_data(&ss, bd);
670     ss.print_cr("-----------------");
671     n->dump_bfs(1, nullptr, "", &ss);
672     report_vm_error(__FILE__, __LINE__, ss.as_string());
673   }
674 }
675 
676 void ShenandoahBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
677   if (!ShenandoahVerifyOptoBarriers) {
678     return;
679   }
680 
681   // Final refinement might have removed the remaining auxiliary flags, making some accesses completely blank.
682   bool accept_blank = (phase == BeforeCodeGen);
683   bool expect_load_barriers       = !accept_blank && ShenandoahLoadRefBarrier;
684   bool expect_store_barriers      = !accept_blank && (ShenandoahSATBBarrier || ShenandoahCardBarrier);
685   bool expect_load_store_barriers = !accept_blank && ShenandoahCASBarrier;
686 
687   Unique_Node_List wq;
688   Node_Stack phis(0);
689   VectorSet visited;
690 
691   wq.push(compile->root());
692   for (uint next = 0; next < wq.size(); next++) {
693     Node *n = wq.at(next);
694     int opc = n->Opcode();
695 
696     if (opc == Op_LoadP || opc == Op_LoadN) {
697       uint8_t bd = n->as_Load()->barrier_data();
698 
699       const TypePtr* adr_type = n->as_Load()->adr_type();
700       if (adr_type->isa_oopptr() || adr_type->isa_narrowoop()) {
701         verify_gc_barrier_assert(!expect_load_barriers || (bd != 0), "Oop load should have barrier data", bd, n);
702 
703         bool is_weak = ((bd & (ShenandoahBitWeak | ShenandoahBitPhantom)) != 0);
704         bool is_referent = adr_type->isa_instptr() &&
705             adr_type->is_instptr()->instance_klass()->is_subtype_of(Compile::current()->env()->Reference_klass()) &&
706             adr_type->is_instptr()->offset() == java_lang_ref_Reference::referent_offset();
707 
708         verify_gc_barrier_assert(!is_weak || is_referent, "Weak load only for Reference.referent", bd, n);
709       } else if (adr_type->isa_rawptr() || adr_type->isa_klassptr()) {
710         // Some LoadP-s are used for T_ADDRESS loads from raw pointers. These are not oops.
711         // Some LoadP-s are used to load class data.
712         // TODO: Verify their barrier data.
713       } else {
714         verify_gc_barrier_assert(false, "Unclassified access type", bd, n);
715       }
716     } else if (opc == Op_StoreP || opc == Op_StoreN) {
717       uint8_t bd = n->as_Store()->barrier_data();
718       const TypePtr* adr_type = n->as_Store()->adr_type();
719       if (adr_type->isa_oopptr() || adr_type->isa_narrowoop()) {
720         // Reference.clear stores null
721         bool is_referent = adr_type->isa_instptr() &&
722              adr_type->is_instptr()->instance_klass()->is_subtype_of(Compile::current()->env()->Reference_klass()) &&
723              adr_type->is_instptr()->offset() == java_lang_ref_Reference::referent_offset();
724 
725         const TypePtr* val_type = n->as_Store()->in(MemNode::Memory)->adr_type();
726         if (!is_referent && (val_type->isa_oopptr() || val_type->isa_narrowoop())) {
727           verify_gc_barrier_assert(!expect_store_barriers || (bd != 0), "Oop store should have barrier data", bd, n);
728         }
729       } else if (adr_type->isa_rawptr() || adr_type->isa_klassptr()) {
730         // Similar to LoadP-s, some of these accesses are raw, and some are handling oops.
731         // TODO: Verify their barrier data.
732       } else {
733         verify_gc_barrier_assert(false, "Unclassified access type", bd, n);
734       }
735     } else if (opc == Op_WeakCompareAndSwapP || opc == Op_WeakCompareAndSwapN ||
736                opc == Op_CompareAndExchangeP || opc == Op_CompareAndExchangeN ||
737                opc == Op_CompareAndSwapP     || opc == Op_CompareAndSwapN ||
738                opc == Op_GetAndSetP          || opc == Op_GetAndSetN) {
739       uint8_t bd = n->as_LoadStore()->barrier_data();
740       verify_gc_barrier_assert(!expect_load_store_barriers || (bd != 0), "Oop load-store should have barrier data", bd, n);
741     } else if (n->is_Mem()) {
742       uint8_t bd = MemNode::barrier_data(n); // FIXME: LOL HotSpot, why not n->as_Mem()? LoadStore is both is_Mem() and not as_Mem().
743       verify_gc_barrier_assert(bd == 0, "Other mem nodes should have no barrier data", bd, n);
744     }
745 
746     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
747       Node* m = n->fast_out(i);
748       wq.push(m);
749     }
750   }
751 }
752 #endif
753 
// File-local accessor for the per-compilation Shenandoah barrier state.
static ShenandoahBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
}
757 
758 int ShenandoahBarrierSetC2::estimate_stub_size() const {
759   GrowableArray<ShenandoahBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
760   assert(stubs->is_empty(), "Lifecycle: no stubs were yet created");
761   return 0;
762 }
763 
// Emit all registered barrier stubs into the instruction section, and adjust
// the "skipped" size accounting so that the upcoming stubs section alignment
// stays consistent.
void ShenandoahBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ShenandoahBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  barrier_set_state()->set_stubs_start_offset(masm.offset());

  // Stub generation uses nested skipped counters that can double-count.
  // Calculate the actual skipped amount by the real PC before/after stub generation.
  // FIXME: This should be handled upstream.
  int offset_before = masm.offset();
  int skipped_before = masm.get_skipped();

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    stubs->at(i)->emit_code(masm);
  }

  int offset_after = masm.offset();

  // The real stubs section is coming up after this, so we have to account for alignment
  // padding there. See CodeSection::alignment().
  offset_after = align_up(offset_after, HeapWordSize);

  masm.set_skipped(skipped_before + (offset_after - offset_before));

  masm.flush();
}
795 
// Register this stub for emission. Skipped during the scratch pass that only
// measures code size, so stubs are not recorded twice.
void ShenandoahBarrierStubC2::register_stub() {
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(this);
  }
}
801 
// Create and register a load-reference barrier stub for a load from a
// resolved Address (no separate address register).
ShenandoahLoadBarrierStubC2* ShenandoahLoadBarrierStubC2::create(const MachNode* node, Register dst, Address src) {
  auto* stub = new (Compile::current()->comp_arena()) ShenandoahLoadBarrierStubC2(node, dst, noreg, src);
  stub->register_stub();
  return stub;
}
807 
// Create and register a load-reference barrier stub for a load whose address
// is held in a register (no resolved Address).
ShenandoahLoadBarrierStubC2* ShenandoahLoadBarrierStubC2::create(const MachNode* node, Register dst, Register addr) {
  auto* stub = new (Compile::current()->comp_arena()) ShenandoahLoadBarrierStubC2(node, dst, addr, Address());
  stub->register_stub();
  return stub;
}
813 
// Create and register a store barrier stub for a store to a resolved Address
// with a known source value register.
ShenandoahStoreBarrierStubC2* ShenandoahStoreBarrierStubC2::create(const MachNode* node, Address dst, bool dst_narrow, Register src, bool src_narrow, Register tmp) {
  auto* stub = new (Compile::current()->comp_arena()) ShenandoahStoreBarrierStubC2(node, noreg, dst, dst_narrow, src, src_narrow, tmp);
  stub->register_stub();
  return stub;
}
819 
// Create and register a store barrier stub for a store whose address is held
// in a register; no source value register is tracked.
ShenandoahStoreBarrierStubC2* ShenandoahStoreBarrierStubC2::create(const MachNode* node, Register addr, bool dst_narrow) {
  auto* stub = new (Compile::current()->comp_arena()) ShenandoahStoreBarrierStubC2(node, addr, Address(), dst_narrow, noreg, false, noreg);
  stub->register_stub();
  return stub;
}
825 
// Create and register a CAS barrier stub for a register-addressed CAS, with
// full control over narrow/cae/null/ordering/weak behavior.
ShenandoahCASBarrierStubC2* ShenandoahCASBarrierStubC2::create(const MachNode* node, Register addr, Register expected, Register new_val, Register result, bool narrow, bool cae, bool maybe_null, bool acquire, bool release, bool weak) {
  auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierStubC2(node, addr, Address(), expected, new_val, result, noreg, noreg, narrow, cae, maybe_null, acquire, release, weak);
  stub->register_stub();
  return stub;
}
831 
// Create and register a CAS barrier stub for an Address-addressed CAS with
// temps; maybe_null is assumed, ordering/weak flags are off.
ShenandoahCASBarrierStubC2* ShenandoahCASBarrierStubC2::create(const MachNode* node, Address addr, Register expected, Register new_val, Register result, Register tmp1, Register tmp2, bool narrow, bool cae) {
  auto* stub = new (Compile::current()->comp_arena()) ShenandoahCASBarrierStubC2(node, noreg, addr, expected, new_val, result, tmp1, tmp2, narrow, cae, true, false, false, false);
  stub->register_stub();
  return stub;
}
837 
838 bool ShenandoahBarrierSetC2State::needs_liveness_data(const MachNode* mach) const {
839   // Must ask all stubs!
840   return ShenandoahLoadBarrierStubC2::needs_barrier(mach) ||
841          ShenandoahStoreBarrierStubC2::needs_barrier(mach) ||
842          ShenandoahCASBarrierStubC2::needs_barrier(mach);
843 }
844 
// Live-in register data is always required for Shenandoah barrier stubs.
bool ShenandoahBarrierSetC2State::needs_livein_data() const {
  return true;
}