1 /*
  2  * Copyright (c) 2018, 2023, Red Hat, Inc. All rights reserved.
  3  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  *
 24  */
 25 
 26 #include "classfile/javaClasses.inline.hpp"
 27 #include "gc/shared/barrierSet.hpp"
 28 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
 29 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
 30 #include "gc/shenandoah/shenandoahForwarding.hpp"
 31 #include "gc/shenandoah/shenandoahHeap.hpp"
 32 #include "gc/shenandoah/shenandoahRuntime.hpp"
 33 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
 34 #include "opto/arraycopynode.hpp"
 35 #include "opto/escape.hpp"
 36 #include "opto/graphKit.hpp"
 37 #include "opto/idealKit.hpp"
 38 #include "opto/macro.hpp"
 39 #include "opto/narrowptrnode.hpp"
 40 #include "opto/output.hpp"
 41 #include "opto/rootnode.hpp"
 42 #include "opto/runtime.hpp"
 43 
 44 ShenandoahBarrierSetC2* ShenandoahBarrierSetC2::bsc2() {
 45   return reinterpret_cast<ShenandoahBarrierSetC2*>(BarrierSet::barrier_set()->barrier_set_c2());
 46 }
 47 
// Per-compilation barrier state: arena-allocated list of slow-path stubs, plus
// bookkeeping for trampoline stub counting and the stubs section start offset.
ShenandoahBarrierSetC2State::ShenandoahBarrierSetC2State(Arena* comp_arena) :
    BarrierSetC2State(comp_arena),
    _stubs(new (comp_arena) GrowableArray<ShenandoahBarrierStubC2*>(comp_arena, 8,  0, nullptr)),
    _trampoline_stubs_count(0),
    _stubs_start_offset(0) {
}
 54 
 55 static void set_barrier_data(C2Access& access, bool load, bool store) {
 56   if (!access.is_oop()) {
 57     return;
 58   }
 59 
 60   DecoratorSet decorators = access.decorators();
 61   bool tightly_coupled = (decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0;
 62   bool in_heap = (decorators & IN_HEAP) != 0;
 63   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
 64   bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
 65 
 66   if (tightly_coupled) {
 67     access.set_barrier_data(ShenandoahBitElided);
 68     return;
 69   }
 70 
 71   uint8_t barrier_data = 0;
 72 
 73   if (load) {
 74     if (ShenandoahLoadRefBarrier) {
 75       if (on_phantom) {
 76         barrier_data |= ShenandoahBitPhantom;
 77       } else if (on_weak) {
 78         barrier_data |= ShenandoahBitWeak;
 79       } else {
 80         barrier_data |= ShenandoahBitStrong;
 81       }
 82     }
 83   }
 84 
 85   if (store) {
 86     if (ShenandoahSATBBarrier) {
 87       barrier_data |= ShenandoahBitKeepAlive;
 88     }
 89     if (ShenandoahCardBarrier && in_heap) {
 90       barrier_data |= ShenandoahBitCardMark;
 91     }
 92   }
 93 
 94   if (!in_heap) {
 95     barrier_data |= ShenandoahBitNative;
 96   }
 97 
 98   access.set_barrier_data(barrier_data);
 99 }
100 
101 Node* ShenandoahBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
102   // 1: Non-reference load, no additional barrier is needed
103   if (!access.is_oop()) {
104     return BarrierSetC2::load_at_resolved(access, val_type);
105   }
106 
107   // 2. Set barrier data for load
108   set_barrier_data(access, /* load = */ true, /* store = */ false);
109 
110   // 3. Correction: If we are reading the value of the referent field of
111   // a Reference object, we need to record the referent resurrection.
112   DecoratorSet decorators = access.decorators();
113   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
114   bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
115   bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
116   bool needs_keepalive = ((on_weak || on_phantom) && !no_keepalive);
117   if (needs_keepalive) {
118     uint8_t barriers = access.barrier_data() | (ShenandoahSATBBarrier ? ShenandoahBitKeepAlive : 0);
119     access.set_barrier_data(barriers);
120   }
121 
122   return BarrierSetC2::load_at_resolved(access, val_type);
123 }
124 
125 Node* ShenandoahBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
126   // 1: Non-reference store, no additional barrier is needed
127   if (!access.is_oop()) {
128     return BarrierSetC2::store_at_resolved(access, val);
129   }
130 
131   // 2. Set barrier data for store
132   set_barrier_data(access, /* load = */ false, /* store = */ true);
133 
134   // 3. Correction: avoid keep-alive barriers that should not do keep-alive.
135   DecoratorSet decorators = access.decorators();
136   bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0;
137   if (no_keepalive) {
138     access.set_barrier_data(access.barrier_data() & ~ShenandoahBitKeepAlive);
139   }
140 
141   return BarrierSetC2::store_at_resolved(access, val);
142 }
143 
// CAS-style accesses both read and write the location, so they need both
// load-side and store-side barrier bits.
Node* ShenandoahBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                             Node* new_val, const Type* value_type) const {
  set_barrier_data(access, /* load = */ true, /* store = */ true);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}
149 
// CAS-style accesses both read and write the location, so they need both
// load-side and store-side barrier bits.
Node* ShenandoahBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                              Node* new_val, const Type* value_type) const {
  set_barrier_data(access, /* load = */ true, /* store = */ true);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}
155 
// Atomic exchange both reads and writes the location, so it needs both
// load-side and store-side barrier bits.
Node* ShenandoahBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* val, const Type* value_type) const {
  set_barrier_data(access, /* load = */ true, /* store = */ true);
  return BarrierSetC2::atomic_xchg_at_resolved(access, val, value_type);
}
</style_update>
160 
161 void ShenandoahBarrierSetC2::refine_store(const Node* n) {
162   MemNode* store = n->as_Store();
163   const Node* newval = n->in(MemNode::ValueIn);
164   assert(newval != nullptr, "");
165   const Type* newval_bottom = newval->bottom_type();
166   TypePtr::PTR newval_type = newval_bottom->make_ptr()->ptr();
167   uint8_t barrier_data = store->barrier_data();
168   if (!newval_bottom->isa_oopptr() &&
169       !newval_bottom->isa_narrowoop() &&
170       newval_type != TypePtr::Null) {
171     // newval is neither an OOP nor null, so there is no barrier to refine.
172     assert(barrier_data == 0, "non-OOP stores should have no barrier data");
173     return;
174   }
175   if (barrier_data == 0) {
176     // No barrier to refine.
177     return;
178   }
179   if (newval_type == TypePtr::Null) {
180     barrier_data &= ~ShenandoahBitNotNull;
181     // Simply elide post-barrier if writing null.
182     barrier_data &= ~ShenandoahBitCardMark;
183   } else if (newval_type == TypePtr::NotNull) {
184     barrier_data |= ShenandoahBitNotNull;
185   }
186   store->set_barrier_data(barrier_data);
187 }
188 
189 bool ShenandoahBarrierSetC2::can_remove_load_barrier(Node* n) {
190   // Check if all outs feed into nodes that do not expose the oops to the rest
191   // of the runtime system. In this case, we can elide the LRB barrier. We bail
192   // out with false at the first sight of trouble.
193 
194   ResourceMark rm;
195   VectorSet visited;
196   Node_List worklist;
197   worklist.push(n);
198 
199   while (worklist.size() > 0) {
200     Node* n = worklist.pop();
201     if (visited.test_set(n->_idx)) {
202       continue;
203     }
204 
205     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
206       Node* out = n->fast_out(i);
207       switch (out->Opcode()) {
208         case Op_CmpN: {
209           if (out->in(1) == n &&
210               out->in(2)->Opcode() == Op_ConN &&
211               out->in(2)->get_narrowcon() == 0) {
212             // Null check, no oop is exposed.
213             break;
214           } else {
215             return false;
216           }
217         }
218         case Op_CmpP: {
219           if (out->in(1) == n &&
220               out->in(2)->Opcode() == Op_ConP &&
221               out->in(2)->get_ptr() == 0) {
222             // Null check, no oop is exposed.
223             break;
224           } else {
225             return false;
226           }
227         }
228         case Op_DecodeN:
229         case Op_CastPP: {
230           // Check if any other outs are escaping.
231           worklist.push(out);
232           break;
233         }
234         case Op_CallStaticJava: {
235           if (out->as_CallStaticJava()->is_uncommon_trap()) {
236             // Local feeds into uncommon trap. Deopt machinery handles barriers itself.
237             break;
238           } else {
239             return false;
240           }
241         }
242 
243         default: {
244           // Paranoidly distrust any other nodes.
245           // TODO: Check if there are other patterns that benefit from this elision.
246           return false;
247         }
248       }
249     }
250   }
251 
252   // Nothing troublesome found.
253   return true;
254 }
255 
256 void ShenandoahBarrierSetC2::refine_load(Node* n) {
257   MemNode* load = n->as_Load();
258 
259   uint8_t barrier_data = load->barrier_data();
260 
261   // Do not touch weak LRBs at all: they are responsible for shielding from
262   // Reference.referent resurrection.
263   if ((barrier_data & (ShenandoahBitWeak | ShenandoahBitPhantom)) != 0) {
264     return;
265   }
266 
267   if (can_remove_load_barrier(n)) {
268     barrier_data &= ~ShenandoahBitStrong;
269     barrier_data |= ShenandoahBitElided;
270   }
271 
272   load->set_barrier_data(barrier_data);
273 }
274 
275 bool ShenandoahBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
276   ResourceMark rm;
277   VectorSet visited;
278   Node_List worklist;
279   worklist.push(C->root());
280   while (worklist.size() > 0) {
281     Node* n = worklist.pop();
282     if (visited.test_set(n->_idx)) {
283       continue;
284     }
285     switch(n->Opcode()) {
286       case Op_StoreP:
287       case Op_StoreN: {
288         refine_store(n);
289         break;
290       }
291       case Op_LoadN:
292       case Op_LoadP: {
293         refine_load(n);
294         break;
295       }
296     }
297 
298     for (uint j = 0; j < n->req(); j++) {
299       Node* in = n->in(j);
300       if (in != nullptr) {
301         worklist.push(in);
302       }
303     }
304   }
305   return false;
306 }
307 
308 // Support for macro expanded GC barriers
309 void ShenandoahBarrierSetC2::eliminate_gc_barrier_data(Node* node) const {
310   if (node->is_LoadStore()) {
311     LoadStoreNode* loadstore = node->as_LoadStore();
312     loadstore->set_barrier_data(0);
313   } else if (node->is_Mem()) {
314     MemNode* mem = node->as_Mem();
315     mem->set_barrier_data(0);
316   }
317 }
318 
319 // If there are no real barrier flags on the node, strip away additional fluff.
320 // Matcher does not care about this, and we would like to avoid invoking "barrier_data() != 0"
321 // rules when the only flags are the irrelevant fluff.
322 void ShenandoahBarrierSetC2::strip_extra_data(const Node* n) const {
323   if (n->is_LoadStore()) {
324     LoadStoreNode* load_store = n->as_LoadStore();
325     uint8_t barrier_data = load_store->barrier_data();
326     if ((barrier_data & ShenandoahBitsReal) == 0) {
327       load_store->set_barrier_data(0);
328     }
329   } else if (n->is_Mem()) {
330     MemNode* mem = n->as_Mem();
331     uint8_t barrier_data = mem->barrier_data();
332     if ((barrier_data & ShenandoahBitsReal) == 0) {
333       mem->set_barrier_data(0);
334     }
335   }
336 }
337 
// Strip irrelevant barrier metadata from every access node in the list.
void ShenandoahBarrierSetC2::strip_extra_data(Node_List& accesses) const {
  for (uint c = 0; c < accesses.size(); c++) {
    strip_extra_data(accesses.at(c));
  }
}
343 
// Barrier elimination during macro expansion: dropping the barrier metadata
// is all that is needed; no graph reshaping is required.
void ShenandoahBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
  eliminate_gc_barrier_data(node);
}
347 
// A dominating access already performed the barrier work: clear all barrier bits.
void ShenandoahBarrierSetC2::elide_dominated_barrier(MachNode* mach) const {
  mach->set_barrier_data(0);
}
351 
// Scan the Mach CFG and elide barriers on accesses that are dominated by
// accesses (or allocations) which already performed the equivalent barrier
// work. Runs after matching, on the block-scheduled graph.
void ShenandoahBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();

  // Accesses whose barriers may be elided, and the candidate dominators
  // that make the elision valid, per access category.
  Node_List all_loads, loads, stores, atomics;
  Node_List load_dominators, store_dominators, atomic_dominators;

  // Step 1: collect barrier-carrying accesses and potential dominators.
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      Node* const node = block->get_node(j);

      // Everything that happens in allocations does not need barriers.
      if (node->is_Phi() && is_allocation(node)) {
        load_dominators.push(node);
        store_dominators.push(node);
        atomic_dominators.push(node);
        continue;
      }

      if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {

        // Dominating loads have already passed through LRB and their load
        // locations got fixed. Subsequent barriers are no longer required.
        // The only exception are weak loads that have to go through LRB
        // to deal with dying referents.
        case Op_LoadP:
        case Op_LoadN: {
          if (mach->barrier_data() != 0) {
            all_loads.push(mach);
          }
          if ((mach->barrier_data() & ShenandoahBitStrong) != 0) {
            loads.push(mach);
            load_dominators.push(mach);
          }
          break;
        }

        // Dominating stores have recorded the old value in SATB, and made the
        // card table update for a location. Subsequent barriers are no longer
        // required.
        case Op_StoreP:
        case Op_StoreN: {
          if (mach->barrier_data() != 0) {
            stores.push(mach);
            load_dominators.push(mach);
            store_dominators.push(mach);
            atomic_dominators.push(mach);
          }
          break;
        }

        // Dominating atomics have dealt with false positives, and made the card
        // table updates for a location. Even though CAS barriers are conditional,
        // they perform all needed barriers when memory access is successful.
        // Therefore, subsequent barriers are no longer required.
        case Op_CompareAndExchangeN:
        case Op_CompareAndExchangeP:
        case Op_CompareAndSwapN:
        case Op_CompareAndSwapP:
        case Op_GetAndSetP:
        case Op_GetAndSetN: {
          if (mach->barrier_data() != 0) {
            atomics.push(mach);
            load_dominators.push(mach);
            store_dominators.push(mach);
            atomic_dominators.push(mach);
          }
          break;
        }

      default:
        break;
      }
    }
  }

  // Step 2: elide barriers on accesses dominated by a suitable dominator.
  elide_dominated_barriers(loads, load_dominators);
  elide_dominated_barriers(stores, store_dominators);
  elide_dominated_barriers(atomics, atomic_dominators);

  // Also clean up extra metadata on these nodes. Dominance analysis likely left
  // many non-elided barriers with extra metadata, which can be stripped away.
  strip_extra_data(all_loads);
  strip_extra_data(stores);
  strip_extra_data(atomics);
}
445 
// Report the estimated barrier size used by loop-unrolling heuristics.
uint ShenandoahBarrierSetC2::estimated_barrier_size(const Node* node) const {
  // Barrier impact on fast-path is driven by GC state checks emitted very late.
  // These checks are tight load-test-branch sequences, with no impact on C2 graph
  // size. Limiting unrolling in presence of GC barriers might turn some loops
  // tighter than with default unrolling, which may benefit performance due to denser
  // code. Testing shows it is still counter-productive.
  // Therefore, we report zero barrier size to let C2 do its normal thing.
  return 0;
}
455 
456 bool ShenandoahBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const {
457   bool is_oop = is_reference_type(type);
458   if (!is_oop) {
459     return false;
460   }
461   if (ShenandoahSATBBarrier && tightly_coupled_alloc) {
462     if (phase == Optimization) {
463       return false;
464     }
465     return !is_clone;
466   }
467   return true;
468 }
469 
470 bool ShenandoahBarrierSetC2::clone_needs_barrier(const TypeOopPtr* src_type, bool& is_oop_array) {
471   if (!ShenandoahCloneBarrier) {
472     return false;
473   }
474 
475   if (src_type->isa_instptr() != nullptr) {
476     // Instance: need barrier only if there is a possibility of having an oop anywhere in it.
477     ciInstanceKlass* ik = src_type->is_instptr()->instance_klass();
478     if ((src_type->klass_is_exact() || !ik->has_subklass()) &&
479         !ik->has_injected_fields() && !ik->has_object_fields()) {
480       if (!src_type->klass_is_exact()) {
481         // Class is *currently* the leaf in the hierarchy.
482         // Record the dependency so that we deopt if this does not hold in future.
483         Compile::current()->dependencies()->assert_leaf_type(ik);
484       }
485       return false;
486     }
487   } else if (src_type->isa_aryptr() != nullptr) {
488     // Array: need barrier only if array is oop-bearing.
489     BasicType src_elem = src_type->isa_aryptr()->elem()->array_element_basic_type();
490     if (is_reference_type(src_elem, true)) {
491       is_oop_array = true;
492     } else {
493       return false;
494     }
495   }
496 
497   // Assume the worst.
498   return true;
499 }
500 
// Parse-time handling of Object.clone intrinsics: decide between the common
// BarrierSetC2 path and a direct oop-array copy via the arraycopy stub.
void ShenandoahBarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  const TypeOopPtr* src_type = kit->gvn().type(src_base)->is_oopptr();

  bool is_oop_array = false;
  if (!clone_needs_barrier(src_type, is_oop_array)) {
    // No barrier is needed? Just do what common BarrierSetC2 wants with it.
    BarrierSetC2::clone(kit, src_base, dst_base, size, is_array);
    return;
  }

  if (ShenandoahCloneRuntime || !is_array || !is_oop_array) {
    // Looks like an instance? Prepare the instance clone. This would either
    // be exploded into individual accesses or be left as runtime call.
    // Common BarrierSetC2 prepares everything for both cases.
    BarrierSetC2::clone(kit, src_base, dst_base, size, is_array);
    return;
  }

  // We are cloning the oop array. Prepare to call the normal arraycopy stub
  // after the expansion. Normal stub takes the number of actual type-sized
  // elements to copy after the base, compute the count here.
  // size is in bytes; subtract the array header, then divide by oop size.
  Node* offset = kit->MakeConX(arrayOopDesc::base_offset_in_bytes(UseCompressedOops ? T_NARROWOOP : T_OBJECT));
  size = kit->gvn().transform(new SubXNode(size, offset));
  size = kit->gvn().transform(new URShiftXNode(size, kit->intcon(LogBytesPerHeapOop)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, size, true, false);
  ac->set_clone_array();
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    // ArrayCopyNode survived GVN: wire it up as a runtime call.
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), TypeRawPtr::BOTTOM);
  } else {
    // GVN replaced the node; just propagate its memory state.
    kit->set_all_memory(n);
  }
}
535 
// Macro-expansion-time handling of clone ArrayCopyNodes: expand to a plain
// copy, a runtime clone call, or the oop arraycopy stub, depending on the
// source type and barrier requirements.
void ShenandoahBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const ctrl        = ac->in(TypeFunc::Control);
  Node* const mem         = ac->in(TypeFunc::Memory);
  Node* const src         = ac->in(ArrayCopyNode::Src);
  Node* const src_offset  = ac->in(ArrayCopyNode::SrcPos);
  Node* const dest        = ac->in(ArrayCopyNode::Dest);
  Node* const dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length            = ac->in(ArrayCopyNode::Length);

  const TypeOopPtr* src_type = phase->igvn().type(src)->is_oopptr();

  bool is_oop_array = false;
  if (!clone_needs_barrier(src_type, is_oop_array)) {
    // No barrier is needed? Expand to normal HeapWord-sized arraycopy.
    BarrierSetC2::clone_at_expansion(phase, ac);
    return;
  }

  if (ShenandoahCloneRuntime || !ac->is_clone_array() || !is_oop_array) {
    // Still looks like an instance? Likely a large instance or reflective
    // clone with unknown length. Go to runtime and handle it there.
    clone_in_runtime(phase, ac, CAST_FROM_FN_PTR(address, ShenandoahRuntime::clone_addr()), "ShenandoahRuntime::clone");
    return;
  }

  // We are cloning the oop array. Call into normal oop array copy stubs.
  // Those stubs would call BarrierSetAssembler to handle GC barriers.

  // This is the full clone, so offsets should equal each other and be at array base.
  assert(src_offset == dest_offset, "should be equal");
  const jlong offset = src_offset->get_long();
  const TypeAryPtr* const ary_ptr = src->get_ptr_type()->isa_aryptr();
  BasicType bt = ary_ptr->elem()->array_element_basic_type();
  assert(offset == arrayOopDesc::base_offset_in_bytes(bt), "should match");

  const char*   copyfunc_name = "arraycopy";
  const address copyfunc_addr = phase->basictype2arraycopy(T_OBJECT, nullptr, nullptr, true, copyfunc_name, true);

  // Leaf call into the oop arraycopy stub; last argument (valid_length) is top.
  Node* const call = phase->make_leaf_call(ctrl, mem,
      OptoRuntime::fast_arraycopy_Type(),
      copyfunc_addr, copyfunc_name,
      TypeRawPtr::BOTTOM,
      phase->basic_plus_adr(src, src_offset),
      phase->basic_plus_adr(dest, dest_offset),
      length,
      phase->top()
  );
  phase->transform_later(call);

  phase->igvn().replace_node(ac, call);
}
587 
// Allocate the per-compilation Shenandoah barrier state in the compilation arena.
void* ShenandoahBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new(comp_arena) ShenandoahBarrierSetC2State(comp_arena);
}
591 
// Access the per-compilation Shenandoah barrier state for the current compile.
ShenandoahBarrierSetC2State* ShenandoahBarrierSetC2::state() const {
  return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
}
595 
596 void ShenandoahBarrierSetC2::print_barrier_data(outputStream* os, uint8_t data) {
597   os->print(" Node barriers: ");
598   if ((data & ShenandoahBitStrong) != 0) {
599     data &= ~ShenandoahBitStrong;
600     os->print("strong ");
601   }
602 
603   if ((data & ShenandoahBitWeak) != 0) {
604     data &= ~ShenandoahBitWeak;
605     os->print("weak ");
606   }
607 
608   if ((data & ShenandoahBitPhantom) != 0) {
609     data &= ~ShenandoahBitPhantom;
610     os->print("phantom ");
611   }
612 
613   if ((data & ShenandoahBitElided) != 0) {
614     data &= ~ShenandoahBitElided;
615     os->print("elided ");
616   }
617 
618   if ((data & ShenandoahBitKeepAlive) != 0) {
619     data &= ~ShenandoahBitKeepAlive;
620     os->print("keepalive ");
621   }
622 
623   if ((data & ShenandoahBitCardMark) != 0) {
624     data &= ~ShenandoahBitCardMark;
625     os->print("cardmark ");
626   }
627 
628   if ((data & ShenandoahBitNotNull) != 0) {
629     data &= ~ShenandoahBitNotNull;
630     os->print("not-null ");
631   }
632   os->cr();
633 
634   if (data > 0) {
635     fatal("Unknown bit!");
636   }
637 
638   os->print_cr(" GC configuration: %sLRB %sSATB %sCAS %sClone %sCard",
639     (ShenandoahLoadRefBarrier ? "+" : "-"),
640     (ShenandoahSATBBarrier    ? "+" : "-"),
641     (ShenandoahCASBarrier     ? "+" : "-"),
642     (ShenandoahCloneBarrier   ? "+" : "-"),
643     (ShenandoahCardBarrier    ? "+" : "-")
644   );
645 }
646 
647 #ifdef ASSERT
// Verification helper: when cond fails, dump the message, the decoded barrier
// bits, and the offending node neighborhood, then report a VM error.
void ShenandoahBarrierSetC2::verify_gc_barrier_assert(bool cond, const char* msg, uint8_t bd, Node* n) {
  if (!cond) {
    stringStream ss;
    ss.print_cr("%s", msg);
    ss.print_cr("-----------------");
    print_barrier_data(&ss, bd);
    ss.print_cr("-----------------");
    n->dump_bfs(1, nullptr, "", &ss);
    report_vm_error(__FILE__, __LINE__, ss.as_string());
  }
}
659 
// Graph-wide verification of barrier data placement, gated on
// -XX:+ShenandoahVerifyOptoBarriers. Walks all nodes reachable from root and
// checks that oop accesses carry the barrier bits the configuration demands.
void ShenandoahBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
  if (!ShenandoahVerifyOptoBarriers) {
    return;
  }

  // Optimizations might have removed the remaining auxiliary flags, making some accesses completely blank.
  bool accept_blank = (phase == BeforeCodeGen);
  bool expect_load_barriers       = !accept_blank && ShenandoahLoadRefBarrier;
  bool expect_store_barriers      = !accept_blank && (ShenandoahSATBBarrier || ShenandoahCardBarrier);
  bool expect_load_store_barriers = !accept_blank && ShenandoahCASBarrier;

  Unique_Node_List wq;
  Node_Stack phis(0);
  VectorSet visited;

  wq.push(compile->root());
  for (uint next = 0; next < wq.size(); next++) {
    Node *n = wq.at(next);
    int opc = n->Opcode();

    if (opc == Op_LoadP || opc == Op_LoadN) {
      uint8_t bd = n->as_Load()->barrier_data();

      const TypePtr* adr_type = n->as_Load()->adr_type();
      if (adr_type->isa_oopptr() || adr_type->isa_narrowoop()) {
        verify_gc_barrier_assert(!expect_load_barriers || (bd != 0), "Oop load should have barrier data", bd, n);

        // Weak/phantom bits are only legal on loads of Reference.referent.
        bool is_weak = ((bd & (ShenandoahBitWeak | ShenandoahBitPhantom)) != 0);
        bool is_referent = adr_type->isa_instptr() &&
            adr_type->is_instptr()->instance_klass()->is_subtype_of(Compile::current()->env()->Reference_klass()) &&
            adr_type->is_instptr()->offset() == java_lang_ref_Reference::referent_offset();

        verify_gc_barrier_assert(!is_weak || is_referent, "Weak load only for Reference.referent", bd, n);
      } else if (adr_type->isa_rawptr() || adr_type->isa_klassptr()) {
        // Some LoadP-s are used for T_ADDRESS loads from raw pointers. These are not oops.
        // Some LoadP-s are used to load class data.
        // TODO: Verify their barrier data.
      } else {
        verify_gc_barrier_assert(false, "Unclassified access type", bd, n);
      }
    } else if (opc == Op_StoreP || opc == Op_StoreN) {
      uint8_t bd = n->as_Store()->barrier_data();
      const TypePtr* adr_type = n->as_Store()->adr_type();
      if (adr_type->isa_oopptr() || adr_type->isa_narrowoop()) {
        // Reference.clear stores null
        bool is_referent = adr_type->isa_instptr() &&
             adr_type->is_instptr()->instance_klass()->is_subtype_of(Compile::current()->env()->Reference_klass()) &&
             adr_type->is_instptr()->offset() == java_lang_ref_Reference::referent_offset();

        const TypePtr* val_type = n->as_Store()->in(MemNode::Memory)->adr_type();
        if (!is_referent && (val_type->isa_oopptr() || val_type->isa_narrowoop())) {
          verify_gc_barrier_assert(!expect_store_barriers || (bd != 0), "Oop store should have barrier data", bd, n);
        }
      } else if (adr_type->isa_rawptr() || adr_type->isa_klassptr()) {
        // Similar to LoadP-s, some of these accesses are raw, and some are handling oops.
        // TODO: Verify their barrier data.
      } else {
        verify_gc_barrier_assert(false, "Unclassified access type", bd, n);
      }
    } else if (opc == Op_WeakCompareAndSwapP || opc == Op_WeakCompareAndSwapN ||
               opc == Op_CompareAndExchangeP || opc == Op_CompareAndExchangeN ||
               opc == Op_CompareAndSwapP     || opc == Op_CompareAndSwapN ||
               opc == Op_GetAndSetP          || opc == Op_GetAndSetN) {
      uint8_t bd = n->as_LoadStore()->barrier_data();
      verify_gc_barrier_assert(!expect_load_store_barriers || (bd != 0), "Oop load-store should have barrier data", bd, n);
    } else if (n->is_Mem()) {
      uint8_t bd = MemNode::barrier_data(n); // FIXME: LOL HotSpot, why not n->as_Mem()? LoadStore is both is_Mem() and not as_Mem().
      verify_gc_barrier_assert(bd == 0, "Other mem nodes should have no barrier data", bd, n);
    }

    // Continue the walk through all users.
    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
      Node* m = n->fast_out(i);
      wq.push(m);
    }
  }
}
736 #endif
737 
// File-local shortcut to the per-compilation Shenandoah barrier state.
static ShenandoahBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
}
741 
// Estimated size of the stubs section. Called before any stub is created,
// so it only asserts the lifecycle invariant and reports zero.
int ShenandoahBarrierSetC2::estimate_stub_size() const {
  GrowableArray<ShenandoahBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  assert(stubs->is_empty(), "Lifecycle: no stubs were yet created");
  return 0;
}
747 
// Emit all registered slow-path barrier stubs into the code buffer, and fix
// up the assembler's skipped-bytes accounting around the emission.
void ShenandoahBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ShenandoahBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  // Record where the stubs start and where barrier save slots live on the stack.
  barrier_set_state()->set_stubs_start_offset(masm.offset());
  barrier_set_state()->set_save_slots_stack_offset(Compile::current()->output()->gc_barrier_save_slots_offset_in_bytes());

  // Stub generation uses nested skipped counters that can double-count.
  // Calculate the actual skipped amount by the real PC before/after stub generation.
  // FIXME: This should be handled upstream.
  int offset_before = masm.offset();
  int skipped_before = masm.get_skipped();

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
    stubs->at(i)->emit_code(masm);
  }

  int offset_after = masm.offset();

  // The real stubs section is coming up after this, so we have to account for alignment
  // padding there. See CodeSection::alignment().
  offset_after = align_up(offset_after, HeapWordSize);

  masm.set_skipped(skipped_before + (offset_after - offset_before));

  masm.flush();
}
779 
780 void ShenandoahBarrierStubC2::register_stub(ShenandoahBarrierStubC2* stub) {
781   if (!Compile::current()->output()->in_scratch_emit_size()) {
782     barrier_set_state()->stubs()->append(stub);
783   }
784 }
785 
786 void ShenandoahBarrierStubC2::inc_trampoline_stubs_count() {
787   if (!Compile::current()->output()->in_scratch_emit_size()) {
788     barrier_set_state()->inc_trampoline_stubs_count();
789   }
790 }
791 
// Number of trampoline stubs registered for the current compilation.
int ShenandoahBarrierStubC2::trampoline_stubs_count() {
  return barrier_set_state()->trampoline_stubs_count();
}
795 
// Code offset where the barrier stubs section starts (set by emit_stubs).
int ShenandoahBarrierStubC2::stubs_start_offset() {
  return barrier_set_state()->stubs_start_offset();
}
799 
// Stack offset of the register save slots reserved for barrier stubs.
int ShenandoahBarrierStubC2::save_slots_stack_offset() {
  return barrier_set_state()->save_slots_stack_offset();
}
803 
804 int ShenandoahBarrierStubC2::push_save_slot() {
805   assert(_save_slots_idx < ShenandoahBarrierSetC2::bsc2()->reserved_slots(), "Enough slots are reserved");
806   return save_slots_stack_offset() + (_save_slots_idx++) * sizeof(address);
807 }
808 
809 int ShenandoahBarrierStubC2::pop_save_slot() {
810   assert(_save_slots_idx > 0, "About to underflow");
811   return save_slots_stack_offset() + (--_save_slots_idx) * sizeof(address);
812 }
813 
// Arena-allocate a barrier stub (variant with an explicit offset) and
// register it for late emission.
ShenandoahBarrierStubC2* ShenandoahBarrierStubC2::create(const MachNode* node, Register obj, Address addr, bool narrow, bool do_load, int offset) {
  auto* stub = new (Compile::current()->comp_arena()) ShenandoahBarrierStubC2(node, obj, addr, narrow, do_load, offset);
  ShenandoahBarrierStubC2::register_stub(stub);
  return stub;
}
819 
// Arena-allocate a barrier stub and register it for late emission.
ShenandoahBarrierStubC2* ShenandoahBarrierStubC2::create(const MachNode* node, Register obj, Address addr, bool narrow, bool do_load) {
  auto* stub = new (Compile::current()->comp_arena()) ShenandoahBarrierStubC2(node, obj, addr, narrow, do_load);
  ShenandoahBarrierStubC2::register_stub(stub);
  return stub;
}
825 
// Address of the runtime entry for the SATB keep-alive (pre-write) barrier.
// AMD64 uses a dedicated runtime stub; other platforms call the runtime directly.
address ShenandoahBarrierStubC2::keepalive_runtime_entry_addr() {
#ifdef AMD64
  return SharedRuntime::shenandoah_keepalive();
#else
  return CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre);
#endif
}
833 
// Address of the load-reference-barrier runtime entry matching this stub's
// reference strength (strong/weak/phantom) and oop width (narrow/wide).
// AMD64 uses dedicated runtime stubs; other platforms call the runtime directly.
address ShenandoahBarrierStubC2::lrb_runtime_entry_addr() {
  bool is_strong  = (_node->barrier_data() & ShenandoahBitStrong)  != 0;
  bool is_weak    = (_node->barrier_data() & ShenandoahBitWeak)    != 0;
  bool is_phantom = (_node->barrier_data() & ShenandoahBitPhantom) != 0;

#ifdef AMD64
  if (_narrow) {
    if (is_strong) {
      return SharedRuntime::shenandoah_lrb_strong_narrow();
    } else if (is_weak) {
      return SharedRuntime::shenandoah_lrb_weak_narrow();
    } else if (is_phantom) {
      return SharedRuntime::shenandoah_lrb_phantom_narrow();
    }
  } else {
    if (is_strong) {
      return SharedRuntime::shenandoah_lrb_strong();
    } else if (is_weak) {
      return SharedRuntime::shenandoah_lrb_weak();
    } else if (is_phantom) {
      return SharedRuntime::shenandoah_lrb_phantom();
    }
  }
#else
  // TODO: Remove once platforms migrate to runtime stubs.
  if (_narrow) {
    if (is_strong) {
      return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
    } else if (is_weak) {
      return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else if (is_phantom) {
      return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow);
    }
  } else {
    if (is_strong) {
      return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    } else if (is_weak) {
      return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    } else if (is_phantom) {
      return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
    }
  }
#endif
  // Exactly one of the strength bits should have been set.
  assert(false, "sanity");
  return nullptr;
}
880 
// Liveness data is only needed for nodes that expand into slow-path stubs.
bool ShenandoahBarrierSetC2State::needs_liveness_data(const MachNode* mach) const {
  // Nodes that require slow-path stubs need liveness data.
  return ShenandoahBarrierStubC2::needs_slow_barrier(mach);
}
885 
// Shenandoah always wants live-in data computed for barrier liveness analysis.
bool ShenandoahBarrierSetC2State::needs_livein_data() const {
  return true;
}