/*
 * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/x/c2/xBarrierSetC2.hpp"
#include "gc/x/xBarrierSet.hpp"
#include "gc/x/xBarrierSetAssembler.hpp"
#include "gc/x/xBarrierSetRuntime.hpp"
#include "opto/addnode.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

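// Per-compilation barrier state: the load barrier stubs to be emitted after
// the main code, and a lazily allocated liveness RegMask for each
// barrier-carrying MachNode.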
class XBarrierSetC2State : public ArenaObj {
private:
  GrowableArray<XLoadBarrierStubC2*>* _stubs;
  Node_Array                          _live;

public:
  XBarrierSetC2State(Arena* arena) :
    _stubs(new (arena) GrowableArray<XLoadBarrierStubC2*>(arena, 8, 0, nullptr)),
    _live(arena) {}

  GrowableArray<XLoadBarrierStubC2*>* stubs() {
    return _stubs;
  }

  RegMask* live(const Node* node) {
    if (!node->is_Mach()) {
      // Don't need liveness for non-MachNodes
      return nullptr;
    }

    const MachNode* const mach = node->as_Mach();
    if (mach->barrier_data() == XLoadBarrierElided) {
      // Don't need liveness data for nodes without barriers
      return nullptr;
    }

    RegMask* live = (RegMask*)_live[node->_idx];
    if (live == nullptr) {
      live = new (Compile::current()->comp_arena()->AmallocWords(sizeof(RegMask))) RegMask();
      _live.map(node->_idx, (Node*)live);
    }

    return live;
  }
};

static XBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<XBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

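// Create a stub and register it for later emission, unless this is only a
// scratch pass used to measure code size, in which case it is not recorded.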
XLoadBarrierStubC2* XLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) {
  XLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) XLoadBarrierStubC2(node, ref_addr, ref, tmp, barrier_data);
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }

  return stub;
}

XLoadBarrierStubC2::XLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) :
    _node(node),
    _ref_addr(ref_addr),
    _ref(ref),
    _tmp(tmp),
    _barrier_data(barrier_data),
    _entry(),
    _continuation() {
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
}

Address XLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register XLoadBarrierStubC2::ref() const {
  return _ref;
}

Register XLoadBarrierStubC2::tmp() const {
  return _tmp;
}

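// Map the stub's barrier data bits to the corresponding DecoratorSet and
// return the address of the matching runtime slow-path entry.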
address XLoadBarrierStubC2::slow_path() const {
  DecoratorSet decorators = DECORATORS_NONE;
  if (_barrier_data & XLoadBarrierStrong) {
    decorators |= ON_STRONG_OOP_REF;
  }
  if (_barrier_data & XLoadBarrierWeak) {
    decorators |= ON_WEAK_OOP_REF;
  }
  if (_barrier_data & XLoadBarrierPhantom) {
    decorators |= ON_PHANTOM_OOP_REF;
  }
  if (_barrier_data & XLoadBarrierNoKeepalive) {
    decorators |= AS_NO_KEEPALIVE;
  }
  return XBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}

RegMask& XLoadBarrierStubC2::live() const {
  RegMask* mask = barrier_set_state()->live(_node);
  assert(mask != nullptr, "must be mach-node with barrier");
  return *mask;
}

Label* XLoadBarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any label will do, as it will only act as
  // a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* XLoadBarrierStubC2::continuation() {
  return &_continuation;
}

void* XBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) XBarrierSetC2State(comp_arena);
}

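// Late barrier analysis: first elide barriers made redundant by dominating
// accesses, then compute register liveness at the remaining barrier stubs.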
void XBarrierSetC2::late_barrier_analysis() const {
  analyze_dominating_barriers();
  compute_liveness_at_stubs();
}

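// Emit the slow-path stubs collected during code emission, expanding the
// code buffer as needed and bailing out if the code cache is full.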
void XBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<XLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    XBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
  }

  masm.flush();
}

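// Estimate the combined size of all stubs by assembling each one into the
// scratch buffer and summing the emitted instruction sizes.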
int XBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->output()->scratch_buffer_blob();
  GrowableArray<XLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), (address)C->output()->scratch_locs_memory() - blob->content_begin());
    MacroAssembler masm(&cb);
    XBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
    size += cb.insts_size();
  }

  return size;
}

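// Tag the parsed access with the barrier kind its decorators call for, so
// the information follows the node through matching to code emission.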
static void set_barrier_data(C2Access& access) {
  if (XBarrierSet::barrier_needed(access.decorators(), access.type())) {
    uint8_t barrier_data = 0;

    if (access.decorators() & ON_PHANTOM_OOP_REF) {
      barrier_data |= XLoadBarrierPhantom;
    } else if (access.decorators() & ON_WEAK_OOP_REF) {
      barrier_data |= XLoadBarrierWeak;
    } else {
      barrier_data |= XLoadBarrierStrong;
    }

    if (access.decorators() & AS_NO_KEEPALIVE) {
      barrier_data |= XLoadBarrierNoKeepalive;
    }

    access.set_barrier_data(barrier_data);
  }
}

Node* XBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* XBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* XBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* XBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

bool XBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, bool is_clone_instance,
                                                    ArrayCopyPhase phase) const {
  if (phase == ArrayCopyPhase::Parsing) {
    return false;
  }
  if (phase == ArrayCopyPhase::Optimization) {
    return is_clone_instance;
  }
  // else ArrayCopyPhase::Expansion
  return type == T_OBJECT || type == T_ARRAY;
}

// This TypeFunc assumes a 64bit system
static const TypeFunc* clone_type() {
  // Create input type (domain)
  const Type** domain_fields = TypeTuple::fields(4);
  domain_fields[TypeFunc::Parms + 0] = TypeInstPtr::NOTNULL;  // src
  domain_fields[TypeFunc::Parms + 1] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[TypeFunc::Parms + 2] = TypeLong::LONG;        // size lower
  domain_fields[TypeFunc::Parms + 3] = Type::HALF;            // size upper
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + 4, domain_fields);

  // Create result type (range)
  const Type** range_fields = TypeTuple::fields(0);
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

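// On 64-bit platforms a long call argument occupies two adjacent parameter
// positions; XTOP passes top() as the second (high) half.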
#define XTOP LP64_ONLY(COMMA phase->top())

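// Expand a clone: object arrays are copied with the oop arraycopy stub (which
// applies load barriers to the copied elements), primitive arrays with a raw
// word copy, and instances via a call to XBarrierSetRuntime::clone.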
void XBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  const TypeAryPtr* ary_ptr = src->get_ptr_type()->isa_aryptr();

  if (ac->is_clone_array() && ary_ptr != nullptr) {
    BasicType bt = ary_ptr->elem()->array_element_basic_type();
    if (is_reference_type(bt)) {
      // Clone object array
      bt = T_OBJECT;
    } else {
      // Clone primitive array
      bt = T_LONG;
    }

    Node* ctrl = ac->in(TypeFunc::Control);
    Node* mem = ac->in(TypeFunc::Memory);
    Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
    Node* dest = ac->in(ArrayCopyNode::Dest);
    Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
    Node* length = ac->in(ArrayCopyNode::Length);

    if (bt == T_OBJECT) {
      // BarrierSetC2::clone sets the offsets via BarrierSetC2::arraycopy_payload_base_offset
      // which 8-byte aligns them to allow for word size copies. Make sure the offsets point
      // to the first element in the array when cloning object arrays. Otherwise, load
      // barriers are applied to parts of the header. Also adjust the length accordingly.
      assert(src_offset == dest_offset, "should be equal");
      jlong offset = src_offset->get_long();
      if (offset != arrayOopDesc::base_offset_in_bytes(T_OBJECT)) {
        assert(!UseCompressedClassPointers || UseCompactObjectHeaders, "should only happen without compressed class pointers or with compact object headers");
        assert((arrayOopDesc::base_offset_in_bytes(T_OBJECT) - offset) == BytesPerLong, "unexpected offset");
        length = phase->transform_later(new SubLNode(length, phase->longcon(1))); // Size is in longs
        src_offset = phase->longcon(arrayOopDesc::base_offset_in_bytes(T_OBJECT));
        dest_offset = src_offset;
      }
    }
    Node* payload_src = phase->basic_plus_adr(src, src_offset);
    Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

    const char* copyfunc_name = "arraycopy";
    address     copyfunc_addr = phase->basictype2arraycopy(bt, nullptr, nullptr, true, copyfunc_name, true);

    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

    Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
    phase->transform_later(call);

    phase->igvn().replace_node(ac, call);
    return;
  }

  // Clone instance
  Node* const ctrl       = ac->in(TypeFunc::Control);
  Node* const mem        = ac->in(TypeFunc::Memory);
  Node* const dst        = ac->in(ArrayCopyNode::Dest);
  Node* const size       = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->is_long(), "Should be long");

  // The native clone we are calling here expects the instance size in words
  // Add header/offset size to payload size to get instance size.
  Node* const base_offset = phase->longcon(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddLNode(size, base_offset));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           XBarrierSetRuntime::clone_addr(),
                                           "XBarrierSetRuntime::clone",
                                           TypeRawPtr::BOTTOM,
                                           src,
                                           dst,
                                           full_size,
                                           phase->top());
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

#undef XTOP

// == Dominating barrier elision ==
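//
// A strongly-barriered access (or a store, which always writes a good oop)
// leaves the accessed address holding a healed reference. A later load from
// the same address can then skip its barrier, as long as no safepoint can be
// reached in between - a safepoint may change the global pointer coloring
// and invalidate the earlier check. For example:
//
//   x = obj.f; // load barrier executed, obj.f is healed
//   ...        // no safepoint on any path between the two loads
//   y = obj.f; // load barrier elided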

static bool block_has_safepoint(const Block* block, uint from, uint to) {
  for (uint i = from; i < to; i++) {
    if (block->get_node(i)->is_MachSafePoint()) {
      // Safepoint found
      return true;
    }
  }

  // Safepoint not found
  return false;
}

static bool block_has_safepoint(const Block* block) {
  return block_has_safepoint(block, 0, block->number_of_nodes());
}

static uint block_index(const Block* block, const Node* node) {
  for (uint j = 0; j < block->number_of_nodes(); ++j) {
    if (block->get_node(j) == node) {
      return j;
    }
  }
  ShouldNotReachHere();
  return 0;
}

void XBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();
  Block_List worklist;
  Node_List mem_ops;
  Node_List barrier_loads;

  // Step 1 - Find accesses, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      const Node* const node = block->get_node(j);
      if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
        if ((mach->barrier_data() & XLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
        if ((mach->barrier_data() & (XLoadBarrierStrong | XLoadBarrierNoKeepalive)) ==
            XLoadBarrierStrong) {
          mem_ops.push(mach);
        }
        break;
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if ((mach->barrier_data() & XLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
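        // Fall through - atomic accesses are also tracked as memory operations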
      case Op_StoreP:
        mem_ops.push(mach);
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses for each load
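  // For each (load, access) candidate pair with matching base and offset:
  // if both live in the same block, just scan the nodes between them for a
  // safepoint; if the access block strictly dominates the load block, search
  // all paths between the two blocks for safepoints instead.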
  for (uint i = 0; i < barrier_loads.size(); i++) {
    MachNode* const load = barrier_loads.at(i)->as_Mach();
    const TypePtr* load_adr_type = nullptr;
    intptr_t load_offset = 0;
    const Node* const load_obj = load->get_base_and_disp(load_offset, load_adr_type);
    Block* const load_block = cfg->get_block_for_node(load);
    const uint load_index = block_index(load_block, load);

    for (uint j = 0; j < mem_ops.size(); j++) {
      MachNode* mem = mem_ops.at(j)->as_Mach();
      const TypePtr* mem_adr_type = nullptr;
      intptr_t mem_offset = 0;
      const Node* mem_obj = mem->get_base_and_disp(mem_offset, mem_adr_type);
      Block* mem_block = cfg->get_block_for_node(mem);
      uint mem_index = block_index(mem_block, mem);

      if (load_obj == NodeSentinel || mem_obj == NodeSentinel ||
          load_obj == nullptr || mem_obj == nullptr ||
          load_offset < 0 || mem_offset < 0) {
        continue;
      }

      if (mem_obj != load_obj || mem_offset != load_offset) {
        // Not the same addresses, not a candidate
        continue;
      }

      if (load_block == mem_block) {
        // Earlier accesses in the same block
        if (mem_index < load_index && !block_has_safepoint(mem_block, mem_index + 1, load_index)) {
          load->set_barrier_data(XLoadBarrierElided);
        }
      } else if (mem_block->dominates(load_block)) {
        // Dominating block? Look around for safepoints
        ResourceMark rm;
        Block_List stack;
        VectorSet visited;
        stack.push(load_block);
        bool safepoint_found = block_has_safepoint(load_block);
        while (!safepoint_found && stack.size() > 0) {
          Block* block = stack.pop();
          if (visited.test_set(block->_pre_order)) {
            continue;
          }
          if (block_has_safepoint(block)) {
            safepoint_found = true;
            break;
          }
          if (block == mem_block) {
            continue;
          }

          // Push predecessor blocks
          for (uint p = 1; p < block->num_preds(); ++p) {
            Block* pred = cfg->get_block_for_node(block->pred(p));
            stack.push(pred);
          }
        }

        if (!safepoint_found) {
          load->set_barrier_data(XLoadBarrierElided);
        }
      }
    }
  }
}

// == Reduced spilling optimization ==
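//
// Before calling into the runtime, a load barrier stub must spill the
// registers that are live across it. Compute liveness with a backwards
// dataflow pass over the post-register-allocation CFG and record, for each
// barrier-carrying node, the set of registers live across it, so its stub
// spills only those instead of conservatively saving everything.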

void XBarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks());
  XBarrierSetAssembler* const bs = XBarrierSet::assembler();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

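  // Iterate to a fixed point: whenever a block's live-in set grows, its
  // predecessors are pushed onto the worklist again to be recomputed.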
  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks liveness, update it
      RegMask* const regs = barrier_set_state()->live(node);
      if (regs != nullptr) {
        regs->OR(new_live);
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}

#ifndef PRODUCT
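// Print the barrier kinds encoded in a MachNode's barrier data; used when
// dumping machine nodes in debug builds.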
void XBarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const {
  if ((mach->barrier_data() & XLoadBarrierStrong) != 0) {
    st->print("strong ");
  }
  if ((mach->barrier_data() & XLoadBarrierWeak) != 0) {
    st->print("weak ");
  }
  if ((mach->barrier_data() & XLoadBarrierPhantom) != 0) {
    st->print("phantom ");
  }
  if ((mach->barrier_data() & XLoadBarrierNoKeepalive) != 0) {
    st->print("nokeepalive ");
  }
}
#endif // !PRODUCT