/*
 * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "code/vmreg.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/convertnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "opto/narrowptrnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "utilities/macros.hpp"
#include CPU_HEADER(gc/shared/barrierSetAssembler)

// By default this is a no-op.
void BarrierSetC2::resolve_address(C2Access& access) const { }

void* C2ParseAccess::barrier_set_state() const {
  return _kit->barrier_set_state();
}

PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }

bool C2Access::needs_cpu_membar() const {
  bool mismatched   = (_decorators & C2_MISMATCHED) != 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0;

  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  bool in_heap   = (_decorators & IN_HEAP) != 0;
  bool in_native = (_decorators & IN_NATIVE) != 0;
  bool is_mixed  = !in_heap && !in_native;

  bool is_write  = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read   = (_decorators & C2_READ_ACCESS) != 0;
  bool is_atomic = is_read && is_write;

  if (is_atomic) {
    // Atomics always need to be wrapped in CPU membars
    return true;
  }

  if (anonymous) {
    // We will need memory barriers unless we can determine a unique
    // alias category for this reference.  (Note:  If for some reason
    // the barriers get omitted and the unsafe reference begins to "pollute"
    // the alias analysis of the rest of the graph, either Compile::can_alias
    // or Compile::must_alias will throw a diagnostic assert.)
    if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
      return true;
    }
  } else {
    assert(!is_mixed, "not unsafe");
  }

  return false;
}
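
// Illustrative examples of the checks above (not exhaustive, and assuming the
// usual decorator setup done by the Unsafe intrinsics):
//
//   Unsafe.getAndAddInt(o, off, 1)  -> read + write access          -> true (atomic)
//   Unsafe.getIntVolatile(o, off)   -> anonymous and not unordered  -> true
//   Unsafe.getInt(o, off) on a plain in-heap field with a unique,
//   non-mismatched alias category   -> anonymous, unordered         -> false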

static BarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
}

RegMask& BarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

BarrierStubC2::BarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation(),
    _preserve(live()) {}

Label* BarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* BarrierStubC2::continuation() {
  return &_continuation;
}

uint8_t BarrierStubC2::barrier_data() const {
  return _node->barrier_data();
}

void BarrierStubC2::preserve(Register r) {
  const VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  _preserve.Insert(OptoReg::as_OptoReg(vm_reg));
}

void BarrierStubC2::dont_preserve(Register r) {
  VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
  // for r11 in aarch64).
  do {
    _preserve.Remove(OptoReg::as_OptoReg(vm_reg));
    vm_reg = vm_reg->next();
  } while (vm_reg->is_Register() && !vm_reg->is_concrete());
}

const RegMask& BarrierStubC2::preserve_set() const {
  return _preserve;
}

Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  DecoratorSet decorators = access.decorators();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;

  MemNode::MemOrd mo = access.mem_node_mo();

  Node* store;
  BasicType bt = access.type();
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);

    GraphKit* kit = parse_access.kit();
    if (bt == T_DOUBLE) {
      Node* new_val = kit->dprecision_rounding(val.node());
      val.set_node(new_val);
    }

    store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
                                 mo, requires_atomic_access, unaligned, mismatched,
                                 unsafe, access.barrier_data());
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* ctl = opt_access.ctl();
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    const TypePtr* adr_type = access.addr().type();
    int alias = gvn.C->get_alias_index(adr_type);
    Node* mem = mm->memory_at(alias);

    StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
    if (unaligned) {
      st->set_unaligned_access();
    }
    if (mismatched) {
      st->set_mismatched_access();
    }
    st->set_barrier_data(access.barrier_data());
    store = gvn.transform(st);
    if (store == st) {
      mm->set_memory_at(alias, st);
    }
  }
  access.set_raw_access(store);

  return store;
}

Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  DecoratorSet decorators = access.decorators();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
  bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;

  MemNode::MemOrd mo = access.mem_node_mo();
  LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;

  Node* load;
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
    GraphKit* kit = parse_access.kit();
    Node* control = control_dependent ? kit->control() : nullptr;

    if (immutable) {
      Compile* C = Compile::current();
      Node* mem = kit->immutable_memory();
      load = LoadNode::make(kit->gvn(), control, mem, adr,
                            adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
                            unaligned, mismatched, unsafe, access.barrier_data());
      load = kit->gvn().transform(load);
    } else {
      load = kit->make_load(control, adr, val_type, access.type(), mo,
                            dep, requires_atomic_access, unaligned, mismatched, unsafe,
                            access.barrier_data());
    }
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* control = control_dependent ? opt_access.ctl() : nullptr;
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
    load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
                          requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
    load = gvn.transform(load);
  }
  access.set_raw_access(load);

  return load;
}
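
// Note (illustrative only): a GC-specific BarrierSetC2 subclass typically wraps
// the two resolved accessors above rather than rebuilding them, along the
// lines of
//
//   Node* MyBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
//     Node* load = BarrierSetC2::load_at_resolved(access, val_type); // plain load
//     return add_read_barrier_if_needed(access, load);               // GC-specific step
//   }
//
// where MyBarrierSetC2 and add_read_barrier_if_needed are hypothetical names,
// not part of this file.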

class C2AccessFence: public StackObj {
  C2Access& _access;
  Node* _leading_membar;

public:
  C2AccessFence(C2Access& access) :
    _access(access), _leading_membar(nullptr) {
    GraphKit* kit = nullptr;
    if (access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_release = (decorators & MO_RELEASE) != 0;

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      // Memory-model-wise, a LoadStore acts like a little synchronized
      // block, so it needs barriers on each side.  These don't translate
      // into actual barriers on most machines, but we still need the rest
      // of the compiler to respect the ordering.
      if (is_release) {
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      } else if (is_volatile) {
        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
          _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
        } else {
          _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
        }
      }
    } else if (is_write) {
      // If the reference is volatile, prevent following memory ops from
      // floating down past the volatile write.  This also prevents commoning
      // with another volatile read.
      if (is_volatile || is_release) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      }
    } else {
      // Memory barrier to prevent normal and 'unsafe' accesses from
      // bypassing each other.  Happens after null checks, so the
      // exception paths do not take memory state from the memory barrier,
      // so there's no problem making a strong assert about mixing users
      // of safe & unsafe memory.
      if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
      }
    }

    if (access.needs_cpu_membar()) {
      assert(kit != nullptr, "unsupported at optimization time");
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      // 4984716: MemBars must be inserted before this
      //          memory node in order to avoid a false
      //          dependency which will confuse the scheduler.
      access.set_memory();
    }
  }

  ~C2AccessFence() {
    GraphKit* kit = nullptr;
    if (_access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = _access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_acquire = (decorators & MO_ACQUIRE) != 0;

    // If the reference is volatile, prevent following volatile ops from
    // floating up before the volatile access.
    if (_access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      if (is_acquire || is_volatile) {
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        if (_leading_membar != nullptr) {
          MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else if (is_write) {
      // If not multiple copy atomic, we do the MemBarVolatile before the load.
      if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
        if (_leading_membar != nullptr) {
          MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else {
      if (is_volatile || is_acquire) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        mb->as_MemBar()->set_trailing_load();
      }
    }
  }
};
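
// For reference, the net effect of wrapping an access in a C2AccessFence is
// roughly the following (derived from the constructor/destructor above; the
// exact shape also depends on needs_cpu_membar() and on
// support_IRIW_for_not_multiple_copy_atomic_cpu):
//
//   volatile store:  MemBarRelease;    store;     MemBarVolatile (trailing)
//   volatile load:   [MemBarVolatile;] load;      MemBarAcquire  (trailing)
//   atomic RMW:      MemBarRelease;    LoadStore; MemBarAcquire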

Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return store_at_resolved(access, val);
}

Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return load_at_resolved(access, val_type);
}

MemNode::MemOrd C2Access::mem_node_mo() const {
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  if ((_decorators & MO_SEQ_CST) != 0) {
    if (is_write && is_read) {
      // For atomic operations
      return MemNode::seqcst;
    } else if (is_write) {
      return MemNode::release;
    } else {
      assert(is_read, "what else?");
      return MemNode::acquire;
    }
  } else if ((_decorators & MO_RELEASE) != 0) {
    return MemNode::release;
  } else if ((_decorators & MO_ACQUIRE) != 0) {
    return MemNode::acquire;
  } else if (is_write) {
    // Volatile fields need releasing stores.
    // Non-volatile fields also need releasing stores if they hold an
    // object reference, because the object reference might point to
    // a freshly created object.
    // Conservatively release stores of object references.
    return StoreNode::release_if_reference(_type);
  } else {
    return MemNode::unordered;
  }
}

void C2Access::fixup_decorators() {
  bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;

  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;

  if (AlwaysAtomicAccesses && is_unordered) {
    _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
    _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccesses
  }

  _decorators = AccessInternal::decorator_fixup(_decorators, _type);

  if (is_read && !is_write && anonymous) {
    // To be valid, unsafe loads may depend on conditions other than
    // the one that guards them: pin the Load node
    _decorators |= C2_CONTROL_DEPENDENT_LOAD;
    _decorators |= C2_UNKNOWN_CONTROL_LOAD;
    const TypePtr* adr_type = _addr.type();
    Node* adr = _addr.node();
    if (!needs_cpu_membar() && adr_type->isa_instptr()) {
      assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
      intptr_t offset = Type::OffsetBot;
      AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
      if (offset >= 0) {
        int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
        if (offset < s) {
          // Guaranteed to be a valid access, no need to pin it
          _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
          _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
        }
      }
    }
  }
}
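
// Worked example for the check above (illustrative field layout): for an unsafe
// load from a receiver whose instance size per the layout helper is 24 bytes,
// a constant offset such as 16 satisfies 0 <= 16 < 24, so the access is
// provably inside the object and the two pinning decorators are cleared again.
// A negative (unknown) offset or one beyond the instance size keeps the load
// pinned via C2_CONTROL_DEPENDENT_LOAD and C2_UNKNOWN_CONTROL_LOAD.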

//--------------------------- atomic operations---------------------------------

void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
  assert(access.is_parse_access(), "entry not supported at optimization time");
  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
  GraphKit* kit = parse_access.kit();
  Node* load_store = access.raw_access();
  assert(load_store != nullptr, "must pin atomic op");
  Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
  kit->set_memory(proj, access.alias_idx());
}

void C2AtomicParseAccess::set_memory() {
  Node *mem = _kit->memory(_alias_idx);
  _memory = mem;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                   Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
    } else
#endif
    {
      load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
    }
  } else {
    switch (access.type()) {
      case T_BYTE: {
        load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_SHORT: {
        load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_INT: {
        load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_LONG: {
        load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  DecoratorSet decorators = access.decorators();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
  Node* load_store = nullptr;
  Node* adr = access.addr().node();

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      } else {
        load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      }
    } else
#endif
    {
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      } else {
        load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      }
    }
  } else {
    switch(access.type()) {
      case T_BYTE: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_SHORT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_INT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_LONG: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
    } else
#endif
    {
      load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
    }
  } else {
    switch (access.type()) {
      case T_BYTE:
        load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_SHORT:
        load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_INT:
        load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_LONG:
        load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  Node* load_store = nullptr;
  GraphKit* kit = access.kit();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* mem = access.memory();

  switch(access.type()) {
    case T_BYTE:
      load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_SHORT:
      load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_INT:
      load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_LONG:
      load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    default:
      ShouldNotReachHere();
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
                                          Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
                                           Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_xchg_at_resolved(access, new_val, value_type);
}

Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_add_at_resolved(access, new_val, value_type);
}

int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
  // Exclude the header but include the array length, to copy in 8-byte words.
  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
                            instanceOopDesc::base_offset_in_bytes();
  // base_off:
  // 8  - 32-bit VM or 64-bit VM, compact headers
  // 12 - 64-bit VM, compressed klass
  // 16 - 64-bit VM, normal klass
  if (base_off % BytesPerLong != 0) {
    assert(UseCompressedClassPointers, "");
    assert(!UseCompactObjectHeaders, "");
    if (is_array) {
      // Exclude the length, to copy in 8-byte words.
      base_off += sizeof(int);
    } else {
      // Include the klass, to copy in 8-byte words.
      base_off = instanceOopDesc::klass_offset_in_bytes();
    }
    assert(base_off % BytesPerLong == 0, "expect 8-byte alignment");
  }
  return base_off;
}
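
// Worked example (following the base_off table above): on a 64-bit VM with
// compressed class pointers, base_off starts at 12 and is not 8-byte aligned,
// so an instance clone moves it back to instanceOopDesc::klass_offset_in_bytes()
// (8) and also copies the klass word, while an array clone advances it by
// sizeof(int) (4) to 16 and excludes the length field; both results are again
// multiples of BytesPerLong.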

void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  int base_off = arraycopy_payload_base_offset(is_array);
  Node* payload_size = size;
  Node* offset = kit->MakeConX(base_off);
  payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
  if (is_array) {
    // Ensure the array payload size is rounded up to the next BytesPerLong
    // multiple when converting to double-words. This is necessary because array
    // size does not include object alignment padding, so it might not be a
    // multiple of BytesPerLong for sub-long element types.
    payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
  }
  payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
  if (is_array) {
    ac->set_clone_array();
  } else {
    ac->set_clone_inst();
  }
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
  } else {
    kit->set_all_memory(n);
  }
}
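
// Worked example of the payload size computation above, assuming a 64-bit VM
// with compressed class pointers so that arraycopy_payload_base_offset(true)
// is 16: cloning an int[3] gives size = 16 + 3 * 4 = 28 bytes, so
// payload_size = 28 - 16 = 12 bytes; adding BytesPerLong - 1 = 7 and shifting
// right by LogBytesPerLong yields (12 + 7) >> 3 = 2 double-words, rounding the
// copy up to also cover the trailing 4 padding bytes.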

Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines) const {
  assert(UseTLAB, "Only for TLAB enabled allocations");

  Node* thread = macro->transform_later(new ThreadLocalNode());
  Node* tlab_top_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_top_offset()));
  Node* tlab_end_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_end_offset()));

  // Load the TLAB end.
  //
  // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
  //       a bug where these values were being moved across
  //       a safepoint.  These are not oops, so they cannot be included in the oop
  //       map, but they can be changed by a GC.  The proper way to fix this would
  //       be to set the raw memory state when generating a SafepointNode.  However
  //       this will require extensive changes to the loop optimization in order to
  //       prevent a degradation of the optimization.
  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
  Node* tlab_end = macro->make_load(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);

  // Load the TLAB top.
  Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
  macro->transform_later(old_tlab_top);

  // Add the allocation size to the old TLAB top to get the new TLAB top.
  Node* new_tlab_top = new AddPNode(macro->top(), old_tlab_top, size_in_bytes);
  macro->transform_later(new_tlab_top);

  // Check against the TLAB end.
  Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
  macro->transform_later(tlab_full);

  Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
  macro->transform_later(needgc_bol);
  IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
  macro->transform_later(needgc_iff);

  // Plug the failing-heap-space-need-gc test into the slow-path region.
  Node* needgc_true = new IfTrueNode(needgc_iff);
  macro->transform_later(needgc_true);
  needgc_ctrl = needgc_true;

  // No need for a GC.
  Node* needgc_false = new IfFalseNode(needgc_iff);
  macro->transform_later(needgc_false);

  // Fast path:
  i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
                                   old_tlab_top, new_tlab_top, prefetch_lines);

  // Store the modified TLAB top back down.
  Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
                   TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
  macro->transform_later(store_tlab_top);

  fast_oop_ctrl = needgc_false;
  fast_oop_rawmem = store_tlab_top;
  return old_tlab_top;
}
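
// The TLAB fast path assembled above corresponds to the following pseudo-code
// (an illustrative restatement, not additional IR):
//
//   old_top = thread->tlab_top;
//   new_top = old_top + size_in_bytes;
//   if (new_top >= thread->tlab_end) goto slow_path;  // needgc_ctrl
//   thread->tlab_top = new_top;                       // fast_oop_rawmem
//   // old_top is the newly allocated object, returned to the caller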

static const TypeFunc* clone_type() {
  // Create input type (domain)
  int argcnt = NOT_LP64(3) LP64_ONLY(4);
  const Type** const domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // src
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[argp++] = TypeX_X;               // size lower
  LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
                                    address clone_addr, const char* clone_name) const {
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem  = ac->in(TypeFunc::Memory);
  Node* const src  = ac->in(ArrayCopyNode::Src);
  Node* const dst  = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->base() == Type_X,
         "Should be of object size type (int for 32 bits, long for 64 bits)");

  // The native clone we are calling here expects the object size in words.
  // Add the header/offset size to the payload size to get the object size.
  Node* const base_offset = phase->MakeConX(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
  // HeapAccess<>::clone expects the size in heap words.
  // For 64-bit platforms, this is a no-operation.
  // For 32-bit platforms, we need to multiply full_size by HeapWordsPerLong (2).
  Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           clone_addr,
                                           clone_name,
                                           TypeRawPtr::BOTTOM,
                                           src, dst, full_size_in_heap_words XTOP);
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}
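
// Worked example of the size conversion above: size and base_offset are both
// expressed in 8-byte words, so on a 64-bit VM (HeapWordSize == 8,
// LogHeapWordsPerLong == 0) the final shift leaves full_size unchanged, while
// on a 32-bit VM (HeapWordSize == 4, LogHeapWordsPerLong == 1) it doubles
// full_size to get the count in heap words.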

void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* ctrl = ac->in(TypeFunc::Control);
  Node* mem = ac->in(TypeFunc::Memory);
  Node* src = ac->in(ArrayCopyNode::Src);
  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* dest = ac->in(ArrayCopyNode::Dest);
  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length = ac->in(ArrayCopyNode::Length);

  Node* payload_src = phase->basic_plus_adr(src, src_offset);
  Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

  const char* copyfunc_name = "arraycopy";
  address     copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);

  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

  Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
  phase->transform_later(call);

  phase->igvn().replace_node(ac, call);
}

#undef XTOP

void BarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
  BarrierSetC2State* bs_state = barrier_set_state();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }
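
  // The worklist loop below solves the standard backward liveness data-flow
  // equations until a fixed point is reached:
  //
  //   live_out(B) = union of live_in(S) over all successors S of B
  //   live_in(B)  = uses(B) union (live_out(B) minus defs(B))
  //
  // Whenever a block's live-in set grows, its predecessors are pushed back
  // onto the worklist so the change can propagate.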

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // If this node tracks out-liveness, update it
      if (!bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks in-liveness, update it
      if (bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }
    }

    // Now at the block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has been refined: update and propagate to predecessor blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}