/*
 * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/convertnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "opto/narrowptrnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "utilities/macros.hpp"
#include CPU_HEADER(gc/shared/barrierSetAssembler)

// By default this is a no-op.
void BarrierSetC2::resolve_address(C2Access& access) const { }

void* C2ParseAccess::barrier_set_state() const {
  return _kit->barrier_set_state();
}

PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }

Node* C2ParseAccess::control() const {
  return _ctl == nullptr ? _kit->control() : _ctl;
}

bool C2Access::needs_cpu_membar() const {
  bool mismatched   = (_decorators & C2_MISMATCHED) != 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0;

  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  bool in_heap   = (_decorators & IN_HEAP) != 0;
  bool in_native = (_decorators & IN_NATIVE) != 0;
  bool is_mixed  = !in_heap && !in_native;

  bool is_write  = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read   = (_decorators & C2_READ_ACCESS) != 0;
  bool is_atomic = is_read && is_write;

  if (is_atomic) {
    // Atomics always need to be wrapped in CPU membars
    return true;
  }

  if (anonymous) {
    // We will need memory barriers unless we can determine a unique
    // alias category for this reference.  (Note:  If for some reason
    // the barriers get omitted and the unsafe reference begins to "pollute"
    // the alias analysis of the rest of the graph, either Compile::can_alias
    // or Compile::must_alias will throw a diagnostic assert.)
    if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
      return true;
    }
  } else {
    assert(!is_mixed, "not unsafe");
  }

  return false;
}

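// Shorthand for the GC-specific barrier set state attached to the compilation
// currently in progress.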
static BarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
}

RegMask& BarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

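// A barrier stub is associated with a MachNode; its preserve set is
// initialized to the set of registers that are live at that node.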
BarrierStubC2::BarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation(),
    _preserve(live()) {}

Label* BarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* BarrierStubC2::continuation() {
  return &_continuation;
}

uint8_t BarrierStubC2::barrier_data() const {
  return _node->barrier_data();
}

void BarrierStubC2::preserve(Register r) {
  const VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  _preserve.Insert(OptoReg::as_OptoReg(vm_reg));
}

void BarrierStubC2::dont_preserve(Register r) {
  VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
  // for r11 in aarch64).
  do {
    _preserve.Remove(OptoReg::as_OptoReg(vm_reg));
    vm_reg = vm_reg->next();
  } while (vm_reg->is_Register() && !vm_reg->is_concrete());
}

const RegMask& BarrierStubC2::preserve_set() const {
  return _preserve;
}

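// Emit the raw store for an already-resolved access. During parsing the store
// is created through the GraphKit; during optimization (opt access) it is
// built and transformed directly and hooked into the MergeMem.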
Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  DecoratorSet decorators = access.decorators();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;

  MemNode::MemOrd mo = access.mem_node_mo();

  Node* store;
  BasicType bt = access.type();
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);

    GraphKit* kit = parse_access.kit();
    if (bt == T_DOUBLE) {
      Node* new_val = kit->dprecision_rounding(val.node());
      val.set_node(new_val);
    }

    store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
                                 access.addr().type(), mo, requires_atomic_access, unaligned,
                                 mismatched, unsafe, access.barrier_data());
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* ctl = opt_access.ctl();
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    const TypePtr* adr_type = access.addr().type();
    int alias = gvn.C->get_alias_index(adr_type);
    Node* mem = mm->memory_at(alias);

    StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
    if (unaligned) {
      st->set_unaligned_access();
    }
    if (mismatched) {
      st->set_mismatched_access();
    }
    st->set_barrier_data(access.barrier_data());
    store = gvn.transform(st);
    if (store == st) {
      mm->set_memory_at(alias, st);
    }
  }
  access.set_raw_access(store);

  return store;
}

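// Emit the raw load for an already-resolved access, honoring the control
// dependency and memory ordering implied by the decorators. As with stores,
// parse-time accesses go through the GraphKit, while opt-time accesses build
// the LoadNode directly.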
Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  DecoratorSet decorators = access.decorators();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
  bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;

  MemNode::MemOrd mo = access.mem_node_mo();
  LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;

  Node* load;
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
    GraphKit* kit = parse_access.kit();
    Node* control = control_dependent ? parse_access.control() : nullptr;

    if (immutable) {
      Compile* C = Compile::current();
      Node* mem = kit->immutable_memory();
      load = LoadNode::make(kit->gvn(), control, mem, adr,
                            adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
                            unaligned, mismatched, unsafe, access.barrier_data());
      load = kit->gvn().transform(load);
    } else {
      load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo,
                            dep, requires_atomic_access, unaligned, mismatched, unsafe,
                            access.barrier_data());
    }
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* control = control_dependent ? opt_access.ctl() : nullptr;
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
    load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
                          requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
    load = gvn.transform(load);
  }
  access.set_raw_access(load);

  return load;
}

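// RAII helper that brackets an access with the memory barriers implied by its
// decorators: the constructor emits the leading barriers, the destructor the
// trailing ones, and the access itself is generated in between.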
class C2AccessFence: public StackObj {
  C2Access& _access;
  Node* _leading_membar;

public:
  C2AccessFence(C2Access& access) :
    _access(access), _leading_membar(nullptr) {
    GraphKit* kit = nullptr;
    if (access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_release = (decorators & MO_RELEASE) != 0;

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      // Memory-model-wise, a LoadStore acts like a little synchronized
      // block, so it needs barriers on each side.  These don't translate
      // into actual barriers on most machines, but we still need the rest
      // of the compiler to respect ordering.
      if (is_release) {
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      } else if (is_volatile) {
        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
          _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
        } else {
          _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
        }
      }
    } else if (is_write) {
      // If the reference is volatile, prevent following memory ops from
      // floating down past the volatile write.  This also prevents commoning
      // with another volatile read.
      if (is_volatile || is_release) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      }
    } else {
      // Memory barrier to prevent normal and 'unsafe' accesses from
      // bypassing each other.  Happens after null checks, so the
      // exception paths do not take memory state from the memory barrier,
      // so there is no problem making a strong assert about mixing users
      // of safe & unsafe memory.
      if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
      }
    }

    if (access.needs_cpu_membar()) {
      assert(kit != nullptr, "unsupported at optimization time");
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      // 4984716: MemBars must be inserted before this
      //          memory node in order to avoid a false
      //          dependency which will confuse the scheduler.
      access.set_memory();
    }
  }

  ~C2AccessFence() {
    GraphKit* kit = nullptr;
    if (_access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = _access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_acquire = (decorators & MO_ACQUIRE) != 0;

    // If the reference is volatile, prevent following volatile ops from
    // floating up before the volatile access.
    if (_access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      if (is_acquire || is_volatile) {
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        if (_leading_membar != nullptr) {
          MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else if (is_write) {
      // If not multiple copy atomic, we do the MemBarVolatile before the load.
      if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
        if (_leading_membar != nullptr) {
          MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else {
      if (is_volatile || is_acquire) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        mb->as_MemBar()->set_trailing_load();
      }
    }
  }
};

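// Entry points for plain stores and loads: wrap the resolved access in the
// fences required by its decorators.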
Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return store_at_resolved(access, val);
}

Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return load_at_resolved(access, val_type);
}

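// Map the memory-ordering decorators of this access onto the MemOrd used by
// the Load/Store nodes.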
MemNode::MemOrd C2Access::mem_node_mo() const {
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  if ((_decorators & MO_SEQ_CST) != 0) {
    if (is_write && is_read) {
      // For atomic operations
      return MemNode::seqcst;
    } else if (is_write) {
      return MemNode::release;
    } else {
      assert(is_read, "what else?");
      return MemNode::acquire;
    }
  } else if ((_decorators & MO_RELEASE) != 0) {
    return MemNode::release;
  } else if ((_decorators & MO_ACQUIRE) != 0) {
    return MemNode::acquire;
  } else if (is_write) {
    // Volatile fields need releasing stores.
    // Non-volatile fields also need releasing stores if they hold an
    // object reference, because the object reference might point to
    // a freshly created object.
    // Conservatively release stores of object references.
    return StoreNode::release_if_reference(_type);
  } else {
    return MemNode::unordered;
  }
}

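// Normalize the decorator set of this access: apply the default memory
// ordering, and decide whether an anonymous (unsafe) load has to be pinned to
// its controlling test.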
void C2Access::fixup_decorators() {
  bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;

  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;

  if (AlwaysAtomicAccesses && is_unordered) {
    _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
    _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccesses
  }

  _decorators = AccessInternal::decorator_fixup(_decorators, _type);

  if (is_read && !is_write && anonymous) {
    // To be valid, unsafe loads may depend on conditions other than
    // the one that guards them: pin the Load node
    _decorators |= C2_CONTROL_DEPENDENT_LOAD;
    _decorators |= C2_UNKNOWN_CONTROL_LOAD;
    const TypePtr* adr_type = _addr.type();
    Node* adr = _addr.node();
    if (!needs_cpu_membar() && adr_type->isa_instptr()) {
      assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
      intptr_t offset = Type::OffsetBot;
      AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
      if (offset >= 0) {
        int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
        if (offset < s) {
          // Guaranteed to be a valid access, no need to pin it
          _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
          _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
        }
      }
    }
  }
}

//--------------------------- atomic operations---------------------------------

void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
  assert(access.is_parse_access(), "entry not supported at optimization time");
  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
  GraphKit* kit = parse_access.kit();
  Node* load_store = access.raw_access();
  assert(load_store != nullptr, "must pin atomic op");
  Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
  kit->set_memory(proj, access.alias_idx());
}

void C2AtomicParseAccess::set_memory() {
  Node *mem = _kit->memory(_alias_idx);
  _memory = mem;
}

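// Emit a value-returning compare-and-exchange. With compressed oops, oop
// operands are encoded to narrow oops before the exchange and the result is
// decoded back to a regular oop afterwards.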
Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                   Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
    } else
#endif
    {
      load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
    }
  } else {
    switch (access.type()) {
      case T_BYTE: {
        load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_SHORT: {
        load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_INT: {
        load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_LONG: {
        load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

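// Emit a boolean-returning compare-and-swap, selecting the weak variant when
// the C2_WEAK_CMPXCHG decorator is set.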
Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  DecoratorSet decorators = access.decorators();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
  Node* load_store = nullptr;
  Node* adr = access.addr().node();

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      } else {
        load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      }
    } else
#endif
    {
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      } else {
        load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      }
    }
  } else {
    switch(access.type()) {
      case T_BYTE: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_SHORT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_INT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_LONG: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

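// Emit an atomic exchange (GetAndSet); oops are narrow-encoded and decoded
// under compressed oops, as for compare-and-exchange above.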
Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
    } else
#endif
    {
      load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
    }
  } else {
    switch (access.type()) {
      case T_BYTE:
        load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_SHORT:
        load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_INT:
        load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_LONG:
        load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

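// Emit an atomic fetch-and-add (GetAndAdd) for integral types.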
Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  Node* load_store = nullptr;
  GraphKit* kit = access.kit();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* mem = access.memory();

  switch(access.type()) {
    case T_BYTE:
      load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_SHORT:
      load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_INT:
      load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_LONG:
      load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    default:
      ShouldNotReachHere();
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

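// Entry points for atomic accesses: emit the required fences, resolve the
// address, and dispatch to the corresponding *_resolved implementation.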
Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
                                          Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
                                           Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_xchg_at_resolved(access, new_val, value_type);
}

Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_add_at_resolved(access, new_val, value_type);
}

int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
  // Exclude the header but include the array length to copy by 8-byte words.
  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
                            instanceOopDesc::base_offset_in_bytes();
  // base_off:
  // 8  - 32-bit VM
  // 12 - 64-bit VM, compressed klass
  // 16 - 64-bit VM, normal klass
  if (base_off % BytesPerLong != 0) {
    assert(UseCompressedClassPointers, "");
    if (is_array) {
      // Exclude the length to copy by 8-byte words.
      base_off += sizeof(int);
    } else {
      // Include the klass to copy by 8-byte words.
      base_off = instanceOopDesc::klass_offset_in_bytes();
    }
    assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
  }
  return base_off;
}

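// Emit a raw clone of the payload of an object or array as an ArrayCopyNode
// that copies 8-byte words starting at the payload base offset.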
void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  int base_off = arraycopy_payload_base_offset(is_array);
  Node* payload_size = size;
  Node* offset = kit->MakeConX(base_off);
  payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
  if (is_array) {
    // Ensure the array payload size is rounded up to the next BytesPerLong
    // multiple when converting to double-words. This is necessary because array
    // size does not include object alignment padding, so it might not be a
    // multiple of BytesPerLong for sub-long element types.
    payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
  }
  payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
  if (is_array) {
    ac->set_clone_array();
  } else {
    ac->set_clone_inst();
  }
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
  } else {
    kit->set_all_memory(n);
  }
}

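// Expand the TLAB fast path of an object allocation: bump the TLAB top by
// size_in_bytes and route control to needgc_ctrl if the new top would exceed
// the TLAB end. Returns the old TLAB top, i.e. the address of the new object.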
Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines) const {
  assert(UseTLAB, "Only for TLAB enabled allocations");

  Node* thread = macro->transform_later(new ThreadLocalNode());
  Node* tlab_top_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_top_offset()));
  Node* tlab_end_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_end_offset()));

  // Load the TLAB end.
  //
  // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
  //       a bug where these values were being moved across
  //       a safepoint.  These are not oops, so they cannot be included in the oop
  //       map, but they can be changed by a GC.  The proper way to fix this would
  //       be to set the raw memory state when generating a SafepointNode.  However,
  //       this will require extensive changes to the loop optimization in order to
  //       prevent a degradation of the optimization.
  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
  Node* tlab_end = macro->make_load(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);

  // Load the TLAB top.
  Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
  macro->transform_later(old_tlab_top);

  // Add the allocation size to the old TLAB top to get the new TLAB top.
  Node* new_tlab_top = new AddPNode(macro->top(), old_tlab_top, size_in_bytes);
  macro->transform_later(new_tlab_top);

  // Check against TLAB end
  Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
  macro->transform_later(tlab_full);

  Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
  macro->transform_later(needgc_bol);
  IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
  macro->transform_later(needgc_iff);

  // Plug the failing-heap-space-need-gc test into the slow-path region
  Node* needgc_true = new IfTrueNode(needgc_iff);
  macro->transform_later(needgc_true);
  needgc_ctrl = needgc_true;

  // No need for a GC.
  Node* needgc_false = new IfFalseNode(needgc_iff);
  macro->transform_later(needgc_false);

  // Fast path:
  i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
                                   old_tlab_top, new_tlab_top, prefetch_lines);

  // Store the modified TLAB top back down.
  Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
                   TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
  macro->transform_later(store_tlab_top);

  fast_oop_ctrl = needgc_false;
  fast_oop_rawmem = store_tlab_top;
  return old_tlab_top;
}

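// Signature of the runtime clone stub: (src oop, dst oop, size in heap words),
// with no return value.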
static const TypeFunc* clone_type() {
  // Create input type (domain)
  int argcnt = NOT_LP64(3) LP64_ONLY(4);
  const Type** const domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // src
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[argp++] = TypeX_X;               // size lower
  LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

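// Expand a clone ArrayCopyNode into a leaf call to a GC-provided runtime clone
// stub. The payload size carried by the node is converted back into a full
// object size in heap words before the call.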
void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
                                    address clone_addr, const char* clone_name) const {
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem  = ac->in(TypeFunc::Memory);
  Node* const src  = ac->in(ArrayCopyNode::Src);
  Node* const dst  = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->base() == Type_X,
         "Should be of object size type (int for 32 bits, long for 64 bits)");

  // The native clone we are calling here expects the object size in words.
  // Add header/offset size to payload size to get object size.
  Node* const base_offset = phase->MakeConX(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
  // HeapAccess<>::clone expects the size in heap words.
  // For 64-bit platforms, this is a no-op.
  // For 32-bit platforms, we need to multiply full_size by HeapWordsPerLong (2).
  Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           clone_addr,
                                           clone_name,
                                           TypeRawPtr::BOTTOM,
                                           src, dst, full_size_in_heap_words XTOP);
  phase->transform_later(call);
  phase->replace_node(ac, call);
}

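// Default expansion of a clone: copy the payload with a leaf call to the raw
// long-element arraycopy stub.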
void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* ctrl = ac->in(TypeFunc::Control);
  Node* mem = ac->in(TypeFunc::Memory);
  Node* src = ac->in(ArrayCopyNode::Src);
  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* dest = ac->in(ArrayCopyNode::Dest);
  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length = ac->in(ArrayCopyNode::Length);

  Node* payload_src = phase->basic_plus_adr(src, src_offset);
  Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

  const char* copyfunc_name = "arraycopy";
  address     copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);

  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

  Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
  phase->transform_later(call);

  phase->replace_node(ac, call);
}

#undef XTOP

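// Compute register liveness for barrier stubs with a backward dataflow pass
// over the CFG. For each node tracked by the barrier set state, the live set
// (live-out or live-in, depending on needs_livein_data()) is recorded so that
// stubs know which registers to preserve.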
void BarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
  BarrierSetC2State* bs_state = barrier_set_state();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // If this node tracks out-liveness, update it
      if (!bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks in-liveness, update it
      if (bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has been refined: update and propagate to predecessor blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}