/*
 * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/convertnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "opto/narrowptrnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "utilities/macros.hpp"
#include CPU_HEADER(gc/shared/barrierSetAssembler)

// By default this is a no-op.
void BarrierSetC2::resolve_address(C2Access& access) const { }

void* C2ParseAccess::barrier_set_state() const {
  return _kit->barrier_set_state();
}

PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }

bool C2Access::needs_cpu_membar() const {
  bool mismatched   = (_decorators & C2_MISMATCHED) != 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0;

  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  bool in_heap   = (_decorators & IN_HEAP) != 0;
  bool in_native = (_decorators & IN_NATIVE) != 0;
  bool is_mixed  = !in_heap && !in_native;

  bool is_write  = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read   = (_decorators & C2_READ_ACCESS) != 0;
  bool is_atomic = is_read && is_write;

  if (is_atomic) {
    // Atomics always need to be wrapped in CPU membars
    return true;
  }

  if (anonymous) {
    // We will need memory barriers unless we can determine a unique
    // alias category for this reference.  (Note:  If for some reason
    // the barriers get omitted and the unsafe reference begins to "pollute"
    // the alias analysis of the rest of the graph, either Compile::can_alias
    // or Compile::must_alias will throw a diagnostic assert.)
    if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
      return true;
    }
  } else {
    assert(!is_mixed, "not unsafe");
  }

  return false;
}

static BarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
}

RegMask& BarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

BarrierStubC2::BarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation(),
    _preserve(live()) {}

Label* BarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* BarrierStubC2::continuation() {
  return &_continuation;
}

void BarrierStubC2::preserve(Register r) {
  const VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  _preserve.Insert(OptoReg::as_OptoReg(vm_reg));
}

void BarrierStubC2::dont_preserve(Register r) {
  VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
  // for r11 in aarch64).
  do {
    _preserve.Remove(OptoReg::as_OptoReg(vm_reg));
    vm_reg = vm_reg->next();
  } while (vm_reg->is_Register() && !vm_reg->is_concrete());
}

const RegMask& BarrierStubC2::preserve_set() const {
  return _preserve;
}

Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  DecoratorSet decorators = access.decorators();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;

  MemNode::MemOrd mo = access.mem_node_mo();

  Node* store;
  BasicType bt = access.type();
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);

    GraphKit* kit = parse_access.kit();
    if (bt == T_DOUBLE) {
      Node* new_val = kit->dprecision_rounding(val.node());
      val.set_node(new_val);
    }

    store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
                                 access.addr().type(), mo, requires_atomic_access, unaligned,
                                 mismatched, unsafe, access.barrier_data());
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* ctl = opt_access.ctl();
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    const TypePtr* adr_type = access.addr().type();
    int alias = gvn.C->get_alias_index(adr_type);
    Node* mem = mm->memory_at(alias);

    StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
    if (unaligned) {
      st->set_unaligned_access();
    }
    if (mismatched) {
      st->set_mismatched_access();
    }
    st->set_barrier_data(access.barrier_data());
    store = gvn.transform(st);
    if (store == st) {
      mm->set_memory_at(alias, st);
    }
  }
  access.set_raw_access(store);

  return store;
}

Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  DecoratorSet decorators = access.decorators();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
  bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;

  MemNode::MemOrd mo = access.mem_node_mo();
  LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;

  Node* load;
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
    GraphKit* kit = parse_access.kit();
    Node* control = control_dependent ? kit->control() : nullptr;

    if (immutable) {
      Compile* C = Compile::current();
      Node* mem = kit->immutable_memory();
      load = LoadNode::make(kit->gvn(), control, mem, adr,
                            adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
                            unaligned, mismatched, unsafe, access.barrier_data());
      load = kit->gvn().transform(load);
    } else {
      load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo,
                            dep, requires_atomic_access, unaligned, mismatched, unsafe,
                            access.barrier_data());
    }
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* control = control_dependent ? opt_access.ctl() : nullptr;
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
    load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
                          requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
    load = gvn.transform(load);
  }
  access.set_raw_access(load);

  return load;
}

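// C2AccessFence is a scoped (RAII) helper that brackets an access with the
// memory barriers its decorators require: the constructor emits any leading
// membar (e.g. MemBarRelease for releasing/volatile stores and for atomics)
// plus a CPU-order membar if needed, and the destructor emits the matching
// trailing membar (e.g. MemBarAcquire/MemBarVolatile) and pairs it with the
// leading one where applicable.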
class C2AccessFence: public StackObj {
  C2Access& _access;
  Node* _leading_membar;

public:
  C2AccessFence(C2Access& access) :
    _access(access), _leading_membar(nullptr) {
    GraphKit* kit = nullptr;
    if (access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_release = (decorators & MO_RELEASE) != 0;

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      // Memory-model-wise, a LoadStore acts like a little synchronized
      // block, so it needs barriers on each side.  These don't translate
      // into actual barriers on most machines, but we still need the rest
      // of the compiler to respect the ordering.
      if (is_release) {
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      } else if (is_volatile) {
        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
          _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
        } else {
          _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
        }
      }
    } else if (is_write) {
      // If reference is volatile, prevent following memory ops from
      // floating down past the volatile write.  Also prevents commoning
      // another volatile read.
      if (is_volatile || is_release) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      }
    } else {
      // Memory barrier to prevent normal and 'unsafe' accesses from
      // bypassing each other.  Happens after null checks, so the
      // exception paths do not take memory state from the memory barrier,
      // so there's no problem making a strong assert about mixing users
      // of safe & unsafe memory.
      if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
      }
    }

    if (access.needs_cpu_membar()) {
      assert(kit != nullptr, "unsupported at optimization time");
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      // 4984716: MemBars must be inserted before this
      //          memory node in order to avoid a false
      //          dependency which will confuse the scheduler.
      access.set_memory();
    }
  }

  ~C2AccessFence() {
    GraphKit* kit = nullptr;
    if (_access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = _access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_acquire = (decorators & MO_ACQUIRE) != 0;

    // If reference is volatile, prevent following volatile ops from
    // floating up before the volatile access.
    if (_access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      if (is_acquire || is_volatile) {
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        if (_leading_membar != nullptr) {
          MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else if (is_write) {
      // If not multiple copy atomic, we do the MemBarVolatile before the load.
      if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
        if (_leading_membar != nullptr) {
          MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else {
      if (is_volatile || is_acquire) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        mb->as_MemBar()->set_trailing_load();
      }
    }
  }
};

Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return store_at_resolved(access, val);
}

Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return load_at_resolved(access, val_type);
}

MemNode::MemOrd C2Access::mem_node_mo() const {
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  if ((_decorators & MO_SEQ_CST) != 0) {
    if (is_write && is_read) {
      // For atomic operations
      return MemNode::seqcst;
    } else if (is_write) {
      return MemNode::release;
    } else {
      assert(is_read, "what else?");
      return MemNode::acquire;
    }
  } else if ((_decorators & MO_RELEASE) != 0) {
    return MemNode::release;
  } else if ((_decorators & MO_ACQUIRE) != 0) {
    return MemNode::acquire;
  } else if (is_write) {
    // Volatile fields need releasing stores.
    // Non-volatile fields also need releasing stores if they hold an
    // object reference, because the object reference might point to
    // a freshly created object.
    // Conservatively release stores of object references.
    return StoreNode::release_if_reference(_type);
  } else {
    return MemNode::unordered;
  }
}

void C2Access::fixup_decorators() {
  bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;

  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;

  if (AlwaysAtomicAccesses && is_unordered) {
    _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
    _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccess
  }

  _decorators = AccessInternal::decorator_fixup(_decorators, _type);

  if (is_read && !is_write && anonymous) {
    // To be valid, unsafe loads may depend on other conditions than
    // the one that guards them: pin the Load node
    _decorators |= C2_CONTROL_DEPENDENT_LOAD;
    _decorators |= C2_UNKNOWN_CONTROL_LOAD;
    const TypePtr* adr_type = _addr.type();
    Node* adr = _addr.node();
    if (!needs_cpu_membar() && adr_type->isa_instptr()) {
      assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
      intptr_t offset = Type::OffsetBot;
      AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
      if (offset >= 0) {
        int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
        if (offset < s) {
          // Guaranteed to be a valid access, no need to pin it
          _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
          _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
        }
      }
    }
  }
}

//--------------------------- atomic operations---------------------------------

void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
  assert(access.is_parse_access(), "entry not supported at optimization time");
  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
  GraphKit* kit = parse_access.kit();
  Node* load_store = access.raw_access();
  assert(load_store != nullptr, "must pin atomic op");
  Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
  kit->set_memory(proj, access.alias_idx());
}

void C2AtomicParseAccess::set_memory() {
  Node *mem = _kit->memory(_alias_idx);
  _memory = mem;
}

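// Emit the value-returning compare-and-exchange for a resolved access. With
// compressed oops (64-bit), oop operands are first encoded to narrow oops,
// the exchange is performed on the narrow form, and the result is decoded
// back to a regular oop before being returned.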
Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                   Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
    } else
#endif
    {
      load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
    }
  } else {
    switch (access.type()) {
      case T_BYTE: {
        load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_SHORT: {
        load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_INT: {
        load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_LONG: {
        load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  DecoratorSet decorators = access.decorators();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
  Node* load_store = nullptr;
  Node* adr = access.addr().node();

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      } else {
        load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      }
    } else
#endif
    {
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      } else {
        load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      }
    }
  } else {
    switch(access.type()) {
      case T_BYTE: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_SHORT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_INT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_LONG: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
    } else
#endif
    {
      load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
    }
  } else {
    switch (access.type()) {
      case T_BYTE:
        load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_SHORT:
        load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_INT:
        load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_LONG:
        load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  Node* load_store = nullptr;
  GraphKit* kit = access.kit();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* mem = access.memory();

  switch(access.type()) {
    case T_BYTE:
      load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_SHORT:
      load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_INT:
      load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_LONG:
      load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    default:
      ShouldNotReachHere();
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
                                          Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
                                           Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_xchg_at_resolved(access, new_val, value_type);
}

Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_add_at_resolved(access, new_val, value_type);
}

int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
  // Exclude the header but include the array length to copy by 8-byte words.
  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
                            instanceOopDesc::base_offset_in_bytes();
  // base_off:
  // 8  - 32-bit VM
  // 12 - 64-bit VM, compressed klass
  // 16 - 64-bit VM, normal klass
  if (base_off % BytesPerLong != 0) {
    assert(UseCompressedClassPointers, "");
    if (is_array) {
      // Exclude the length to copy by 8-byte words.
      base_off += sizeof(int);
    } else {
      // Include the klass to copy by 8-byte words.
      base_off = instanceOopDesc::klass_offset_in_bytes();
    }
    assert(base_off % BytesPerLong == 0, "expect 8-byte alignment");
  }
  return base_off;
}

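// Emit a GC-agnostic clone of an object's or array's payload as an
// ArrayCopyNode. The copy starts at arraycopy_payload_base_offset() within
// both src and dst and proceeds in 8-byte words; for arrays, the payload size
// is rounded up to the next 8-byte multiple since the array body need not be
// long-aligned for sub-long element types.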
void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  int base_off = arraycopy_payload_base_offset(is_array);
  Node* payload_size = size;
  Node* offset = kit->MakeConX(base_off);
  payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
  if (is_array) {
    // Ensure the array payload size is rounded up to the next BytesPerLong
    // multiple when converting to double-words. This is necessary because array
    // size does not include object alignment padding, so it might not be a
    // multiple of BytesPerLong for sub-long element types.
    payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
  }
  payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
  if (is_array) {
    ac->set_clone_array();
  } else {
    ac->set_clone_inst();
  }
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
  } else {
    kit->set_all_memory(n);
  }
}

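// Expand the TLAB fast-path allocation: load the current TLAB top and end,
// bump the top by size_in_bytes, and branch to the slow path (needgc_ctrl)
// if the new top would exceed the TLAB end. On the fast path the bumped top
// is stored back and the old top is returned as the newly allocated object.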
Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines) const {
  assert(UseTLAB, "Only for TLAB enabled allocations");

  Node* thread = macro->transform_later(new ThreadLocalNode());
  Node* tlab_top_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_top_offset()));
  Node* tlab_end_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_end_offset()));

  // Load TLAB end.
  //
  // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
  //       a bug where these values were being moved across
  //       a safepoint.  These are not oops, so they cannot be included in the oop
  //       map, but they can be changed by a GC.  The proper way to fix this would
  //       be to set the raw memory state when generating a SafepointNode.  However,
  //       this would require extensive changes to the loop optimization in order to
  //       prevent a degradation of the optimization.
  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
  Node* tlab_end = macro->make_load(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);

  // Load the TLAB top.
  Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
  macro->transform_later(old_tlab_top);

  // Add the allocation size to the old TLAB top to get the new TLAB top.
  Node* new_tlab_top = new AddPNode(macro->top(), old_tlab_top, size_in_bytes);
  macro->transform_later(new_tlab_top);

  // Check against TLAB end
  Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
  macro->transform_later(tlab_full);

  Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
  macro->transform_later(needgc_bol);
  IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
  macro->transform_later(needgc_iff);

  // Plug the failing-heap-space-need-gc test into the slow-path region
  Node* needgc_true = new IfTrueNode(needgc_iff);
  macro->transform_later(needgc_true);
  needgc_ctrl = needgc_true;

  // No need for a GC.
  Node* needgc_false = new IfFalseNode(needgc_iff);
  macro->transform_later(needgc_false);

  // Fast path:
  i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
                                   old_tlab_top, new_tlab_top, prefetch_lines);

  // Store the modified TLAB top back down.
  Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
                   TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
  macro->transform_later(store_tlab_top);

  fast_oop_ctrl = needgc_false;
  fast_oop_rawmem = store_tlab_top;
  return old_tlab_top;
}

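// Build the call signature for the out-of-line clone runtime entry: two
// non-null object arguments (src, dst) and a machine-word size argument
// (which occupies an extra HALF slot on 64-bit platforms), with an empty
// result tuple.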
static const TypeFunc* clone_type() {
  // Create input type (domain)
  int argcnt = NOT_LP64(3) LP64_ONLY(4);
  const Type** const domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // src
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[argp++] = TypeX_X;               // size lower
  LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
                                    address clone_addr, const char* clone_name) const {
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem  = ac->in(TypeFunc::Memory);
  Node* const src  = ac->in(ArrayCopyNode::Src);
  Node* const dst  = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->base() == Type_X,
         "Should be of object size type (int for 32 bits, long for 64 bits)");

  // The native clone we are calling here expects the object size in words.
  // Add header/offset size to payload size to get object size.
  Node* const base_offset = phase->MakeConX(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
  // HeapAccess<>::clone expects the size in heap words.
  // For 64-bit platforms, this is a no-op.
  // For 32-bit platforms, we need to multiply full_size by HeapWordsPerLong (2).
  Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           clone_addr,
                                           clone_name,
                                           TypeRawPtr::BOTTOM,
                                           src, dst, full_size_in_heap_words XTOP);
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* ctrl = ac->in(TypeFunc::Control);
  Node* mem = ac->in(TypeFunc::Memory);
  Node* src = ac->in(ArrayCopyNode::Src);
  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* dest = ac->in(ArrayCopyNode::Dest);
  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length = ac->in(ArrayCopyNode::Length);

  Node* payload_src = phase->basic_plus_adr(src, src_offset);
  Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

  const char* copyfunc_name = "arraycopy";
  address     copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);

  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

  Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
  phase->transform_later(call);

  phase->igvn().replace_node(ac, call);
}

#undef XTOP

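// Compute which registers are live at each node that has an associated
// barrier stub, using a backward dataflow pass over the CFG: start from the
// union of the successors' live sets, walk each block bottom-up removing
// definitions and adding uses, and re-queue the predecessors whenever a
// block's live set grows.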
void BarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
  BarrierSetC2State* bs_state = barrier_set_state();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // If this node tracks out-liveness, update it
      if (!bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks in-liveness, update it
      if (bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has been refined: update it and propagate to the predecessor blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}