1 /*
   2  * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "code/vmreg.inline.hpp"
  26 #include "gc/shared/barrierSet.hpp"
  27 #include "gc/shared/c2/barrierSetC2.hpp"
  28 #include "gc/shared/tlab_globals.hpp"
  29 #include "opto/arraycopynode.hpp"
  30 #include "opto/block.hpp"
  31 #include "opto/convertnode.hpp"
  32 #include "opto/graphKit.hpp"
  33 #include "opto/idealKit.hpp"
  34 #include "opto/macro.hpp"
  35 #include "opto/narrowptrnode.hpp"
  36 #include "opto/output.hpp"
  37 #include "opto/regalloc.hpp"
  38 #include "opto/runtime.hpp"
  39 #include "utilities/macros.hpp"
  40 #include CPU_HEADER(gc/shared/barrierSetAssembler)
  41 
  42 // By default this is a no-op.
  43 void BarrierSetC2::resolve_address(C2Access& access) const { }
  44 
  45 void* C2ParseAccess::barrier_set_state() const {
  46   return _kit->barrier_set_state();
  47 }
  48 
  49 PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }
  50 
  51 bool C2Access::needs_cpu_membar() const {
  52   bool mismatched   = (_decorators & C2_MISMATCHED) != 0;
  53   bool is_unordered = (_decorators & MO_UNORDERED) != 0;
  54 
  55   bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  56   bool in_heap   = (_decorators & IN_HEAP) != 0;
  57   bool in_native = (_decorators & IN_NATIVE) != 0;
  58   bool is_mixed  = !in_heap && !in_native;
  59 
  60   bool is_write  = (_decorators & C2_WRITE_ACCESS) != 0;
  61   bool is_read   = (_decorators & C2_READ_ACCESS) != 0;
  62   bool is_atomic = is_read && is_write;
  63 
  64   if (is_atomic) {
  65     // Atomics always need to be wrapped in CPU membars
  66     return true;
  67   }
  68 
  69   if (anonymous) {
  70     // We will need memory barriers unless we can determine a unique
  71     // alias category for this reference.  (Note:  If for some reason
  72     // the barriers get omitted and the unsafe reference begins to "pollute"
  73     // the alias analysis of the rest of the graph, either Compile::can_alias
  74     // or Compile::must_alias will throw a diagnostic assert.)
  75     if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
  76       return true;
  77     }
  78   } else {
  79     assert(!is_mixed, "not unsafe");
  80   }
  81 
  82   return false;
  83 }
  84 
  85 static BarrierSetC2State* barrier_set_state() {
  86   return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
  87 }
  88 
  89 RegMask& BarrierStubC2::live() const {
  90   return *barrier_set_state()->live(_node);
  91 }
  92 
  93 BarrierStubC2::BarrierStubC2(const MachNode* node)
  94   : _node(node),
  95     _entry(),
  96     _continuation(),
  97     _preserve(live()) {}
  98 
  99 Label* BarrierStubC2::entry() {
 100   // The _entry will never be bound when in_scratch_emit_size() is true.
 101   // However, we still need to return a label that is not bound now, but
 102   // will eventually be bound. Any eventually bound label will do, as it
 103   // will only act as a placeholder, so we return the _continuation label.
 104   return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
 105 }
 106 
 107 Label* BarrierStubC2::continuation() {
 108   return &_continuation;
 109 }
 110 
 111 uint8_t BarrierStubC2::barrier_data() const {
 112   return _node->barrier_data();
 113 }
 114 
 115 void BarrierStubC2::preserve(Register r) {
 116   const VMReg vm_reg = r->as_VMReg();
 117   assert(vm_reg->is_Register(), "r must be a general-purpose register");
 118   _preserve.insert(OptoReg::as_OptoReg(vm_reg));
 119 }
 120 
 121 void BarrierStubC2::dont_preserve(Register r) {
 122   VMReg vm_reg = r->as_VMReg();
 123   assert(vm_reg->is_Register(), "r must be a general-purpose register");
 124   // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
 125   // for r11 in aarch64).
 126   do {
 127     _preserve.remove(OptoReg::as_OptoReg(vm_reg));
 128     vm_reg = vm_reg->next();
 129   } while (vm_reg->is_Register() && !vm_reg->is_concrete());
 130 }
 131 
 132 const RegMask& BarrierStubC2::preserve_set() const {
 133   return _preserve;
 134 }
 135 
 136 Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
 137   DecoratorSet decorators = access.decorators();
 138 
 139   bool mismatched = (decorators & C2_MISMATCHED) != 0;
 140   bool unaligned = (decorators & C2_UNALIGNED) != 0;
 141   bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
 142   bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
 143 
 144   MemNode::MemOrd mo = access.mem_node_mo();
 145 
 146   Node* store;
 147   BasicType bt = access.type();
 148   if (access.is_parse_access()) {
 149     C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
 150 
 151     GraphKit* kit = parse_access.kit();
 152     store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
 153                                  mo, requires_atomic_access, unaligned, mismatched,
 154                                  unsafe, access.barrier_data());
 155   } else {
 156     assert(access.is_opt_access(), "either parse or opt access");
 157     C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
 158     Node* ctl = opt_access.ctl();
 159     MergeMemNode* mm = opt_access.mem();
 160     PhaseGVN& gvn = opt_access.gvn();
 161     const TypePtr* adr_type = access.addr().type();
 162     int alias = gvn.C->get_alias_index(adr_type);
 163     Node* mem = mm->memory_at(alias);
 164 
 165     StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
 166     if (unaligned) {
 167       st->set_unaligned_access();
 168     }
 169     if (mismatched) {
 170       st->set_mismatched_access();
 171     }
 172     st->set_barrier_data(access.barrier_data());
 173     store = gvn.transform(st);
 174     if (store == st) {
 175       mm->set_memory_at(alias, st);
 176     }
 177   }
 178   access.set_raw_access(store);
 179 
 180   return store;
 181 }
 182 
 183 Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
 184   DecoratorSet decorators = access.decorators();
 185 
 186   Node* adr = access.addr().node();
 187   const TypePtr* adr_type = access.addr().type();
 188 
 189   bool mismatched = (decorators & C2_MISMATCHED) != 0;
 190   bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
 191   bool unaligned = (decorators & C2_UNALIGNED) != 0;
 192   bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
 193   bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
 194   bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
 195   bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;
 196 
 197   MemNode::MemOrd mo = access.mem_node_mo();
 198   LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;
 199 
 200   Node* load;
 201   if (access.is_parse_access()) {
 202     C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
 203     GraphKit* kit = parse_access.kit();
 204     Node* control = control_dependent ? kit->control() : nullptr;
 205 
 206     if (immutable) {
 207       Compile* C = Compile::current();
 208       Node* mem = kit->immutable_memory();
 209       load = LoadNode::make(kit->gvn(), control, mem, adr,
 210                             adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
 211                             unaligned, mismatched, unsafe, access.barrier_data());
 212       load = kit->gvn().transform(load);
 213     } else {
 214       load = kit->make_load(control, adr, val_type, access.type(), mo,
 215                             dep, requires_atomic_access, unaligned, mismatched, unsafe,
 216                             access.barrier_data());
 217     }
 218   } else {
 219     assert(access.is_opt_access(), "either parse or opt access");
 220     C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
 221     Node* control = control_dependent ? opt_access.ctl() : nullptr;
 222     MergeMemNode* mm = opt_access.mem();
 223     PhaseGVN& gvn = opt_access.gvn();
 224     Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
 225     load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
 226                           requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
 227     load = gvn.transform(load);
 228   }
 229   access.set_raw_access(load);
 230 
 231   return load;
 232 }
 233 
 234 class C2AccessFence: public StackObj {
 235   C2Access& _access;
 236   Node* _leading_membar;
 237 
 238 public:
 239   C2AccessFence(C2Access& access) :
 240     _access(access), _leading_membar(nullptr) {
 241     GraphKit* kit = nullptr;
 242     if (access.is_parse_access()) {
 243       C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
 244       kit = parse_access.kit();
 245     }
 246     DecoratorSet decorators = access.decorators();
 247 
 248     bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
 249     bool is_read = (decorators & C2_READ_ACCESS) != 0;
 250     bool is_atomic = is_read && is_write;
 251 
 252     bool is_volatile = (decorators & MO_SEQ_CST) != 0;
 253     bool is_release = (decorators & MO_RELEASE) != 0;
 254 
 255     if (is_atomic) {
 256       assert(kit != nullptr, "unsupported at optimization time");
 257       // Memory-model-wise, a LoadStore acts like a little synchronized
 258       // block, so needs barriers on each side.  These don't translate
 259       // into actual barriers on most machines, but we still need rest of
 260       // compiler to respect ordering.
 261       if (is_release) {
 262         _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
 263       } else if (is_volatile) {
 264         if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 265           _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
 266         } else {
 267           _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
 268         }
 269       }
 270     } else if (is_write) {
 271       // If reference is volatile, prevent following memory ops from
 272       // floating down past the volatile write.  Also prevents commoning
 273       // another volatile read.
 274       if (is_volatile || is_release) {
 275         assert(kit != nullptr, "unsupported at optimization time");
 276         _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
 277       }
 278     } else {
 279       // Memory barrier to prevent normal and 'unsafe' accesses from
 280       // bypassing each other.  Happens after null checks, so the
 281       // exception paths do not take memory state from the memory barrier,
 282       // so there's no problems making a strong assert about mixing users
 283       // of safe & unsafe memory.
 284       if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
 285         assert(kit != nullptr, "unsupported at optimization time");
 286         _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
 287       }
 288     }
 289 
 290     if (access.needs_cpu_membar()) {
 291       assert(kit != nullptr, "unsupported at optimization time");
 292       kit->insert_mem_bar(Op_MemBarCPUOrder);
 293     }
 294 
 295     if (is_atomic) {
 296       // 4984716: MemBars must be inserted before this
 297       //          memory node in order to avoid a false
 298       //          dependency which will confuse the scheduler.
 299       access.set_memory();
 300     }
 301   }
 302 
 303   ~C2AccessFence() {
 304     GraphKit* kit = nullptr;
 305     if (_access.is_parse_access()) {
 306       C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
 307       kit = parse_access.kit();
 308     }
 309     DecoratorSet decorators = _access.decorators();
 310 
 311     bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
 312     bool is_read = (decorators & C2_READ_ACCESS) != 0;
 313     bool is_atomic = is_read && is_write;
 314 
 315     bool is_volatile = (decorators & MO_SEQ_CST) != 0;
 316     bool is_acquire = (decorators & MO_ACQUIRE) != 0;
 317 
 318     // If reference is volatile, prevent following volatiles ops from
 319     // floating up before the volatile access.
 320     if (_access.needs_cpu_membar()) {
 321       kit->insert_mem_bar(Op_MemBarCPUOrder);
 322     }
 323 
 324     if (is_atomic) {
 325       assert(kit != nullptr, "unsupported at optimization time");
 326       if (is_acquire || is_volatile) {
 327         Node* n = _access.raw_access();
 328         Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
 329         if (_leading_membar != nullptr) {
 330           MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
 331         }
 332       }
 333     } else if (is_write) {
 334       // If not multiple copy atomic, we do the MemBarVolatile before the load.
 335       if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
 336         assert(kit != nullptr, "unsupported at optimization time");
 337         Node* n = _access.raw_access();
 338         Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
 339         if (_leading_membar != nullptr) {
 340           MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
 341         }
 342       }
 343     } else {
 344       if (is_volatile || is_acquire) {
 345         assert(kit != nullptr, "unsupported at optimization time");
 346         Node* n = _access.raw_access();
 347         assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
 348         Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
 349         mb->as_MemBar()->set_trailing_load();
 350       }
 351     }
 352   }
 353 };
 354 
 355 Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
 356   C2AccessFence fence(access);
 357   resolve_address(access);
 358   return store_at_resolved(access, val);
 359 }
 360 
 361 Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
 362   C2AccessFence fence(access);
 363   resolve_address(access);
 364   return load_at_resolved(access, val_type);
 365 }
 366 
 367 MemNode::MemOrd C2Access::mem_node_mo() const {
 368   bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
 369   bool is_read = (_decorators & C2_READ_ACCESS) != 0;
 370   if ((_decorators & MO_SEQ_CST) != 0) {
 371     if (is_write && is_read) {
 372       // For atomic operations
 373       return MemNode::seqcst;
 374     } else if (is_write) {
 375       return MemNode::release;
 376     } else {
 377       assert(is_read, "what else?");
 378       return MemNode::acquire;
 379     }
 380   } else if ((_decorators & MO_RELEASE) != 0) {
 381     return MemNode::release;
 382   } else if ((_decorators & MO_ACQUIRE) != 0) {
 383     return MemNode::acquire;
 384   } else if (is_write) {
 385     // Volatile fields need releasing stores.
 386     // Non-volatile fields also need releasing stores if they hold an
 387     // object reference, because the object reference might point to
 388     // a freshly created object.
 389     // Conservatively release stores of object references.
 390     return StoreNode::release_if_reference(_type);
 391   } else {
 392     return MemNode::unordered;
 393   }
 394 }
 395 
 396 void C2Access::fixup_decorators() {
 397   bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
 398   bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
 399 
 400   bool is_read = (_decorators & C2_READ_ACCESS) != 0;
 401   bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
 402 
 403   _decorators = AccessInternal::decorator_fixup(_decorators, _type);
 404 
 405   if (is_read && !is_write && anonymous) {
 406     // To be valid, unsafe loads may depend on other conditions than
 407     // the one that guards them: pin the Load node
 408     _decorators |= C2_CONTROL_DEPENDENT_LOAD;
 409     _decorators |= C2_UNKNOWN_CONTROL_LOAD;
 410     const TypePtr* adr_type = _addr.type();
 411     Node* adr = _addr.node();
 412     if (!needs_cpu_membar() && adr_type->isa_instptr()) {
 413       assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
 414       intptr_t offset = Type::OffsetBot;
 415       AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
 416       if (offset >= 0) {
 417         int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
 418         if (offset < s) {
 419           // Guaranteed to be a valid access, no need to pin it
 420           _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
 421           _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
 422         }
 423       }
 424     }
 425   }
 426 }
 427 
 428 //--------------------------- atomic operations---------------------------------
 429 
 430 void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
 431   // SCMemProjNodes represent the memory state of a LoadStore. Their
 432   // main role is to prevent LoadStore nodes from being optimized away
 433   // when their results aren't used.
 434   assert(access.is_parse_access(), "entry not supported at optimization time");
 435   C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
 436   GraphKit* kit = parse_access.kit();
 437   Node* load_store = access.raw_access();
 438   assert(load_store != nullptr, "must pin atomic op");
 439   Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
 440   kit->set_memory(proj, access.alias_idx());
 441 }
 442 
 443 void C2AtomicParseAccess::set_memory() {
 444   Node *mem = _kit->memory(_alias_idx);
 445   _memory = mem;
 446 }
 447 
 448 Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
 449                                                    Node* new_val, const Type* value_type) const {
 450   GraphKit* kit = access.kit();
 451   MemNode::MemOrd mo = access.mem_node_mo();
 452   Node* mem = access.memory();
 453 
 454   Node* adr = access.addr().node();
 455   const TypePtr* adr_type = access.addr().type();
 456 
 457   Node* load_store = nullptr;
 458 
 459   if (access.is_oop()) {
 460 #ifdef _LP64
 461     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
 462       Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
 463       Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
 464       load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
 465     } else
 466 #endif
 467     {
 468       load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
 469     }
 470   } else {
 471     switch (access.type()) {
 472       case T_BYTE: {
 473         load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
 474         break;
 475       }
 476       case T_SHORT: {
 477         load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
 478         break;
 479       }
 480       case T_INT: {
 481         load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
 482         break;
 483       }
 484       case T_LONG: {
 485         load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
 486         break;
 487       }
 488       default:
 489         ShouldNotReachHere();
 490     }
 491   }
 492 
 493   load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
 494   load_store = kit->gvn().transform(load_store);
 495 
 496   access.set_raw_access(load_store);
 497   pin_atomic_op(access);
 498 
 499 #ifdef _LP64
 500   if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
 501     return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
 502   }
 503 #endif
 504 
 505   return load_store;
 506 }
 507 
 508 Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
 509                                                     Node* new_val, const Type* value_type) const {
 510   GraphKit* kit = access.kit();
 511   DecoratorSet decorators = access.decorators();
 512   MemNode::MemOrd mo = access.mem_node_mo();
 513   Node* mem = access.memory();
 514   bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
 515   Node* load_store = nullptr;
 516   Node* adr = access.addr().node();
 517 
 518   if (access.is_oop()) {
 519 #ifdef _LP64
 520     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
 521       Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
 522       Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
 523       if (is_weak_cas) {
 524         load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
 525       } else {
 526         load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
 527       }
 528     } else
 529 #endif
 530     {
 531       if (is_weak_cas) {
 532         load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
 533       } else {
 534         load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
 535       }
 536     }
 537   } else {
 538     switch(access.type()) {
 539       case T_BYTE: {
 540         if (is_weak_cas) {
 541           load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
 542         } else {
 543           load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
 544         }
 545         break;
 546       }
 547       case T_SHORT: {
 548         if (is_weak_cas) {
 549           load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
 550         } else {
 551           load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
 552         }
 553         break;
 554       }
 555       case T_INT: {
 556         if (is_weak_cas) {
 557           load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
 558         } else {
 559           load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
 560         }
 561         break;
 562       }
 563       case T_LONG: {
 564         if (is_weak_cas) {
 565           load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
 566         } else {
 567           load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
 568         }
 569         break;
 570       }
 571       default:
 572         ShouldNotReachHere();
 573     }
 574   }
 575 
 576   load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
 577   load_store = kit->gvn().transform(load_store);
 578 
 579   access.set_raw_access(load_store);
 580   pin_atomic_op(access);
 581 
 582   return load_store;
 583 }
 584 
 585 Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
 586   GraphKit* kit = access.kit();
 587   Node* mem = access.memory();
 588   Node* adr = access.addr().node();
 589   const TypePtr* adr_type = access.addr().type();
 590   Node* load_store = nullptr;
 591 
 592   if (access.is_oop()) {
 593 #ifdef _LP64
 594     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
 595       Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
 596       load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
 597     } else
 598 #endif
 599     {
 600       load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
 601     }
 602   } else  {
 603     switch (access.type()) {
 604       case T_BYTE:
 605         load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
 606         break;
 607       case T_SHORT:
 608         load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
 609         break;
 610       case T_INT:
 611         load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
 612         break;
 613       case T_LONG:
 614         load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
 615         break;
 616       default:
 617         ShouldNotReachHere();
 618     }
 619   }
 620 
 621   load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
 622   load_store = kit->gvn().transform(load_store);
 623 
 624   access.set_raw_access(load_store);
 625   pin_atomic_op(access);
 626 
 627 #ifdef _LP64
 628   if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
 629     return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
 630   }
 631 #endif
 632 
 633   return load_store;
 634 }
 635 
 636 Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
 637   Node* load_store = nullptr;
 638   GraphKit* kit = access.kit();
 639   Node* adr = access.addr().node();
 640   const TypePtr* adr_type = access.addr().type();
 641   Node* mem = access.memory();
 642 
 643   switch(access.type()) {
 644     case T_BYTE:
 645       load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
 646       break;
 647     case T_SHORT:
 648       load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
 649       break;
 650     case T_INT:
 651       load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
 652       break;
 653     case T_LONG:
 654       load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
 655       break;
 656     default:
 657       ShouldNotReachHere();
 658   }
 659 
 660   load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
 661   load_store = kit->gvn().transform(load_store);
 662 
 663   access.set_raw_access(load_store);
 664   pin_atomic_op(access);
 665 
 666   return load_store;
 667 }
 668 
 669 Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
 670                                           Node* new_val, const Type* value_type) const {
 671   C2AccessFence fence(access);
 672   resolve_address(access);
 673   return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
 674 }
 675 
 676 Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
 677                                            Node* new_val, const Type* value_type) const {
 678   C2AccessFence fence(access);
 679   resolve_address(access);
 680   return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
 681 }
 682 
 683 Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
 684   C2AccessFence fence(access);
 685   resolve_address(access);
 686   return atomic_xchg_at_resolved(access, new_val, value_type);
 687 }
 688 
 689 Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
 690   C2AccessFence fence(access);
 691   resolve_address(access);
 692   return atomic_add_at_resolved(access, new_val, value_type);
 693 }
 694 
 695 int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
 696   // Exclude the header but include array length to copy by 8 bytes words.
 697   // Can't use base_offset_in_bytes(bt) since basic type is unknown.
 698   int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
 699                             instanceOopDesc::base_offset_in_bytes();
 700   // base_off:
 701   // 4  - compact headers
 702   // 8  - 32-bit VM
 703   // 12 - 64-bit VM, compressed klass
 704   // 16 - 64-bit VM, normal klass
 705   if (base_off % BytesPerLong != 0) {
 706     if (is_array) {
 707       // Exclude length to copy by 8 bytes words.
 708       base_off += sizeof(int);
 709     } else {
 710       if (!UseCompactObjectHeaders) {
 711         // Include klass to copy by 8 bytes words.
 712         base_off = instanceOopDesc::klass_offset_in_bytes();
 713       }
 714     }
 715     assert(base_off % BytesPerLong == 0 || UseCompactObjectHeaders, "expect 8 bytes alignment");
 716   }
 717   return base_off;
 718 }
 719 
 720 void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
 721   int base_off = arraycopy_payload_base_offset(is_array);
 722 
 723   Node* payload_size = size;
 724   Node* offset = kit->MakeConX(base_off);
 725   payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
 726   if (is_array) {
 727     // Ensure the array payload size is rounded up to the next BytesPerLong
 728     // multiple when converting to double-words. This is necessary because array
 729     // size does not include object alignment padding, so it might not be a
 730     // multiple of BytesPerLong for sub-long element types.
 731     payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
 732   }
 733   payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
 734   ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
 735   if (is_array) {
 736     ac->set_clone_array();
 737   } else {
 738     ac->set_clone_inst();
 739   }
 740   Node* n = kit->gvn().transform(ac);
 741   if (n == ac) {
 742     const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
 743     ac->set_adr_type(TypeRawPtr::BOTTOM);
 744     kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
 745   } else {
 746     kit->set_all_memory(n);
 747   }
 748 }
 749 
 750 Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
 751                                  Node*& i_o, Node*& needgc_ctrl,
 752                                  Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
 753                                  intx prefetch_lines) const {
 754   assert(UseTLAB, "Only for TLAB enabled allocations");
 755 
 756   Node* thread = macro->transform_later(new ThreadLocalNode());
 757   Node* tlab_top_adr = macro->off_heap_plus_addr(thread, in_bytes(JavaThread::tlab_top_offset()));
 758   Node* tlab_end_adr = macro->off_heap_plus_addr(thread, in_bytes(JavaThread::tlab_end_offset()));
 759 
 760   // Load TLAB end.
 761   //
 762   // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
 763   //       a bug where these values were being moved across
 764   //       a safepoint.  These are not oops, so they cannot be include in the oop
 765   //       map, but they can be changed by a GC.   The proper way to fix this would
 766   //       be to set the raw memory state when generating a  SafepointNode.  However
 767   //       this will require extensive changes to the loop optimization in order to
 768   //       prevent a degradation of the optimization.
 769   //       See comment in memnode.hpp, around line 227 in class LoadPNode.
 770   Node* tlab_end = macro->make_load_raw(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
 771 
 772   // Load the TLAB top.
 773   Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
 774   macro->transform_later(old_tlab_top);
 775 
 776   // Add to heap top to get a new TLAB top
 777   Node* new_tlab_top = AddPNode::make_off_heap(old_tlab_top, size_in_bytes);
 778   macro->transform_later(new_tlab_top);
 779 
 780   // Check against TLAB end
 781   Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
 782   macro->transform_later(tlab_full);
 783 
 784   Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
 785   macro->transform_later(needgc_bol);
 786   IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
 787   macro->transform_later(needgc_iff);
 788 
 789   // Plug the failing-heap-space-need-gc test into the slow-path region
 790   Node* needgc_true = new IfTrueNode(needgc_iff);
 791   macro->transform_later(needgc_true);
 792   needgc_ctrl = needgc_true;
 793 
 794   // No need for a GC.
 795   Node* needgc_false = new IfFalseNode(needgc_iff);
 796   macro->transform_later(needgc_false);
 797 
 798   // Fast path:
 799   i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
 800                                    old_tlab_top, new_tlab_top, prefetch_lines);
 801 
 802   // Store the modified TLAB top back down.
 803   Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
 804                    TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
 805   macro->transform_later(store_tlab_top);
 806 
 807   fast_oop_ctrl = needgc_false;
 808   fast_oop_rawmem = store_tlab_top;
 809   return old_tlab_top;
 810 }
 811 
 812 const TypeFunc* BarrierSetC2::_clone_type_Type = nullptr;
 813 
 814 void BarrierSetC2::make_clone_type() {
 815   assert(BarrierSetC2::_clone_type_Type == nullptr, "should be");
 816   // Create input type (domain)
 817   int argcnt = NOT_LP64(3) LP64_ONLY(4);
 818   const Type** const domain_fields = TypeTuple::fields(argcnt);
 819   int argp = TypeFunc::Parms;
 820   domain_fields[argp++] = TypeInstPtr::NOTNULL;  // src
 821   domain_fields[argp++] = TypeInstPtr::NOTNULL;  // dst
 822   domain_fields[argp++] = TypeX_X;               // size lower
 823   LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
 824   assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
 825   const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);
 826 
 827   // Create result type (range)
 828   const Type** const range_fields = TypeTuple::fields(0);
 829   const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);
 830 
 831   BarrierSetC2::_clone_type_Type = TypeFunc::make(domain, range);
 832 }
 833 
 834 inline const TypeFunc* BarrierSetC2::clone_type() {
 835   assert(BarrierSetC2::_clone_type_Type != nullptr, "should be initialized");
 836   return BarrierSetC2::_clone_type_Type;
 837 }
 838 
 839 #define XTOP LP64_ONLY(COMMA phase->top())
 840 
 841 void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
 842                                     address clone_addr, const char* clone_name) const {
 843   Node* const ctrl = ac->in(TypeFunc::Control);
 844   Node* const mem  = ac->in(TypeFunc::Memory);
 845   Node* const src  = ac->in(ArrayCopyNode::Src);
 846   Node* const dst  = ac->in(ArrayCopyNode::Dest);
 847   Node* const size = ac->in(ArrayCopyNode::Length);
 848 
 849   assert(size->bottom_type()->base() == Type_X,
 850          "Should be of object size type (int for 32 bits, long for 64 bits)");
 851 
 852   // The native clone we are calling here expects the object size in words.
 853   // Add header/offset size to payload size to get object size.
 854 
 855   // We need the full object size - payload (already aligned) plus base offset (which is not always aligned, so round *up*),
 856   // because clone_in_runtime copies the whole object from 0 to end.
 857   Node* const base_offset = phase->MakeConX((arraycopy_payload_base_offset(ac->is_clone_array()) + (BytesPerLong - 1)) >> LogBytesPerLong);
 858   Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
 859 
 860   // HeapAccess<>::clone expects size in heap words.
 861   // For 64-bits platforms, this is a no-operation.
 862   // For 32-bits platforms, we need to multiply full_size by HeapWordsPerLong (2).
 863   Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));
 864 
 865   Node* const call = phase->make_leaf_call(ctrl,
 866                                            mem,
 867                                            clone_type(),
 868                                            clone_addr,
 869                                            clone_name,
 870                                            TypeRawPtr::BOTTOM,
 871                                            src, dst, full_size_in_heap_words XTOP);
 872   phase->transform_later(call);
 873   phase->igvn().replace_node(ac, call);
 874 }
 875 
 876 void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
 877   Node* ctrl = ac->in(TypeFunc::Control);
 878   Node* mem = ac->in(TypeFunc::Memory);
 879   Node* src = ac->in(ArrayCopyNode::Src);
 880   Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
 881   Node* dest = ac->in(ArrayCopyNode::Dest);
 882   Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
 883   Node* length = ac->in(ArrayCopyNode::Length);
 884 
 885   Node* payload_src = phase->basic_plus_adr(src, src_offset);
 886   Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);
 887 
 888   if (should_copy_int_prefix(phase, ac)) {
 889     mem = arraycopy_copy_int_prefix(phase, ctrl, mem, payload_src, payload_dst);
 890 
 891     // We've copied the prefix, bump the pointers.
 892     payload_src = phase->basic_plus_adr(src, payload_src, BytesPerInt);
 893     payload_dst = phase->basic_plus_adr(dest, payload_dst, BytesPerInt);
 894   }
 895 
 896   // Bulk copy.
 897   const char* copyfunc_name = "arraycopy";
 898   address     copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);
 899 
 900   const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
 901   const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();
 902 
 903   Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
 904   phase->transform_later(call);
 905 
 906   phase->igvn().replace_node(ac, call);
 907 }
 908 
 909 bool BarrierSetC2::should_copy_int_prefix(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
 910   // We do our bulk copy in longs. If base offset is not aligned, then we must copy the prefix separately.
 911   // With CompactObjectHeaders, the base offset for an instance is 4 bytes.
 912   // We cannot simply expand the copy to the previous long-alignment, as that will copy the object header,
 913   // which is stateful with COH - it contains hash and lock bits that are specific to the instance.
 914 
 915   // Skip this when src has an array type. With StressReflectiveCode, the
 916   // instance path of the clone can be live in the IR even when the type system
 917   // knows src is an array. The pre-copy is unnecessary on such paths (they
 918   // are unreachable at runtime), and creating a LoadNode at the array length
 919   // offset would assert (LoadRangeNode required).
 920   Node* src = ac->in(ArrayCopyNode::Src);
 921   if (phase->igvn().type(src)->isa_aryptr()) {
 922     return false;
 923   }
 924 
 925   int base_off = arraycopy_payload_base_offset(ac->is_clone_array());
 926   if (is_aligned(base_off, BytesPerLong)) {
 927     // We're aligned, no need to copy anything separately.
 928     return false;
 929   }
 930 
 931   assert(UseCompactObjectHeaders, "non-aligned base offset only possible with compact object headers");
 932   assert(is_aligned(base_off, BytesPerInt), "must be 4-bytes aligned");
 933   return true;
 934 }
 935 
 936 MergeMemNode* BarrierSetC2::arraycopy_copy_int_prefix(PhaseMacroExpand* phase, Node* ctrl, Node* mem, Node* src, Node* dst) const {
 937   // Manual load/store of one int.
 938   MergeMemNode* mm = phase->transform_later(MergeMemNode::make(mem))->as_MergeMem();
 939   const TypePtr* s_adr_type = phase->igvn().type(src)->is_ptr();
 940   const TypePtr* d_adr_type = phase->igvn().type(dst)->is_ptr();
 941   uint s_alias_idx = phase->C->get_alias_index(s_adr_type);
 942   uint d_alias_idx = phase->C->get_alias_index(d_adr_type);
 943   // This copies the first 4 bytes after the compact header (hash field or first instance field) as a raw int.
 944   // The actual field at this offset may be a narrowOop, so the load/store must be marked as mismatched to
 945   // avoid StoreN-vs-StoreI assertion failures during IGVN.
 946   Node* load_prefix = phase->transform_later(
 947       LoadNode::make(phase->igvn(), ctrl, mm->memory_at(s_alias_idx), src, s_adr_type,
 948                       TypeInt::INT, T_INT, MemNode::unordered, LoadNode::DependsOnlyOnTest,
 949                       false /*require_atomic_access*/, false /*unaligned*/, true /*mismatched*/));
 950   Node* store_prefix = phase->transform_later(
 951       StoreNode::make(phase->igvn(), ctrl, mm->memory_at(d_alias_idx), dst, d_adr_type,
 952                       load_prefix, T_INT, MemNode::unordered));
 953   store_prefix->as_Store()->set_mismatched_access();
 954   mm->set_memory_at(d_alias_idx, store_prefix);
 955   return mm;
 956 }
 957 
 958 #undef XTOP
 959 
 960 static bool block_has_safepoint(const Block* block, uint from, uint to) {
 961   for (uint i = from; i < to; i++) {
 962     if (block->get_node(i)->is_MachSafePoint()) {
 963       // Safepoint found
 964       return true;
 965     }
 966   }
 967 
 968   // Safepoint not found
 969   return false;
 970 }
 971 
 972 static bool block_has_safepoint(const Block* block) {
 973   return block_has_safepoint(block, 0, block->number_of_nodes());
 974 }
 975 
 976 static uint block_index(const Block* block, const Node* node) {
 977   for (uint j = 0; j < block->number_of_nodes(); ++j) {
 978     if (block->get_node(j) == node) {
 979       return j;
 980     }
 981   }
 982   ShouldNotReachHere();
 983   return 0;
 984 }
 985 
 986 // Look through various node aliases
 987 static const Node* look_through_node(const Node* node) {
 988   while (node != nullptr) {
 989     const Node* new_node = node;
 990     if (node->is_Mach()) {
 991       const MachNode* const node_mach = node->as_Mach();
 992       if (node_mach->ideal_Opcode() == Op_CheckCastPP) {
 993         new_node = node->in(1);
 994       }
 995       if (node_mach->is_SpillCopy()) {
 996         new_node = node->in(1);
 997       }
 998     }
 999     if (new_node == node || new_node == nullptr) {
1000       break;
1001     } else {
1002       node = new_node;
1003     }
1004   }
1005 
1006   return node;
1007 }
1008 
1009 // Whether the given offset is undefined.
1010 static bool is_undefined(intptr_t offset) {
1011   return offset == Type::OffsetTop;
1012 }
1013 
1014 // Whether the given offset is unknown.
1015 static bool is_unknown(intptr_t offset) {
1016   return offset == Type::OffsetBot;
1017 }
1018 
1019 // Whether the given offset is concrete (defined and compile-time known).
1020 static bool is_concrete(intptr_t offset) {
1021   return !is_undefined(offset) && !is_unknown(offset);
1022 }
1023 
1024 // Compute base + offset components of the memory address accessed by mach.
1025 // Return a node representing the base address, or null if the base cannot be
1026 // found or the offset is undefined or a concrete negative value. If a non-null
1027 // base is returned, the offset is a concrete, nonnegative value or unknown.
1028 static const Node* get_base_and_offset(const MachNode* mach, intptr_t& offset) {
1029   const TypePtr* adr_type = nullptr;
1030   offset = 0;
1031   const Node* base = mach->get_base_and_disp(offset, adr_type);
1032 
1033   if (base == nullptr || base == NodeSentinel) {
1034     return nullptr;
1035   }
1036 
1037   if (offset == 0 && base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_AddP) {
1038     // The memory address is computed by 'base' and fed to 'mach' via an
1039     // indirect memory operand (indicated by offset == 0). The ultimate base and
1040     // offset can be fetched directly from the inputs and Ideal type of 'base'.
1041     const TypeOopPtr* oopptr = base->bottom_type()->isa_oopptr();
1042     if (oopptr == nullptr) return nullptr;
1043     offset = oopptr->offset();
1044     // Even if 'base' is not an Ideal AddP node anymore, Matcher::ReduceInst()
1045     // guarantees that the base address is still available at the same slot.
1046     base = base->in(AddPNode::Base);
1047     assert(base != nullptr, "");
1048   }
1049 
1050   if (is_undefined(offset) || (is_concrete(offset) && offset < 0)) {
1051     return nullptr;
1052   }
1053 
1054   return look_through_node(base);
1055 }
1056 
1057 // Whether a phi node corresponds to an array allocation.
1058 // This test is incomplete: in some edge cases, it might return false even
1059 // though the node does correspond to an array allocation.
1060 static bool is_array_allocation(const Node* phi) {
1061   precond(phi->is_Phi());
1062   // Check whether phi has a successor cast (CheckCastPP) to Java array pointer,
1063   // possibly below spill copies and other cast nodes. Limit the exploration to
1064   // a single path from the phi node consisting of these node types.
1065   const Node* current = phi;
1066   while (true) {
1067     const Node* next = nullptr;
1068     for (DUIterator_Fast imax, i = current->fast_outs(imax); i < imax; i++) {
1069       if (!current->fast_out(i)->isa_Mach()) {
1070         continue;
1071       }
1072       const MachNode* succ = current->fast_out(i)->as_Mach();
1073       if (succ->ideal_Opcode() == Op_CheckCastPP) {
1074         if (succ->get_ptr_type()->isa_aryptr()) {
1075           // Cast to Java array pointer: phi corresponds to an array allocation.
1076           return true;
1077         }
1078         // Other cast: record as candidate for further exploration.
1079         next = succ;
1080       } else if (succ->is_SpillCopy() && next == nullptr) {
1081         // Spill copy, and no better candidate found: record as candidate.
1082         next = succ;
1083       }
1084     }
1085     if (next == nullptr) {
1086       // No evidence found that phi corresponds to an array allocation, and no
1087       // candidates available to continue exploring.
1088       return false;
1089     }
1090     // Continue exploring from the best candidate found.
1091     current = next;
1092   }
1093   ShouldNotReachHere();
1094 }
1095 
1096 bool BarrierSetC2::is_allocation(const Node* node) {
1097   assert(node->is_Phi(), "expected phi node");
1098   if (node->req() != 3) {
1099     return false;
1100   }
1101   const Node* const fast_node = node->in(2);
1102   if (!fast_node->is_Mach()) {
1103     return false;
1104   }
1105   const MachNode* const fast_mach = fast_node->as_Mach();
1106   if (fast_mach->ideal_Opcode() != Op_LoadP) {
1107     return false;
1108   }
1109   intptr_t offset;
1110   const Node* const base = get_base_and_offset(fast_mach, offset);
1111   if (base == nullptr || !base->is_Mach() || !is_concrete(offset)) {
1112     return false;
1113   }
1114   const MachNode* const base_mach = base->as_Mach();
1115   if (base_mach->ideal_Opcode() != Op_ThreadLocal) {
1116     return false;
1117   }
1118   return offset == in_bytes(Thread::tlab_top_offset());
1119 }
1120 
1121 void BarrierSetC2::elide_dominated_barriers(Node_List& accesses, Node_List& access_dominators) const {
1122   Compile* const C = Compile::current();
1123   PhaseCFG* const cfg = C->cfg();
1124 
1125   for (uint i = 0; i < accesses.size(); i++) {
1126     MachNode* const access = accesses.at(i)->as_Mach();
1127     intptr_t access_offset;
1128     const Node* const access_obj = get_base_and_offset(access, access_offset);
1129     Block* const access_block = cfg->get_block_for_node(access);
1130     const uint access_index = block_index(access_block, access);
1131 
1132     if (access_obj == nullptr) {
1133       // No information available
1134       continue;
1135     }
1136 
1137     for (uint j = 0; j < access_dominators.size(); j++) {
1138      const  Node* const mem = access_dominators.at(j);
1139       if (mem->is_Phi()) {
1140         assert(is_allocation(mem), "expected allocation phi node");
1141         if (mem != access_obj) {
1142           continue;
1143         }
1144         if (is_unknown(access_offset) && !is_array_allocation(mem)) {
1145           // The accessed address has an unknown offset, but the allocated
1146           // object cannot be determined to be an array. Avoid eliding in this
1147           // case, to be on the safe side.
1148           continue;
1149         }
1150         assert((is_concrete(access_offset) && access_offset >= 0) || (is_unknown(access_offset) && is_array_allocation(mem)),
1151                "candidate allocation-dominated access offsets must be either concrete and nonnegative, or unknown (for array allocations only)");
1152       } else {
1153         // Access node
1154         const MachNode* const mem_mach = mem->as_Mach();
1155         intptr_t mem_offset;
1156         const Node* const mem_obj = get_base_and_offset(mem_mach, mem_offset);
1157 
1158         if (mem_obj == nullptr ||
1159             !is_concrete(access_offset) ||
1160             !is_concrete(mem_offset)) {
1161           // No information available
1162           continue;
1163         }
1164 
1165         if (mem_obj != access_obj || mem_offset != access_offset) {
1166           // Not the same addresses, not a candidate
1167           continue;
1168         }
1169         assert(is_concrete(access_offset) && access_offset >= 0,
1170                "candidate non-allocation-dominated access offsets must be concrete and nonnegative");
1171       }
1172 
1173       Block* mem_block = cfg->get_block_for_node(mem);
1174       const uint mem_index = block_index(mem_block, mem);
1175 
1176       if (access_block == mem_block) {
1177         // Earlier accesses in the same block
1178         if (mem_index < access_index && !block_has_safepoint(mem_block, mem_index + 1, access_index)) {
1179           elide_dominated_barrier(access);
1180         }
1181       } else if (mem_block->dominates(access_block)) {
1182         // Dominating block? Look around for safepoints
1183         ResourceMark rm;
1184         Block_List stack;
1185         VectorSet visited;
1186         stack.push(access_block);
1187         bool safepoint_found = block_has_safepoint(access_block);
1188         while (!safepoint_found && stack.size() > 0) {
1189           const Block* const block = stack.pop();
1190           if (visited.test_set(block->_pre_order)) {
1191             continue;
1192           }
1193           if (block_has_safepoint(block)) {
1194             safepoint_found = true;
1195             break;
1196           }
1197           if (block == mem_block) {
1198             continue;
1199           }
1200 
1201           // Push predecessor blocks
1202           for (uint p = 1; p < block->num_preds(); ++p) {
1203             Block* const pred = cfg->get_block_for_node(block->pred(p));
1204             stack.push(pred);
1205           }
1206         }
1207 
1208         if (!safepoint_found) {
1209           elide_dominated_barrier(access);
1210         }
1211       }
1212     }
1213   }
1214 }
1215 
1216 void BarrierSetC2::compute_liveness_at_stubs() const {
1217   ResourceMark rm;
1218   Compile* const C = Compile::current();
1219   Arena* const A = Thread::current()->resource_area();
1220   PhaseCFG* const cfg = C->cfg();
1221   PhaseRegAlloc* const regalloc = C->regalloc();
1222   RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
1223   BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
1224   BarrierSetC2State* bs_state = barrier_set_state();
1225   Block_List worklist;
1226 
1227   for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
1228     new ((void*)(live + i)) RegMask();
1229     worklist.push(cfg->get_block(i));
1230   }
1231 
1232   while (worklist.size() > 0) {
1233     const Block* const block = worklist.pop();
1234     RegMask& old_live = live[block->_pre_order];
1235     RegMask new_live;
1236 
1237     // Initialize to union of successors
1238     for (uint i = 0; i < block->_num_succs; i++) {
1239       const uint succ_id = block->_succs[i]->_pre_order;
1240       new_live.or_with(live[succ_id]);
1241     }
1242 
1243     // Walk block backwards, computing liveness
1244     for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
1245       const Node* const node = block->get_node(i);
1246 
1247       // If this node tracks out-liveness, update it
1248       if (!bs_state->needs_livein_data()) {
1249         RegMask* const regs = bs_state->live(node);
1250         if (regs != nullptr) {
1251           regs->or_with(new_live);
1252         }
1253       }
1254 
1255       // Remove def bits
1256       const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
1257       const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
1258       if (first != OptoReg::Bad) {
1259         new_live.remove(first);
1260       }
1261       if (second != OptoReg::Bad) {
1262         new_live.remove(second);
1263       }
1264 
1265       // Add use bits
1266       for (uint j = 1; j < node->req(); ++j) {
1267         const Node* const use = node->in(j);
1268         const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
1269         const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
1270         if (first != OptoReg::Bad) {
1271           new_live.insert(first);
1272         }
1273         if (second != OptoReg::Bad) {
1274           new_live.insert(second);
1275         }
1276       }
1277 
1278       // If this node tracks in-liveness, update it
1279       if (bs_state->needs_livein_data()) {
1280         RegMask* const regs = bs_state->live(node);
1281         if (regs != nullptr) {
1282           regs->or_with(new_live);
1283         }
1284       }
1285     }
1286 
1287     // Now at block top, see if we have any changes
1288     new_live.subtract(old_live);
1289     if (!new_live.is_empty()) {
1290       // Liveness has refined, update and propagate to prior blocks
1291       old_live.or_with(new_live);
1292       for (uint i = 1; i < block->num_preds(); ++i) {
1293         Block* const pred = cfg->get_block_for_node(block->pred(i));
1294         worklist.push(pred);
1295       }
1296     }
1297   }
1298 }