1 /*
   2  * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 #include "precompiled.hpp"
  25 #include "opto/castnode.hpp"
  26 #include "opto/compile.hpp"
  27 #include "opto/escape.hpp"
  28 #include "opto/graphKit.hpp"
  29 #include "opto/loopnode.hpp"
  30 #include "opto/machnode.hpp"
  31 #include "opto/macro.hpp"
  32 #include "opto/memnode.hpp"
  33 #include "opto/movenode.hpp"
  34 #include "opto/node.hpp"
  35 #include "opto/phase.hpp"
  36 #include "opto/phaseX.hpp"
  37 #include "opto/rootnode.hpp"
  38 #include "opto/type.hpp"
  39 #include "utilities/copy.hpp"
  40 #include "utilities/growableArray.hpp"
  41 #include "utilities/macros.hpp"
  42 #include "gc/z/zBarrierSet.hpp"
  43 #include "gc/z/c2/zBarrierSetC2.hpp"
  44 #include "gc/z/zThreadLocalData.hpp"
  45 #include "gc/z/zBarrierSetRuntime.hpp"
  46 
  47 ZBarrierSetC2State::ZBarrierSetC2State(Arena* comp_arena) :
  48     _load_barrier_nodes(new (comp_arena) GrowableArray<LoadBarrierNode*>(comp_arena, 8,  0, NULL)) {}
  49 
  50 int ZBarrierSetC2State::load_barrier_count() const {
  51   return _load_barrier_nodes->length();
  52 }
  53 
  54 void ZBarrierSetC2State::add_load_barrier_node(LoadBarrierNode * n) {
  55   assert(!_load_barrier_nodes->contains(n), " duplicate entry in expand list");
  56   _load_barrier_nodes->append(n);
  57 }
  58 
  59 void ZBarrierSetC2State::remove_load_barrier_node(LoadBarrierNode * n) {
  60   // this function may be called twice for a node so check
  61   // that the node is in the array before attempting to remove it
  62   if (_load_barrier_nodes->contains(n)) {
  63     _load_barrier_nodes->remove(n);
  64   }
  65 }
  66 
  67 LoadBarrierNode* ZBarrierSetC2State::load_barrier_node(int idx) const {
  68   return _load_barrier_nodes->at(idx);
  69 }
  70 
  71 void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  72   return new(comp_arena) ZBarrierSetC2State(comp_arena);
  73 }
  74 
  75 ZBarrierSetC2State* ZBarrierSetC2::state() const {
  76   return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
  77 }
  78 
  79 bool ZBarrierSetC2::is_gc_barrier_node(Node* node) const {
  80   // 1. This step follows potential oop projections of a load barrier before expansion
  81   if (node->is_Proj()) {
  82     node = node->in(0);
  83   }
  84 
  85   // 2. This step checks for unexpanded load barriers
  86   if (node->is_LoadBarrier()) {
  87     return true;
  88   }
  89 
  90   // 3. This step checks for the phi corresponding to an optimized load barrier expansion
  91   if (node->is_Phi()) {
  92     PhiNode* phi = node->as_Phi();
  93     Node* n = phi->in(1);
  94     if (n != NULL && n->is_LoadBarrierSlowReg()) {
  95       return true;
  96     }
  97   }
  98 
  99   return false;
 100 }
 101 
 102 void ZBarrierSetC2::register_potential_barrier_node(Node* node) const {
 103   if (node->is_LoadBarrier()) {
 104     state()->add_load_barrier_node(node->as_LoadBarrier());
 105   }
 106 }
 107 
 108 void ZBarrierSetC2::unregister_potential_barrier_node(Node* node) const {
 109   if (node->is_LoadBarrier()) {
 110     state()->remove_load_barrier_node(node->as_LoadBarrier());
 111   }
 112 }
 113 
 114 void ZBarrierSetC2::eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const {
 115   // Remove useless LoadBarrier nodes
 116   ZBarrierSetC2State* s = state();
 117   for (int i = s->load_barrier_count()-1; i >= 0; i--) {
 118     LoadBarrierNode* n = s->load_barrier_node(i);
 119     if (!useful.member(n)) {
 120       unregister_potential_barrier_node(n);
 121     }
 122   }
 123 }
 124 
 125 void ZBarrierSetC2::enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const {
 126   if (node->is_LoadBarrier() && !node->as_LoadBarrier()->has_true_uses()) {
 127     igvn->_worklist.push(node);
 128   }
 129 }
 130 
 131 static bool load_require_barrier(LoadNode* load)      { return ((load->barrier_data() & RequireBarrier) != 0); }
 132 static bool load_has_weak_barrier(LoadNode* load)     { return ((load->barrier_data() & WeakBarrier) != 0); }
 133 static bool load_has_expanded_barrier(LoadNode* load) { return ((load->barrier_data() & ExpandedBarrier) != 0); }
 134 static void load_set_expanded_barrier(LoadNode* load) { return load->set_barrier_data(ExpandedBarrier); }
 135 
 136 static void load_set_barrier(LoadNode* load, bool weak)    {
 137   if (weak) {
 138     load->set_barrier_data(WeakBarrier);
 139   } else {
 140     load->set_barrier_data(RequireBarrier);
 141   }
 142 }
 143 
 144 // == LoadBarrierNode ==
 145 
 146 LoadBarrierNode::LoadBarrierNode(Compile* C,
 147                                  Node* c,
 148                                  Node* mem,
 149                                  Node* val,
 150                                  Node* adr,
 151                                  bool weak) :
 152     MultiNode(Number_of_Inputs),
 153     _weak(weak) {
 154   init_req(Control, c);
 155   init_req(Memory, mem);
 156   init_req(Oop, val);
 157   init_req(Address, adr);
 158   init_req(Similar, C->top());
 159 
 160   init_class_id(Class_LoadBarrier);
 161   BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
 162   bs->register_potential_barrier_node(this);
 163 }
 164 
 165 uint LoadBarrierNode::size_of() const {
 166   return sizeof(*this);
 167 }
 168 
 169 bool LoadBarrierNode::cmp(const Node& n) const {
 170   ShouldNotReachHere();
 171   return false;
 172 }
 173 
 174 const Type *LoadBarrierNode::bottom_type() const {
 175   const Type** floadbarrier = (const Type **)(Compile::current()->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*)));
 176   Node* in_oop = in(Oop);
 177   floadbarrier[Control] = Type::CONTROL;
 178   floadbarrier[Memory] = Type::MEMORY;
 179   floadbarrier[Oop] = in_oop == NULL ? Type::TOP : in_oop->bottom_type();
 180   return TypeTuple::make(Number_of_Outputs, floadbarrier);
 181 }
 182 
 183 const TypePtr* LoadBarrierNode::adr_type() const {
 184   ShouldNotReachHere();
 185   return NULL;
 186 }
 187 
 188 const Type *LoadBarrierNode::Value(PhaseGVN *phase) const {
 189   const Type** floadbarrier = (const Type **)(phase->C->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*)));
 190   const Type* val_t = phase->type(in(Oop));
 191   floadbarrier[Control] = Type::CONTROL;
 192   floadbarrier[Memory]  = Type::MEMORY;
 193   floadbarrier[Oop]     = val_t;
 194   return TypeTuple::make(Number_of_Outputs, floadbarrier);
 195 }
 196 
 197 bool LoadBarrierNode::is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n) {
 198   if (phase != NULL) {
 199     return phase->is_dominator(d, n);
 200   }
 201 
 202   for (int i = 0; i < 10 && n != NULL; i++) {
 203     n = IfNode::up_one_dom(n, linear_only);
 204     if (n == d) {
 205       return true;
 206     }
 207   }
 208 
 209   return false;
 210 }
 211 
 212 LoadBarrierNode* LoadBarrierNode::has_dominating_barrier(PhaseIdealLoop* phase, bool linear_only, bool look_for_similar) {
 213   if (is_weak()) {
 214     // Weak barriers can't be eliminated
 215     return NULL;
 216   }
 217 
 218   Node* val = in(LoadBarrierNode::Oop);
 219   if (in(Similar)->is_Proj() && in(Similar)->in(0)->is_LoadBarrier()) {
 220     LoadBarrierNode* lb = in(Similar)->in(0)->as_LoadBarrier();
 221     assert(lb->in(Address) == in(Address), "");
 222     // Load barrier on Similar edge dominates so if it now has the Oop field it can replace this barrier.
 223     if (lb->in(Oop) == in(Oop)) {
 224       return lb;
 225     }
 226     // Follow chain of load barrier through Similar edges
 227     while (!lb->in(Similar)->is_top()) {
 228       lb = lb->in(Similar)->in(0)->as_LoadBarrier();
 229       assert(lb->in(Address) == in(Address), "");
 230     }
 231     if (lb != in(Similar)->in(0)) {
 232       return lb;
 233     }
 234   }
 235   for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
 236     Node* u = val->fast_out(i);
 237     if (u != this && u->is_LoadBarrier() && u->in(Oop) == val && u->as_LoadBarrier()->has_true_uses()) {
 238       Node* this_ctrl = in(LoadBarrierNode::Control);
 239       Node* other_ctrl = u->in(LoadBarrierNode::Control);
 240       if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) {
 241         return u->as_LoadBarrier();
 242       }
 243     }
 244   }
 245 
 246   if (can_be_eliminated()) {
 247     return NULL;
 248   }
 249 
 250   if (!look_for_similar) {
 251     return NULL;
 252   }
 253 
 254   Node* addr = in(LoadBarrierNode::Address);
 255   for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) {
 256     Node* u = addr->fast_out(i);
 257     if (u != this && u->is_LoadBarrier() && u->as_LoadBarrier()->has_true_uses()) {
 258       Node* this_ctrl = in(LoadBarrierNode::Control);
 259       Node* other_ctrl = u->in(LoadBarrierNode::Control);
 260       if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) {
 261         ResourceMark rm;
 262         Unique_Node_List wq;
 263         wq.push(in(LoadBarrierNode::Control));
 264         bool ok = true;
 265         bool dom_found = false;
 266         for (uint next = 0; next < wq.size(); ++next) {
 267           Node *n = wq.at(next);
 268           if (n->is_top()) {
 269             return NULL;
 270           }
 271           assert(n->is_CFG(), "");
 272           if (n->is_SafePoint()) {
 273             ok = false;
 274             break;
 275           }
 276           if (n == u) {
 277             dom_found = true;
 278             continue;
 279           }
 280           if (n->is_Region()) {
 281             for (uint i = 1; i < n->req(); i++) {
 282               Node* m = n->in(i);
 283               if (m != NULL) {
 284                 wq.push(m);
 285               }
 286             }
 287           } else {
 288             Node* m = n->in(0);
 289             if (m != NULL) {
 290               wq.push(m);
 291             }
 292           }
 293         }
 294         if (ok) {
 295           assert(dom_found, "");
 296           return u->as_LoadBarrier();
 297         }
 298         break;
 299       }
 300     }
 301   }
 302 
 303   return NULL;
 304 }
 305 
 306 void LoadBarrierNode::push_dominated_barriers(PhaseIterGVN* igvn) const {
 307   // Change to that barrier may affect a dominated barrier so re-push those
 308   assert(!is_weak(), "sanity");
 309   Node* val = in(LoadBarrierNode::Oop);
 310 
 311   for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
 312     Node* u = val->fast_out(i);
 313     if (u != this && u->is_LoadBarrier() && u->in(Oop) == val) {
 314       Node* this_ctrl = in(Control);
 315       Node* other_ctrl = u->in(Control);
 316       if (is_dominator(NULL, false, this_ctrl, other_ctrl)) {
 317         igvn->_worklist.push(u);
 318       }
 319     }
 320 
 321     Node* addr = in(LoadBarrierNode::Address);
 322     for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) {
 323       Node* u = addr->fast_out(i);
 324       if (u != this && u->is_LoadBarrier() && u->in(Similar)->is_top()) {
 325         Node* this_ctrl = in(Control);
 326         Node* other_ctrl = u->in(Control);
 327         if (is_dominator(NULL, false, this_ctrl, other_ctrl)) {
 328           igvn->_worklist.push(u);
 329         }
 330       }
 331     }
 332   }
 333 }
 334 
 335 Node *LoadBarrierNode::Identity(PhaseGVN *phase) {
 336   LoadBarrierNode* dominating_barrier = has_dominating_barrier(NULL, true, false);
 337   if (dominating_barrier != NULL) {
 338     assert(!is_weak(), "Weak barriers cant be eliminated");
 339     assert(dominating_barrier->in(Oop) == in(Oop), "");
 340     return dominating_barrier;
 341   }
 342 
 343   return this;
 344 }
 345 
 346 Node *LoadBarrierNode::Ideal(PhaseGVN *phase, bool can_reshape) {
 347   if (remove_dead_region(phase, can_reshape)) {
 348     return this;
 349   }
 350 
 351   Node *val = in(Oop);
 352   Node *mem = in(Memory);
 353   Node *ctrl = in(Control);
 354 
 355   assert(val->Opcode() != Op_LoadN, "");
 356   assert(val->Opcode() != Op_DecodeN, "");
 357 
 358   if (mem->is_MergeMem()) {
 359     Node *new_mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
 360     set_req(Memory, new_mem);
 361     if (mem->outcnt() == 0 && can_reshape) {
 362       phase->is_IterGVN()->_worklist.push(mem);
 363     }
 364     return this;
 365   }
 366 
 367   LoadBarrierNode *dominating_barrier = NULL;
 368   if (!is_weak()) {
 369     dominating_barrier = has_dominating_barrier(NULL, !can_reshape, !phase->C->major_progress());
 370     if (dominating_barrier != NULL && dominating_barrier->in(Oop) != in(Oop)) {
 371       assert(in(Address) == dominating_barrier->in(Address), "");
 372       set_req(Similar, dominating_barrier->proj_out(Oop));
 373       return this;
 374     }
 375   }
 376 
 377   bool eliminate = can_reshape && (dominating_barrier != NULL || !has_true_uses());
 378   if (eliminate) {
 379     if (can_reshape) {
 380       PhaseIterGVN* igvn = phase->is_IterGVN();
 381       Node* out_ctrl = proj_out_or_null(Control);
 382       Node* out_res = proj_out_or_null(Oop);
 383 
 384       if (out_ctrl != NULL) {
 385         igvn->replace_node(out_ctrl, ctrl);
 386       }
 387 
 388       // That transformation may cause the Similar edge on the load barrier to be invalid
 389       fix_similar_in_uses(igvn);
 390       if (out_res != NULL) {
 391         if (dominating_barrier != NULL) {
 392           assert(!is_weak(), "Sanity");
 393           igvn->replace_node(out_res, dominating_barrier->proj_out(Oop));
 394         } else {
 395           igvn->replace_node(out_res, val);
 396         }
 397       }
 398     }
 399     return new ConINode(TypeInt::ZERO);
 400   }
 401 
 402   // If the Similar edge is no longer a load barrier, clear it
 403   Node* similar = in(Similar);
 404   if (!similar->is_top() && !(similar->is_Proj() && similar->in(0)->is_LoadBarrier())) {
 405     set_req(Similar, phase->C->top());
 406     return this;
 407   }
 408 
 409   if (can_reshape && !is_weak()) {
 410     // If this barrier is linked through the Similar edge by a
 411     // dominated barrier and both barriers have the same Oop field,
 412     // the dominated barrier can go away, so push it for reprocessing.
 413     // We also want to avoid a barrier to depend on another dominating
 414     // barrier through its Similar edge that itself depend on another
 415     // barrier through its Similar edge and rather have the first
 416     // depend on the third.
 417     PhaseIterGVN* igvn = phase->is_IterGVN();
 418     Node* out_res = proj_out(Oop);
 419     for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
 420       Node* u = out_res->fast_out(i);
 421       if (u->is_LoadBarrier() && u->in(Similar) == out_res &&
 422           (u->in(Oop) == val || !u->in(Similar)->is_top())) {
 423         assert(!u->as_LoadBarrier()->is_weak(), "Sanity");
 424         igvn->_worklist.push(u);
 425       }
 426     }
 427     push_dominated_barriers(igvn);
 428   }
 429 
 430   return NULL;
 431 }
 432 
 433 uint LoadBarrierNode::match_edge(uint idx) const {
 434   ShouldNotReachHere();
 435   return 0;
 436 }
 437 
 438 void LoadBarrierNode::fix_similar_in_uses(PhaseIterGVN* igvn) {
 439   Node* out_res = proj_out_or_null(Oop);
 440   if (out_res == NULL) {
 441     return;
 442   }
 443 
 444   for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
 445     Node* u = out_res->fast_out(i);
 446     if (u->is_LoadBarrier() && u->in(Similar) == out_res) {
 447       igvn->replace_input_of(u, Similar, igvn->C->top());
 448       --i;
 449       --imax;
 450     }
 451   }
 452 }
 453 
 454 bool LoadBarrierNode::has_true_uses() const {
 455   Node* out_res = proj_out_or_null(Oop);
 456   if (out_res != NULL) {
 457     for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
 458       Node *u = out_res->fast_out(i);
 459       if (!u->is_LoadBarrier() || u->in(Similar) != out_res) {
 460         return true;
 461       }
 462     }
 463   }
 464   return false;
 465 }
 466 
 467 static bool barrier_needed(C2Access& access) {
 468   return ZBarrierSet::barrier_needed(access.decorators(), access.type());
 469 }
 470 
 471 Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
 472   Node* p = BarrierSetC2::load_at_resolved(access, val_type);
 473   if (!barrier_needed(access)) {
 474     return p;
 475   }
 476 
 477   bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0;
 478   if (p->isa_Load()) {
 479     load_set_barrier(p->as_Load(), weak);
 480   }
 481   return p;
 482 }
 483 
 484 Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
 485                                                     Node* new_val, const Type* val_type) const {
 486   Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
 487   LoadStoreNode* lsn = result->as_LoadStore();
 488   if (barrier_needed(access)) {
 489     lsn->set_has_barrier();
 490   }
 491   return lsn;
 492 }
 493 
 494 Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
 495                                                      Node* new_val, const Type* value_type) const {
 496   Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
 497   LoadStoreNode* lsn = result->as_LoadStore();
 498   if (barrier_needed(access)) {
 499     lsn->set_has_barrier();
 500   }
 501   return lsn;
 502 }
 503 
 504 Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
 505   Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
 506   LoadStoreNode* lsn = result->as_LoadStore();
 507   if (barrier_needed(access)) {
 508     lsn->set_has_barrier();
 509   }
 510   return lsn;
 511 }
 512 
 513 // == Macro Expansion ==
 514 
 515 // Optimized, low spill, loadbarrier variant using stub specialized on register used
 516 void ZBarrierSetC2::expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const {
 517   PhaseIterGVN &igvn = phase->igvn();
 518   float unlikely  = PROB_UNLIKELY(0.999);
 519 
 520   Node* in_ctrl = barrier->in(LoadBarrierNode::Control);
 521   Node* in_mem = barrier->in(LoadBarrierNode::Memory);
 522   Node* in_val = barrier->in(LoadBarrierNode::Oop);
 523   Node* in_adr = barrier->in(LoadBarrierNode::Address);
 524 
 525   Node* out_ctrl = barrier->proj_out_or_null(LoadBarrierNode::Control);
 526   Node* out_res = barrier->proj_out(LoadBarrierNode::Oop);
 527 
 528   assert(barrier->in(LoadBarrierNode::Oop) != NULL, "oop to loadbarrier node cannot be null");
 529 
 530   Node* jthread = igvn.transform(new ThreadLocalNode());
 531   Node* adr = phase->basic_plus_adr(jthread, in_bytes(ZThreadLocalData::address_bad_mask_offset()));
 532   Node* bad_mask = igvn.transform(LoadNode::make(igvn, in_ctrl, in_mem, adr,
 533                                                  TypeRawPtr::BOTTOM, TypeX_X, TypeX_X->basic_type(),
 534                                                  MemNode::unordered));
 535   Node* cast = igvn.transform(new CastP2XNode(in_ctrl, in_val));
 536   Node* obj_masked = igvn.transform(new AndXNode(cast, bad_mask));
 537   Node* cmp = igvn.transform(new CmpXNode(obj_masked, igvn.zerocon(TypeX_X->basic_type())));
 538   Node *bol = igvn.transform(new BoolNode(cmp, BoolTest::ne))->as_Bool();
 539   IfNode* iff = igvn.transform(new IfNode(in_ctrl, bol, unlikely, COUNT_UNKNOWN))->as_If();
 540   Node* then = igvn.transform(new IfTrueNode(iff));
 541   Node* elsen = igvn.transform(new IfFalseNode(iff));
 542 
 543   Node* new_loadp = igvn.transform(new LoadBarrierSlowRegNode(then, in_mem, in_adr, in_val->adr_type(),
 544                                                                     (const TypePtr*) in_val->bottom_type(), MemNode::unordered, barrier->is_weak()));
 545 
 546   // Create the final region/phi pair to converge cntl/data paths to downstream code
 547   Node* result_region = igvn.transform(new RegionNode(3));
 548   result_region->set_req(1, then);
 549   result_region->set_req(2, elsen);
 550 
 551   Node* result_phi = igvn.transform(new PhiNode(result_region, TypeInstPtr::BOTTOM));
 552   result_phi->set_req(1, new_loadp);
 553   result_phi->set_req(2, barrier->in(LoadBarrierNode::Oop));
 554 
 555   if (out_ctrl != NULL) {
 556     igvn.replace_node(out_ctrl, result_region);
 557   }
 558   igvn.replace_node(out_res, result_phi);
 559 
 560   assert(barrier->outcnt() == 0,"LoadBarrier macro node has non-null outputs after expansion!");
 561 
 562   igvn.remove_dead_node(barrier);
 563   igvn.remove_dead_node(out_ctrl);
 564   igvn.remove_dead_node(out_res);
 565 
 566   assert(is_gc_barrier_node(result_phi), "sanity");
 567   assert(step_over_gc_barrier(result_phi) == in_val, "sanity");
 568 
 569   phase->C->print_method(PHASE_BARRIER_EXPANSION, 4, barrier->_idx);
 570 }
 571 
 572 bool ZBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
 573   ZBarrierSetC2State* s = state();
 574   if (s->load_barrier_count() > 0) {
 575     PhaseMacroExpand macro(igvn);
 576 
 577     int skipped = 0;
 578     while (s->load_barrier_count() > skipped) {
 579       int load_barrier_count = s->load_barrier_count();
 580       LoadBarrierNode * n = s->load_barrier_node(load_barrier_count-1-skipped);
 581       if (igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())) {
 582         // Node is unreachable, so don't try to expand it
 583         s->remove_load_barrier_node(n);
 584         continue;
 585       }
 586       if (!n->can_be_eliminated()) {
 587         skipped++;
 588         continue;
 589       }
 590       expand_loadbarrier_node(&macro, n);
 591       assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list");
 592       if (C->failing()) {
 593         return true;
 594       }
 595     }
 596     while (s->load_barrier_count() > 0) {
 597       int load_barrier_count = s->load_barrier_count();
 598       LoadBarrierNode* n = s->load_barrier_node(load_barrier_count - 1);
 599       assert(!(igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())), "should have been processed already");
 600       assert(!n->can_be_eliminated(), "should have been processed already");
 601       expand_loadbarrier_node(&macro, n);
 602       assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list");
 603       if (C->failing()) {
 604         return true;
 605       }
 606     }
 607     igvn.set_delay_transform(false);
 608     igvn.optimize();
 609     if (C->failing()) {
 610       return true;
 611     }
 612   }
 613 
 614   return false;
 615 }
 616 
 617 Node* ZBarrierSetC2::step_over_gc_barrier(Node* c) const {
 618   Node* node = c;
 619 
 620   // 1. This step follows potential oop projections of a load barrier before expansion
 621   if (node->is_Proj()) {
 622     node = node->in(0);
 623   }
 624 
 625   // 2. This step checks for unexpanded load barriers
 626   if (node->is_LoadBarrier()) {
 627     return node->in(LoadBarrierNode::Oop);
 628   }
 629 
 630   // 3. This step checks for the phi corresponding to an optimized load barrier expansion
 631   if (node->is_Phi()) {
 632     PhiNode* phi = node->as_Phi();
 633     Node* n = phi->in(1);
 634     if (n != NULL && n->is_LoadBarrierSlowReg()) {
 635       assert(c == node, "projections from step 1 should only be seen before macro expansion");
 636       return phi->in(2);
 637     }
 638   }
 639 
 640   return c;
 641 }
 642 
 643 bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const {
 644   return type == T_OBJECT || type == T_ARRAY;
 645 }
 646 
 647 bool ZBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, uint opcode) const {
 648   switch (opcode) {
 649     case Op_LoadBarrier:
 650       assert(0, "There should be no load barriers left");
 651     case Op_ZGetAndSetP:
 652     case Op_ZCompareAndExchangeP:
 653     case Op_ZCompareAndSwapP:
 654     case Op_ZWeakCompareAndSwapP:
 655     case Op_LoadBarrierSlowReg:
 656 #ifdef ASSERT
 657       if (VerifyOptoOopOffsets) {
 658         MemNode *mem = n->as_Mem();
 659         // Check to see if address types have grounded out somehow.
 660         const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
 661         ciInstanceKlass *k = tp->klass()->as_instance_klass();
 662         bool oop_offset_is_sane = k->contains_field_offset(tp->offset());
 663         assert(!tp || oop_offset_is_sane, "");
 664       }
 665 #endif
 666       return true;
 667     default:
 668       return false;
 669   }
 670 }
 671 
 672 bool ZBarrierSetC2::matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const {
 673   switch(opcode) {
 674     case Op_CallLeaf:
 675       if (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() ||
 676           n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr()) {
 677         mem_op = true;
 678         mem_addr_idx = TypeFunc::Parms + 1;
 679         return true;
 680       }
 681       return false;
 682     default:
 683       return false;
 684   }
 685 }
 686 
 687 bool ZBarrierSetC2::matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const {
 688   switch(opcode) {
 689     case Op_ZCompareAndExchangeP:
 690     case Op_ZCompareAndSwapP:
 691     case Op_ZWeakCompareAndSwapP: {
 692       Node *mem = n->in(MemNode::Address);
 693       Node *keepalive = n->in(5);
 694       Node *pair1 = new BinaryNode(mem, keepalive);
 695 
 696       Node *newval = n->in(MemNode::ValueIn);
 697       Node *oldval = n->in(LoadStoreConditionalNode::ExpectedIn);
 698       Node *pair2 = new BinaryNode(oldval, newval);
 699 
 700       n->set_req(MemNode::Address, pair1);
 701       n->set_req(MemNode::ValueIn, pair2);
 702       n->del_req(5);
 703       n->del_req(LoadStoreConditionalNode::ExpectedIn);
 704       return true;
 705     }
 706     case Op_ZGetAndSetP: {
 707       Node *keepalive = n->in(4);
 708       Node *newval = n->in(MemNode::ValueIn);
 709       Node *pair = new BinaryNode(newval, keepalive);
 710       n->set_req(MemNode::ValueIn, pair);
 711       n->del_req(4);
 712       return true;
 713     }
 714 
 715     default:
 716       return false;
 717   }
 718 }
 719 
 720 // == Verification ==
 721 
 722 #ifdef ASSERT
 723 
 724 static bool look_for_barrier(Node* n, bool post_parse, VectorSet& visited) {
 725   if (visited.test_set(n->_idx)) {
 726     return true;
 727   }
 728 
 729   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 730     Node* u = n->fast_out(i);
 731     if (u->is_LoadBarrier()) {
 732     } else if ((u->is_Phi() || u->is_CMove()) && !post_parse) {
 733       if (!look_for_barrier(u, post_parse, visited)) {
 734         return false;
 735       }
 736     } else if (u->Opcode() == Op_EncodeP || u->Opcode() == Op_DecodeN) {
 737       if (!look_for_barrier(u, post_parse, visited)) {
 738         return false;
 739       }
 740     } else if (u->Opcode() != Op_SCMemProj) {
 741       tty->print("bad use"); u->dump();
 742       return false;
 743     }
 744   }
 745 
 746   return true;
 747 }
 748 
 749 void ZBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
 750   switch(phase) {
 751     case BarrierSetC2::BeforeOptimize:
 752     case BarrierSetC2::BeforeLateInsertion:
 753       assert(state()->load_barrier_count() == 0, "No barriers inserted yet");
 754       break;
 755     case BarrierSetC2::BeforeMacroExpand:
 756       // Barrier placement should be set by now.
 757       verify_gc_barriers(false /*post_parse*/);
 758       break;
 759     case BarrierSetC2::BeforeCodeGen:
 760       // Barriers has been fully expanded.
 761       assert(state()->load_barrier_count() == 0, "No more macro barriers");
 762       break;
 763     default:
 764       assert(0, "Phase without verification");
 765   }
 766 }
 767 
 768 // post_parse implies that there might be load barriers without uses after parsing
 769 // That only applies when adding barriers at parse time.
 770 void ZBarrierSetC2::verify_gc_barriers(bool post_parse) const {
 771   ZBarrierSetC2State* s = state();
 772   Compile* C = Compile::current();
 773   ResourceMark rm;
 774   VectorSet visited(Thread::current()->resource_area());
 775 
 776   for (int i = 0; i < s->load_barrier_count(); i++) {
 777     LoadBarrierNode* n = s->load_barrier_node(i);
 778 
 779     // The dominating barrier on the same address if it exists and
 780     // this barrier must not be applied on the value from the same
 781     // load otherwise the value is not reloaded before it's used the
 782     // second time.
 783     assert(n->in(LoadBarrierNode::Similar)->is_top() ||
 784            (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() &&
 785             n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Address) == n->in(LoadBarrierNode::Address) &&
 786             n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Oop) != n->in(LoadBarrierNode::Oop)),
 787            "broken similar edge");
 788 
 789     assert(n->as_LoadBarrier()->has_true_uses(),
 790            "found unneeded load barrier");
 791 
 792     // Several load barrier nodes chained through their Similar edge
 793     // break the code that remove the barriers in final graph reshape.
 794     assert(n->in(LoadBarrierNode::Similar)->is_top() ||
 795            (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() &&
 796             n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Similar)->is_top()),
 797            "chain of Similar load barriers");
 798 
 799     if (!n->in(LoadBarrierNode::Similar)->is_top()) {
 800       ResourceMark rm;
 801       Unique_Node_List wq;
 802       Node* other = n->in(LoadBarrierNode::Similar)->in(0);
 803       wq.push(n);
 804       for (uint next = 0; next < wq.size(); ++next) {
 805         Node *nn = wq.at(next);
 806         assert(nn->is_CFG(), "");
 807         assert(!nn->is_SafePoint(), "");
 808 
 809         if (nn == other) {
 810           continue;
 811         }
 812 
 813         if (nn->is_Region()) {
 814           for (uint i = 1; i < nn->req(); i++) {
 815             Node* m = nn->in(i);
 816             if (m != NULL) {
 817               wq.push(m);
 818             }
 819           }
 820         } else {
 821           Node* m = nn->in(0);
 822           if (m != NULL) {
 823             wq.push(m);
 824           }
 825         }
 826       }
 827     }
 828   }
 829 }
 830 
 831 #endif // end verification code
 832 
 833 static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl);
 834 
 835 // Clone every use of a load that sits between a call and its catch blocks
 836 // down to each of that use's own uses, so that nothing stays at start_ctrl.
     //
     // phase      - loop phase, used for control lookups and IGVN bookkeeping
     // start_ctrl - control that the nodes to be moved are currently assigned to
     // node       - the use to examine; its own uses are handled first
     //
     // Returns true when node was assigned to start_ctrl and has been handled
     // (cloned to its uses, or passed to call_catch_cleanup_one if it is a load),
     // false when node has no control in phase or is assigned to another control.
 837
 838 static bool fixup_uses_in_catch(PhaseIdealLoop *phase, Node *start_ctrl, Node *node) {
 839
 840   if (!phase->has_ctrl(node)) {
 841     // This node is floating - doesn't need to be cloned.
 842     assert(node != start_ctrl, "check");
 843     return false;
 844   }
 845
 846   Node* ctrl = phase->get_ctrl(node);
 847   if (ctrl != start_ctrl) {
 848     // We are in a successor block - the node is ok.
 849     return false; // Unwind
 850   }
 851
 852   // Process successor nodes
       // Same scheme as the use loop in call_catch_cleanup_one: a use that was
       // handled has been disconnected from node, so the slot at 'index' now
       // holds a different out and is looked at again; a use that was left in
       // place is stepped over. Looking at slot 0 every time would keep
       // revisiting an untouched first use and never reach the remaining ones.
 853   int outcnt = node->outcnt();
       uint index = 0;
 854   for (int i = 0; i < outcnt; i++) {
         if (index < node->outcnt()) {
 855       Node* n = node->raw_out(index);
 856       assert(!n->is_LoadBarrier(), "Sanity");
 857       // Calling recursively, visiting leafs first
 858       if (!fixup_uses_in_catch(phase, start_ctrl, n)) {
             // Nothing was cloned for this use, progress to the next out.
             index++;
           }
         }
 859   }
 860
 861   // Now all successors are outside
 862   // - Clone this node to both successors
 864   assert(!node->is_Store(), "Stores not expected here");
 865
 866   // In some very rare cases a load that doesn't need a barrier will end up here
 867   // Treat it as a LoadP and the insertion of phis will be done correctly.
 868   if (node->is_Load()) {
 869     call_catch_cleanup_one(phase, node->as_Load(), phase->get_ctrl(node));
 870   } else {
 871     for (DUIterator_Fast jmax, i = node->fast_outs(jmax); i < jmax; i++) {
 872       Node* use = node->fast_out(i);
 873       Node* clone = node->clone();
 874       assert(clone->outcnt() == 0, "");
 875
 876       assert(use->find_edge(node) != -1, "check");
 877       phase->igvn().rehash_node_delayed(use);
 878       use->replace_edge(node, clone);
 879
           // The clone is assigned to the control of the use it now feeds.
 880       Node* new_ctrl;
 881       if (use->is_block_start()) {
 882         new_ctrl = use;
 883       } else if (use->is_CFG()) {
 884         new_ctrl = use->in(0);
 885         assert (new_ctrl != NULL, "");
 886       } else {
 887         new_ctrl = phase->get_ctrl(use);
 888       }
 889
 890       phase->set_ctrl(clone, new_ctrl);
 891
 892       if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone op %i as %i to control %i", node->_idx, clone->_idx, new_ctrl->_idx);
 893       phase->igvn().register_new_node_with_optimizer(clone);
           // replace_edge took 'use' out of node's out list; stay on this slot.
 894       --i, --jmax;
 895     }
 896     assert(node->outcnt() == 0, "must be empty now");
 897
 898     // Node node is dead.
 899     phase->igvn().remove_dead_node(node);
 900   }
 901   return true; // unwind - return if a use was processed
 902 }
 903 
 904 // Copy load, give the copy catch_proj as control, register it with IGVN and return it.
 905 static Node* clone_load_to_catchproj(PhaseIdealLoop* phase, Node* load, Node* catch_proj) {
 906   Node* const copy = load->clone();
 907   copy->set_req(0, catch_proj);      // explicit control input
 908   phase->set_ctrl(copy, catch_proj); // same control in the phase's bookkeeping
 909   if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone LOAD %i as %i to control %i", load->_idx, copy->_idx, catch_proj->_idx);
 910   phase->igvn().register_new_node_with_optimizer(copy);
 911   return copy;
 912 }
 913 
 914 static Node* get_dominating_region(PhaseIdealLoop* phase, Node* node, Node* stop) {
 915   // Climb the immediate-dominator chain, starting at node itself, up to the first Region.
 916   Node* cur = node;
 917   for (Node* parent = NULL; cur->isa_Region() == NULL; cur = parent) {
 918     parent = phase->idom(cur);
 919     assert(parent != cur,  "Must not loop");
 920     assert(parent != stop, "Must not find original control");
 921   }
 922   return cur;
 923 }
 924 
 925 // Clone this load to each catch block
     //
     // ctrl is expected to be a projection of a call whose first out is a Catch
     // (see the asserts below). Uses of load that are still assigned to ctrl are
     // first handled through fixup_uses_in_catch. After that, every remaining use
     // of load is rewired either to a clone of load placed on the catch projection
     // that dominates the use or, when no single projection dominates it, to a Phi
     // (or chain of Phis) that merges the clones from the catch projections.
     // Finally the original load, left without uses, is removed.
     //
     // phase - loop phase used for control/dominator queries and IGVN updates
     // load  - the load to clone
     // ctrl  - the control that load is assigned to
 926 static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) {
 927   bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
 928   phase->igvn().set_delay_transform(true);
 929
 930   // Verify pre conditions
 931   assert(ctrl->isa_Proj() && ctrl->in(0)->isa_Call(), "Must be a call proj");
 932   assert(ctrl->raw_out(0)->isa_Catch(), "Must be a catch");
 933
 934   if (ctrl->raw_out(0)->isa_Catch()->outcnt() == 1) {
 935     if (trace) tty->print_cr("Cleaning up catch: Skipping load %i, call with single catch", load->_idx);
         // NOTE(review): this return leaves delay_transform set to true; it is only
         // switched back at the very end of the function. Confirm the path is never
         // taken in practice (process_catch_cleanup_candidate filters out the
         // single-catch case before calling).
 936     return;
 937   }
 938
 939   // Process the loads successor nodes - if any is between
 940   // the call and the catch blocks, they need to be cloned to.
 941   // This is done recursively
       // The number of rounds is bounded by the out count taken up front; 'index'
       // only advances past a use for which nothing was cloned.
 942   int outcnt = load->outcnt();
 943   uint index = 0;
 944   for (int i = 0; i < outcnt; i++) {
 945     if (index < load->outcnt()) {
 946       Node *n = load->raw_out(index);
 947       assert(!n->is_LoadBarrier(), "Sanity");
 948       if (!fixup_uses_in_catch(phase, ctrl, n)) {
 949         // if no successor was cloned, progress to next out.
 950         index++;
 951       }
 952     }
 953   }
 954
 955   // Now all the loads uses has been cloned down
 956   // Only thing left is to clone the loads, but they must end up
 957   // first in the catch blocks.
 958
 959   // We clone the loads to the catch blocks only when needed.
 960   // An array is used to map the catch blocks to each lazily cloned load.
 961   // In that way no extra unnecessary loads are cloned.
 962
 963   // Any use dominated by original block must have an phi and a region added
 964
       // proj_to_load_mapping[c] is the clone made for catch_node->raw_out(c),
       // or NULL while no clone has been made for that projection.
 965   Node* catch_node = ctrl->raw_out(0);
 966   int number_of_catch_projs = catch_node->outcnt();
 967   Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs);
 968   Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs);
 969
 970   // The phi_map is used to keep track of where phis have already been inserted
       // It is indexed by the _idx of a region and sized by the node count at this point.
 971   int phi_map_len = phase->C->unique();
 972   Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len);
 973   Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len);
 974
       // Every handled use is disconnected from load, so 'i' is stepped back
       // after each rewiring and the loop ends once load has no outs left.
       // 'i' is unsigned: stepping back from 0 wraps around and the loop's i++
       // brings it back to 0.
 975   for (unsigned int i = 0; i  < load->outcnt(); i++) {
 976     Node* load_use_control = NULL;
 977     Node* load_use = load->raw_out(i);
 978
         // Uses without a control in the phase fall back to their own control input.
 979     if (phase->has_ctrl(load_use)) {
 980       load_use_control = phase->get_ctrl(load_use);
 981     } else {
 982       load_use_control = load_use->in(0);
 983     }
 984     assert(load_use_control != NULL, "sanity");
 985     if (trace) tty->print_cr("  Handling use: %i, with control: %i", load_use->_idx, load_use_control->_idx);
 986
 987     // Sometimes the loads use is a phi. For them we need to determine from which catch block
 988     // the use is defined.
 989     bool load_use_is_phi = false;
 990     unsigned int load_use_phi_index = 0;
 991     Node* phi_ctrl = NULL;
 992     if (load_use->is_Phi()) {
 993       // Find phi input that matches load
           // Only the first matching input is recorded; phi_ctrl is the region
           // input that corresponds to it.
 994       for (unsigned int u = 1; u < load_use->req(); u++) {
 995         if (load_use->in(u) == load) {
 996           load_use_is_phi = true;
 997           load_use_phi_index = u;
 998           assert(load_use->in(0)->is_Region(), "Region or broken");
 999           phi_ctrl = load_use->in(0)->in(u);
1000           assert(phi_ctrl->is_CFG(), "check");
1001           assert(phi_ctrl != load,   "check");
1002           break;
1003         }
1004       }
1005       assert(load_use_is_phi,        "must find");
1006       assert(load_use_phi_index > 0, "sanity");
1007     }
1008
1009     // For each load use, see which catch projs dominates, create load clone lazily and reconnect
1010     bool found_dominating_catchproj = false;
1011     for (int c = 0; c < number_of_catch_projs; c++) {
1012       Node* catchproj = catch_node->raw_out(c);
1013       assert(catchproj != NULL && catchproj->isa_CatchProj(), "Sanity");
1014
1015       if (!phase->is_dominator(catchproj, load_use_control)) {
1016         if (load_use_is_phi && phase->is_dominator(catchproj, phi_ctrl)) {
1017           // The loads use is local to the catchproj.
1018           // fall out and replace load with catch-local load clone.
1019         } else {
1020           continue;
1021         }
1022       }
1023       assert(!found_dominating_catchproj, "Max one should match");
1024
1025       // Clone loads to catch projs
1026       Node* load_clone = proj_to_load_mapping[c];
1027       if (load_clone == NULL) {
1028         load_clone = clone_load_to_catchproj(phase, load, catchproj);
1029         proj_to_load_mapping[c] = load_clone;
1030       }
1031       phase->igvn().rehash_node_delayed(load_use);
1032
1033       if (load_use_is_phi) {
1034         // phis are special - the load is defined from a specific control flow
1035         load_use->set_req(load_use_phi_index, load_clone);
1036       } else {
1037         // Multiple edges can be replaced at once - on calls for example
1038         load_use->replace_edge(load, load_clone);
1039       }
1040       --i; // more than one edge can have been removed, but the next is in later iterations
1041
1042       // We could break the for-loop after finding a dominating match.
1043       // But keep iterating to catch any bad idom early.
1044       found_dominating_catchproj = true;
1045     }
1046
1047     // We found no single catchproj that dominated the use - The use is at a point after
1048     // where control flow from multiple catch projs have merged. We will have to create
1049     // phi nodes before the use and tie the output from the cloned loads together. It
1050     // can be a single phi or a number of chained phis, depending on control flow
1051     if (!found_dominating_catchproj) {
1052
1053       // Use phi-control if use is a phi
1054       if (load_use_is_phi) {
1055         load_use_control = phi_ctrl;
1056       }
1057       assert(phase->is_dominator(ctrl, load_use_control), "Common use but no dominator");
1058
1059       // Clone a load on all paths
1060       for (int c = 0; c < number_of_catch_projs; c++) {
1061         Node* catchproj = catch_node->raw_out(c);
1062         Node* load_clone = proj_to_load_mapping[c];
1063         if (load_clone == NULL) {
1064           load_clone = clone_load_to_catchproj(phase, load, catchproj);
1065           proj_to_load_mapping[c] = load_clone;
1066         }
1067       }
1068
1069       // Move up dominator tree from use until dom front is reached
           // i.e. until the node whose immediate dominator is the Catch itself.
1070       Node* next_region = get_dominating_region(phase, load_use_control, ctrl);
1071       while (phase->idom(next_region) != catch_node) {
1072         next_region = phase->idom(next_region);
1073         if (trace) tty->print_cr("Moving up idom to region ctrl %i", next_region->_idx);
1074       }
1075       assert(phase->is_dominator(catch_node, next_region), "Sanity");
1076
1077       // Create or reuse phi node that collect all cloned loads and feed it to the use.
1078       Node* test_phi = phi_map[next_region->_idx];
1079       if ((test_phi != NULL) && test_phi->is_Phi()) {
1080         // Reuse an already created phi
1081         if (trace) tty->print_cr("    Using cached Phi %i on load_use %i", test_phi->_idx, load_use->_idx);
1082         phase->igvn().rehash_node_delayed(load_use);
1083         load_use->replace_edge(load, test_phi);
1084         // Now this use is done
1085       } else {
1086         // Otherwise we need to create one or more phis
1087         PhiNode* next_phi = new PhiNode(next_region, load->type());
1088         phi_map[next_region->_idx] = next_phi; // cache new phi
1089         phase->igvn().rehash_node_delayed(load_use);
1090         load_use->replace_edge(load, next_phi);
1091
             // Each round fills in the phi for 'next_region'. A region input that
             // no catch proj dominates gets a further phi one region up, which
             // the following round fills in. The loop goes on while exactly one
             // match was counted for the region just handled.
1092         int dominators_of_region = 0;
1093         do {
1094           // New phi, connect to region and add all loads as in.
1095           Node* region = next_region;
1096           assert(region->isa_Region() && region->req() > 2, "Catch dead region nodes");
1097           PhiNode* new_phi = next_phi;
1098
1099           if (trace) tty->print_cr("Created Phi %i on load %i with control %i", new_phi->_idx, load->_idx, region->_idx);
1100
1101           // Need to add all cloned loads to the phi, taking care that the right path is matched
1102           dominators_of_region = 0; // reset for new region
1103           for (unsigned int reg_i = 1; reg_i < region->req(); reg_i++) {
1104             Node* region_pred = region->in(reg_i);
1105             assert(region_pred->is_CFG(), "check");
1106             bool pred_has_dominator = false;
1107             for (int c = 0; c < number_of_catch_projs; c++) {
1108               Node* catchproj = catch_node->raw_out(c);
1109               if (phase->is_dominator(catchproj, region_pred)) {
1110                 new_phi->set_req(reg_i, proj_to_load_mapping[c]);
1111                 if (trace) tty->print_cr(" - Phi in(%i) set to load %i", reg_i, proj_to_load_mapping[c]->_idx);
1112                 pred_has_dominator = true;
1113                 dominators_of_region++;
1114                 break;
1115               }
1116             }
1117
1118             // Sometimes we need to chain several phis.
1119             if (!pred_has_dominator) {
1120               assert(dominators_of_region <= 1, "More than one region can't require extra phi");
1121               if (trace) tty->print_cr(" - Region %i pred %i not dominated by catch proj", region->_idx, region_pred->_idx);
1122               // Continue search on this region_pred
1123               // - walk up to next region
1124               // - create a new phi and connect to first new_phi
1125               next_region = get_dominating_region(phase, region_pred, ctrl);
1126
1127               // Lookup if there already is a phi, create a new otherwise
1128               Node* test_phi = phi_map[next_region->_idx];
1129               if ((test_phi != NULL) && test_phi->is_Phi()) {
1130                 next_phi = test_phi->isa_Phi();
1131                 dominators_of_region++; // record that a match was found and that we are done
1132                 if (trace) tty->print_cr("    Using cached phi Phi %i on control %i", next_phi->_idx, next_region->_idx);
1133               } else {
1134                 next_phi = new PhiNode(next_region, load->type());
1135                 phi_map[next_region->_idx] = next_phi;
1136               }
1137               new_phi->set_req(reg_i, next_phi);
1138             }
1139           }
1140
1141           new_phi->set_req(0, region);
1142           phase->igvn().register_new_node_with_optimizer(new_phi);
1143           phase->set_ctrl(new_phi, region);
1144
1145           assert(dominators_of_region != 0, "Must have found one this iteration");
1146         } while (dominators_of_region == 1);
1147       }
           // The use now reads a phi instead of load; look at the same out slot again.
1148       --i;
1149     }
1150   } // end of loop over uses
1151
1152   assert(load->outcnt() == 0, "All uses should be handled");
1153   phase->igvn().remove_dead_node(load);
1154   phase->C->print_method(PHASE_CALL_CATCH_CLEANUP, 4, load->_idx);
1155
1156   // Now we should be home
1157   phase->igvn().set_delay_transform(false);
1158 }
1159 
1160 // Run the call/catch cleanup for a load that sits between a call and a Catch with several projections
1161 static void process_catch_cleanup_candidate(PhaseIdealLoop* phase, LoadNode* load) {
1162   const bool tracing = phase->C->directive()->ZTraceLoadBarriersOption;
1163   Node* const load_ctrl = phase->get_ctrl(load);
1164   const bool on_call_proj = load_ctrl->is_Proj() && (load_ctrl->in(0) != NULL) && load_ctrl->in(0)->isa_Call();
1165   if (!on_call_proj) {
1166     return;
1167   }
1168   Node* const first_out = load_ctrl->isa_Proj()->raw_out(0);
1169   if (!first_out->is_Catch()) {
1170     return;
1171   }
1172   if (first_out->outcnt() > 1) {
1173     call_catch_cleanup_one(phase, load, load_ctrl);
1174   } else if (tracing) {
1175     tty->print_cr("Call catch cleanup with only one catch: load %i ", load->_idx);
1176   }
1177 }
1178 
1179 void ZBarrierSetC2::barrier_insertion_phase(Compile* C, PhaseIterGVN& igvn) const {
1180   PhaseIdealLoop::optimize(igvn, LoopOptsZBarrierInsertion);  // loop pass in barrier-insertion mode
1181   if (C->failing()) { return; }                               // nothing else follows either way
1182 }
1183 
1184 bool ZBarrierSetC2::optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const {
1185   // Only the barrier-insertion mode is handled here; any other mode yields false.
1186   if (mode != LoopOptsZBarrierInsertion) {
1187     return false;
1188   }
1189
1190   // Step 1: make sure all loads between call and catch are moved to the catch block.
1191   clean_catch_blocks(phase);
1192
1193   // Step 2: expand barriers on all loads.
1194   insert_load_barriers(phase);
1195
1196   // Step 3: handle all Unsafe operations that need barriers.
1197   insert_barriers_on_unsafe(phase);
1198
1199   phase->C->clear_major_progress();
1200   return true;
1201 }
1202 
1203 // True only for a conditional load-store whose expected-value input has the NULL pointer type.
1204 static bool can_simplify_cas(LoadStoreNode* node) {
1205   LoadStoreConditionalNode* const cas = node->isa_LoadStoreConditional();
1206   if (cas == NULL) {
1207     return false;
1208   }
1209   return cas->in(LoadStoreConditionalNode::ExpectedIn)->get_ptr_type() == TypePtr::NULL_PTR;
1210 }
1211 
1212 static void insert_barrier_before_unsafe(PhaseIdealLoop* phase, LoadStoreNode* old_node) {
       // Replace a pointer compare-and-exchange / compare-and-swap / get-and-set
       // node by its Z-specific counterpart, and put a load of the same address
       // plus a LoadBarrierNode on that load in front of it.
       //
       // phase    - loop phase; provides the Compile object and the IGVN
       // old_node - the load-store node to replace
       //
       // Nothing is changed for opcodes not listed in the switch, nor for the
       // two compare-and-swap flavours when can_simplify_cas() holds.
1213
1214   Compile *C = phase->C;
1215   PhaseIterGVN &igvn = phase->igvn();
1216   LoadStoreNode* zclone = NULL;
1217   bool is_weak = false;
1218
1219   Node *in_ctrl = old_node->in(MemNode::Control);
1220   Node *in_mem  = old_node->in(MemNode::Memory);
1221   Node *in_adr  = old_node->in(MemNode::Address);
1222   Node *in_val  = old_node->in(MemNode::ValueIn);
1223   const TypePtr *adr_type = old_node->adr_type();
1224   const TypePtr* load_type = TypeOopPtr::BOTTOM; // The type for the load we are adding
1225
       // Build the replacement node. The cases also choose the address type and
       // the value type that the added load is created with further down.
1226   switch (old_node->Opcode()) {
1227     case Op_CompareAndExchangeP: {
1228       zclone = new ZCompareAndExchangePNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
1229               adr_type, old_node->get_ptr_type(), ((CompareAndExchangeNode*)old_node)->order());
1230       load_type = old_node->bottom_type()->is_ptr();
1231       break;
1232     }
1233     case Op_WeakCompareAndSwapP: {
1234       if (can_simplify_cas(old_node)) {
             // zclone stays NULL, so nothing is done for this node.
1235         break;
1236       }
           // is_weak is handed to the LoadBarrierNode constructor below.
1237       is_weak  = true;
1238       zclone = new ZWeakCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
1239               ((CompareAndSwapNode*)old_node)->order());
1240       adr_type = TypePtr::BOTTOM;
1241       break;
1242     }
1243     case Op_CompareAndSwapP: {
1244       if (can_simplify_cas(old_node)) {
             // zclone stays NULL, so nothing is done for this node.
1245         break;
1246       }
1247       zclone = new ZCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
1248               ((CompareAndSwapNode*)old_node)->order());
1249       adr_type = TypePtr::BOTTOM;
1250       break;
1251     }
1252     case Op_GetAndSetP: {
1253       zclone = new ZGetAndSetPNode(in_ctrl, in_mem, in_adr, in_val, old_node->adr_type(), old_node->get_ptr_type());
1254       load_type = old_node->bottom_type()->is_ptr();
1255       break;
1256     }
1257   }
1258   if (zclone != NULL) {
1259     igvn.register_new_node_with_optimizer(zclone, old_node);
1260
1261     // Make load
         // It is created without a control input; the control is set further
         // down, after the users of in_ctrl have been moved.
1262     LoadPNode *load = new LoadPNode(NULL, in_mem, in_adr, adr_type, load_type, MemNode::unordered,
1263                                     LoadNode::DependsOnlyOnTest);
1264     load_set_expanded_barrier(load);
1265     igvn.register_new_node_with_optimizer(load);
1266     igvn.replace_node(old_node, zclone);
1267
         // The barrier also starts out without a control input, for the same reason.
1268     Node *barrier = new LoadBarrierNode(C, NULL, in_mem, load, in_adr, is_weak);
1269     Node *barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
1270     Node *barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);
1271
1272     igvn.register_new_node_with_optimizer(barrier);
1273     igvn.register_new_node_with_optimizer(barrier_val);
1274     igvn.register_new_node_with_optimizer(barrier_ctrl);
1275
1276     // loop over all of in_ctrl usages and move to barrier_ctrl
         // zclone was created with in_ctrl as its control, so it is among the
         // users that are moved here.
1277     for (DUIterator_Last imin, i = in_ctrl->last_outs(imin); i >= imin; --i) {
1278       Node *use = in_ctrl->last_out(i);
1279       uint l;
           // Find the (first) input slot of 'use' that refers to in_ctrl.
1280       for (l = 0; use->in(l) != in_ctrl; l++) {}
1281       igvn.replace_input_of(use, l, barrier_ctrl);
1282     }
1283
         // Only now hook the new load and the barrier onto in_ctrl, so that the
         // loop above did not move them.
1284     load->set_req(MemNode::Control, in_ctrl);
1285     barrier->set_req(LoadBarrierNode::Control, in_ctrl);
1286     zclone->add_req(barrier_val); // add req as keep alive.
1287
1288     C->print_method(PHASE_ADD_UNSAFE_BARRIER, 4, zclone->_idx);
1289   }
1290 }
1291 
1292 void ZBarrierSetC2::insert_barriers_on_unsafe(PhaseIdealLoop* phase) const {
1293   Compile* const C = phase->C;
1294   PhaseIterGVN& igvn = phase->igvn();
1295   // Nodes whose index is not below this bound were created during this pass.
1296   const uint first_new_idx = C->unique();
1297
1298   VectorSet seen(Thread::current()->resource_area());
1299   GrowableArray<Node *> pending(Thread::current()->resource_area(), 0, 0, NULL);
1300   pending.push(C->root());
1301   seen.test_set(C->root()->_idx);
1302
1303   // Worklist walk over input edges from the root; visit all unsafe ops that require a barrier.
1304   while (pending.length() > 0) {
1305     Node* const cur = pending.pop();
1306
1307     // Nodes created by this pass are not processed again.
1308     if (cur->_idx < first_new_idx && cur->is_LoadStore()) {
1309       LoadStoreNode* const lsn = cur->as_LoadStore();
1310       if (lsn->has_barrier()) {
1311         BasicType bt = lsn->in(MemNode::Address)->bottom_type()->basic_type();
1312         assert ((bt == T_OBJECT || bt == T_ARRAY), "Sanity test");
1313         insert_barrier_before_unsafe(phase, lsn);
1314       }
1315     }
1316
1317     // The inputs are read after the rewrite above, in the same statement order as before.
1318     for (uint i = 0; i < cur->len(); i++) {
1319       Node* const input = cur->in(i);
1320       if (input != NULL && !seen.test_set(input->_idx)) {
1321         pending.push(input);
1322       }
1323     }
1324   }
1325   igvn.optimize();
1326   C->print_method(PHASE_ADD_UNSAFE_BARRIER, 2);
1327 }
1328 
1329 // ZBarrierSetC2::clean_catch_blocks prepares the IR for splicing in load
1330 // barrier nodes.
1331 //
1332 // Instructions may sit between a call and its catch nodes. (Usually that is
1333 // dealt with in PhaseCFG:call_catch_cleanup, which clones mach nodes in already
1334 // scheduled blocks.) Loads that require barriers must not stay there: the
1335 // barrier needs new control flow spliced in, and that would violate the IR.
1336 //
1337 // Every load that requires a barrier is therefore cloned, together with its
1338 // dependent instructions, down to each use. The loads must come first in the
1339 // catch block, before any store.
1340 //
1341 // When a use is at a place dominated by all catch blocks, a load is needed in
1342 // each catch block and a Phi at the dominated use.
1343
1344 void ZBarrierSetC2::clean_catch_blocks(PhaseIdealLoop* phase) const {
1345   Compile* const C = phase->C;
1346   const uint first_new_idx = C->unique();
1347   PhaseIterGVN& igvn = phase->igvn();
1348   VectorSet seen(Thread::current()->resource_area());
1349   GrowableArray<Node *> pending(Thread::current()->resource_area(), 0, 0, NULL);
1350   pending.push(C->root());
1351   seen.test_set(C->root()->_idx);
1352
1353   // Worklist walk over input edges from the root; visit all loads that require a barrier.
1354   while (pending.length() > 0) {
1355     Node* const cur = pending.pop();
1356
1357     // The inputs are queued before cur itself is looked at.
1358     for (uint i = 0; i < cur->len(); i++) {
1359       Node* const input = cur->in(i);
1360       if (input != NULL && !seen.test_set(input->_idx)) {
1361         pending.push(input);
1362       }
1363     }
1364
1365     // Skip non-loads and nodes that were created during the cleanup.
1366     if (!cur->is_Load() || cur->_idx >= first_new_idx) {
1367       continue;
1368     }
1369     // Only loads that will have a barrier are of interest.
1370     LoadNode* const load = cur->isa_Load();
1371     if (load_require_barrier(load)) {
1372       process_catch_cleanup_candidate(phase, load);
1373     }
1374   }
1375
1376   C->print_method(PHASE_CALL_CATCH_CLEANUP, 2);
1377 }
1378 
1379 // Compares two loads by the dominator depth of their control: the result is
1380 // negative when n1's control is the deeper one. Equal depths fall back to _idx.
1381 class DomDepthCompareClosure : public CompareClosure<LoadNode*> {
1382   PhaseIdealLoop* _phase;
1383
1384 public:
1385   DomDepthCompareClosure(PhaseIdealLoop* phase) : _phase(phase) { }
1386
1387   int do_compare(LoadNode* const &n1, LoadNode* const &n2) {
1388     const int depth1 = _phase->dom_depth(_phase->get_ctrl(n1));
1389     const int depth2 = _phase->dom_depth(_phase->get_ctrl(n2));
1390     if (depth1 != depth2) {
1391       return depth2 - depth1;
1392     }
1393     return n1->_idx - n2->_idx;  // same depth: the index keeps distinct loads from comparing equal
1394   }
1395 };
1396 
1397 // Collect every load that requires a barrier and is reachable from the root
1398 // over required inputs into loadList, inserted in DomDepthCompareClosure order.
1399 void gather_loadnodes_sorted(PhaseIdealLoop* phase, GrowableArray<LoadNode*>* loadList) {
1400   VectorSet done(Thread::current()->resource_area());
1401   GrowableArray<Node *> pending(Thread::current()->resource_area(), 0, 0, NULL);
1402   DomDepthCompareClosure by_dom_depth(phase);
1403
1404   pending.push(phase->C->root());
1405   while (pending.length() > 0) {
1406     Node* const cur = pending.pop();
1407     // A node is marked only when it is popped, so it may have been queued more than once.
1408     if (done.test(cur->_idx)) {
1409       continue;
1410     }
1411
1412     LoadNode* const load = cur->isa_Load();
1413     if (load != NULL && load_require_barrier(load)) {
1414       assert(phase->get_ctrl(load) != NULL, "sanity");
1415       assert(phase->dom_depth(phase->get_ctrl(load)) != 0, "sanity");
1416       loadList->insert_sorted(&by_dom_depth, load);
1417     }
1418
1419     done.set(cur->_idx);
1420
1421     // Queue the required inputs that have not been finished yet.
1422     for (uint i = 0; i < cur->req(); i++) {
1423       Node* const input = cur->in(i);
1424       if (input != NULL && !done.test(input->_idx)) {
1425         pending.push(input);
1426       }
1427     }
1428   }
1429 }
1430 
1431 // Add LoadBarriers to all LoadPs.
1432 // The loads come from gather_loadnodes_sorted and are handled in that order;
1433 // a load is retried until its barrier has been expanded.
1434 void ZBarrierSetC2::insert_load_barriers(PhaseIdealLoop* phase) const {
1435
1436   bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
1437   GrowableArray<LoadNode *> sorted_loads(Thread::current()->resource_area(), 0, 0, NULL);
1438   gather_loadnodes_sorted(phase, &sorted_loads);
1439
1440   PhaseIterGVN &igvn = phase->igvn();
1441
1442   GrowableArrayIterator<LoadNode *> it = sorted_loads.begin();
1443   for (; it != sorted_loads.end(); ++it) {
1444     LoadNode* const load = *it;
1445
1446     // Nothing happens for a load whose barrier is already expanded.
1447     while (!load_has_expanded_barrier(load)) {
1448       // A non-NULL result is another load that has to be expanded first;
1449       // keep following such results until NULL comes back.
1450       LoadNode* blocker = insert_one_loadbarrier(phase, load, phase->get_ctrl(load));
1451       while (blocker != NULL) {
1452         blocker = insert_one_loadbarrier(phase, blocker, phase->get_ctrl(blocker));
1453       }
1454     }
1455   }
1456
1457   phase->C->print_method(PHASE_INSERT_BARRIER, 2);
1458 }
1460 
1461 void push_antidependent_stores(PhaseIdealLoop* phase, Node_Stack& nodestack, LoadNode* start_load) {
1462   // Push every store that hangs off the load's memory input, is assigned to
1463   // the same control as the load, and can alias it.
1464   PhaseIterGVN& igvn = phase->igvn();
1465   const int load_alias = igvn.C->get_alias_index(start_load->adr_type());
1466   Node* const load_mem = start_load->in(1);
1467
1468   for (DUIterator_Fast imax, u = load_mem->fast_outs(imax); u < imax; u++) {
1469     Node* const user = load_mem->fast_out(u);
1470
1471     // Candidates are nodes other than the load itself that are stores in the load's block.
1472     const bool same_block_store = (user != start_load) && user->is_Store() && phase->has_ctrl(user) &&
1473                                   (phase->get_ctrl(user) == phase->get_ctrl(start_load));
1474     if (!same_block_store) {
1475       continue;
1476     }
1477
1478     // Add the store when it can alias the load.
1479     StoreNode* const store = user->isa_Store();
1480     if (igvn.C->can_alias(store->adr_type(), load_alias)) {
1481       nodestack.push(store, 0);
1482     }
1483   }
     }
1484 
1485 LoadNode* ZBarrierSetC2::insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* start_load, Node* ctrl) const {
       // Try to expand the barrier for start_load.
       //
       // phase      - loop phase used for control lookups
       // start_load - the load that should get its barrier
       // ctrl       - control handed on to insert_one_loadbarrier_inner
       //
       // Returns another load that must be expanded first, in which case
       // nothing has been changed, or NULL once the barrier for start_load has
       // been inserted.
1486   bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
1487   PhaseIterGVN &igvn = phase->igvn();
1488
1489   // Check for other loadPs at the same loop depth that is reachable by a DFS
1490   // - if found - return it. It needs to be inserted first
1491   // - otherwise proceed and insert barrier
1492
1493   VectorSet visited(Thread::current()->resource_area());
1494   Node_Stack nodestack(100);
1495
       // Seed the walk with the load itself and with the aliasing stores on the
       // same memory that are assigned to the same control.
1496   nodestack.push(start_load, 0);
1497   push_antidependent_stores(phase, nodestack, start_load);
1498
1499   while(!nodestack.is_empty()) {
1500     Node* n = nodestack.node(); // peek
1501     nodestack.pop();
1502     if (visited.test(n->_idx)) {
1503       continue;
1504     }
1505
1506     if (n->is_Load() && n != start_load && load_require_barrier(n->as_Load()) && !load_has_expanded_barrier(n->as_Load())) {
1507       // Found another load that needs a barrier in the same block. Must expand later loads first.
1508       if (trace) tty->print_cr(" * Found LoadP %i on DFS", n->_idx);
1509       return n->as_Load(); // return node that should be expanded first
1510     }
1511
         // The walk does not continue through nodes without a control in the
         // phase, nodes assigned to another control than the load, or phis.
         // Such nodes are not added to 'visited' either.
1512     if (!phase->has_ctrl(n)) continue;
1513     if (phase->get_ctrl(n) != phase->get_ctrl(start_load)) continue;
1514     if (n->is_Phi()) continue;
1515
1516     visited.set(n->_idx);
1517     // push all children
1518     for (DUIterator_Fast imax, ii = n->fast_outs(imax); ii < imax; ii++) {
1519       Node* c = n->fast_out(ii);
1520       if (c != NULL) {
1521         nodestack.push(c, 0);
1522       }
1523     }
1524   }
1525
       // No other load has to go first. 'visited' now holds the nodes reached
       // from the load within its block; the inner routine uses it to decide
       // which nodes are moved below the barrier.
1526   insert_one_loadbarrier_inner(phase, start_load, ctrl, visited);
1527   return NULL;
1528 }
1529 
1530 void ZBarrierSetC2::insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited2) const {
       // Create a LoadBarrierNode for 'load' and splice it into the graph:
       // - the single CFG successor of ctrl is moved to the barrier's control
       //   projection,
       // - users of ctrl that are in visited2 (except the load) get the
       //   barrier's control projection as their control input,
       // - users of the load read the barrier's Oop projection instead.
       //
       // phase    - loop phase; provides the IGVN and the Compile object
       // load     - the load that gets the barrier
       // ctrl     - control node after which the barrier is spliced in
       // visited2 - node indices collected by insert_one_loadbarrier
       //            NOTE(review): received by value - confirm the copy is intended.
1531   PhaseIterGVN &igvn = phase->igvn();
1532   Compile* C = igvn.C;
1533   bool trace = C->directive()->ZTraceLoadBarriersOption;
1534
1535   // create barrier
       // Control and Oop inputs start out as NULL and are connected at the end.
1536   Node* barrier = new LoadBarrierNode(C, NULL, load->in(LoadNode::Memory), NULL, load->in(LoadNode::Address), load_has_weak_barrier(load));
1537   Node* barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
1538   Node* barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);
1539
1540   if (trace) tty->print_cr("Insert load %i with barrier: %i and ctrl : %i", load->_idx, barrier->_idx, ctrl->_idx);
1541
1542   // Splice control
1543   // - insert barrier control diamond between loads ctrl and ctrl successor on path to block end.
1544   // - If control successor is a catch, step over to next.
1545   Node* ctrl_succ = NULL;
1546   for (DUIterator_Fast imax, j = ctrl->fast_outs(imax); j < imax; j++) {
1547     Node* tmp = ctrl->fast_out(j);
1548
1549     // - CFG nodes is the ones we are going to splice (1 only!)
1550     // - Phi nodes will continue to hang from the region node!
1551     // - self loops should be skipped
1552     if (tmp->is_Phi() || tmp == ctrl) {
1553       continue;
1554     }
1555
1556     if (tmp->is_CFG()) {
1557       assert(ctrl_succ == NULL, "There can be only one");
1558       ctrl_succ = tmp;
1559       continue;
1560     }
1561   }
1562
1563   // Now splice control
1564   assert(ctrl_succ != load, "sanity");
1565   assert(ctrl_succ != NULL, "Broken IR");
1566   bool found = false;
1567   for(uint k = 0; k < ctrl_succ->req(); k++) {
1568     if (ctrl_succ->in(k) == ctrl) {
1569       assert(!found, "sanity");
1570       if (trace) tty->print_cr(" Move CFG ctrl_succ %i to barrier_ctrl", ctrl_succ->_idx);
1571       igvn.replace_input_of(ctrl_succ, k, barrier_ctrl);
1572       found = true;
           // Step back so that the loop's k++ looks at this input slot once more.
1573       k--;
1574     }
1575   }
1576
1577   // For all successors of ctrl - move all visited to become successors of barrier_ctrl instead
1578   for (DUIterator_Fast imax, r = ctrl->fast_outs(imax); r < imax; r++) {
1579     Node* tmp = ctrl->fast_out(r);
1580     if (visited2.test(tmp->_idx) && (tmp != load)) {
1581       if (trace) tty->print_cr(" Move ctrl_succ %i to barrier_ctrl", tmp->_idx);
1582       igvn.replace_input_of(tmp, 0, barrier_ctrl);
           // tmp has left ctrl's out list; adjust the iterator to match.
1583       --r; --imax;
1584     }
1585   }
1586
1587   // Move the loads user to the barrier
1588   for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
1589     Node* u = load->fast_out(i);
1590     if (u->isa_LoadBarrier()) {
1591       continue;
1592     }
1593
1594     // find correct input  - replace with iterator?
         // Only one input is replaced per round; a user that has the load on
         // several inputs shows up again in a later round of the outer loop.
1595     for(uint j = 0; j < u->req(); j++) {
1596       if (u->in(j) == load) {
1597         igvn.replace_input_of(u, j, barrier_val);
1598         --i; --imax; // Adjust the iterator of the *outer* loop
1599         break; // some nodes (calls) might have several uses from the same node
1600       }
1601     }
1602   }
1603
1604   // Connect barrier to load and control
       // Done after the loops above, so the barrier was not among the users of
       // ctrl or of the load while those were being rewired.
1605   barrier->set_req(LoadBarrierNode::Oop, load);
1606   barrier->set_req(LoadBarrierNode::Control, ctrl);
1607
1608   igvn.rehash_node_delayed(load);
1609   igvn.register_new_node_with_optimizer(barrier);
1610   igvn.register_new_node_with_optimizer(barrier_val);
1611   igvn.register_new_node_with_optimizer(barrier_ctrl);
1612   load_set_expanded_barrier(load);
1613
1614   C->print_method(PHASE_INSERT_BARRIER, 3, load->_idx);
1615 }
1616 
1617 // The bad_mask in the ThreadLocalData shouldn't have an anti-dep-check.
1618 // The bad_mask address is of type TypeRawPtr, but that will alias
1619 // InitializeNodes until the type system is expanded.
     //
     // Returns false only when 'node' is a Mach node whose address is
     // <ThreadLocal base> + ZThreadLocalData::address_bad_mask_offset();
     // every other node (including non-Mach nodes) returns true.
1620 bool ZBarrierSetC2::needs_anti_dependence_check(const Node* node) const {
       // isa_Mach() yields NULL for a node that is not a MachNode, which makes
       // the NULL test below meaningful. The previous as_Mach() is the asserting
       // cast and never produced NULL, so the guard was dead code.
1621   MachNode* mnode = node->isa_Mach();
1622   if (mnode != NULL) {
1623     intptr_t offset = 0;
         // adr_type2 is only an out-parameter required by get_base_and_disp();
         // its value is not used afterwards.
1624     const TypePtr *adr_type2 = NULL;
1625     const Node* base = mnode->get_base_and_disp(offset, adr_type2);
         // Match a load from the thread-local bad mask: the base must be the
         // ThreadLocal node and the displacement the bad-mask offset.
1626     if ((base != NULL) &&
1627         (base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_ThreadLocal) &&
1628         (offset == in_bytes(ZThreadLocalData::address_bad_mask_offset()))) {
1629       return false;
1630     }
1631   }
       // Conservative default: keep the anti-dependence check.
1632   return true;
1633 }