1 /*
   2  * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/shared/barrierSet.hpp"
  27 #include "gc/shared/c2/barrierSetC2.hpp"
  28 #include "memory/allocation.inline.hpp"
  29 #include "memory/resourceArea.hpp"
  30 #include "opto/addnode.hpp"
  31 #include "opto/callnode.hpp"
  32 #include "opto/castnode.hpp"
  33 #include "opto/connode.hpp"
  34 #include "opto/castnode.hpp"
  35 #include "opto/divnode.hpp"
  36 #include "opto/inlinetypenode.hpp"
  37 #include "opto/loopnode.hpp"
  38 #include "opto/matcher.hpp"
  39 #include "opto/mulnode.hpp"
  40 #include "opto/movenode.hpp"
  41 #include "opto/opaquenode.hpp"
  42 #include "opto/rootnode.hpp"
  43 #include "opto/subnode.hpp"
  44 #include "opto/subtypenode.hpp"
  45 #include "utilities/macros.hpp"
  46 
//=============================================================================
//------------------------------split_thru_phi---------------------------------
// Split Node 'n' through merge point 'region' if there is enough win: clone
// 'n' once per incoming path of 'region', rewire each clone to the pre-Phi
// inputs of that path, and merge the clones with a new Phi.  A path "wins"
// when its clone constant-folds or commons up with an existing node.
// Returns the new Phi if more than 'policy' paths win, otherwise NULL.
Node* PhaseIdealLoop::split_thru_phi(Node* n, Node* region, int policy) {
  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
    // ConvI2L may have type information on it which is unsafe to push up
    // so disable this for now
    return NULL;
  }

  // Splitting range check CastIIs through a loop induction Phi can
  // cause new Phis to be created that are left unrelated to the loop
  // induction Phi and prevent optimizations (vectorization)
  if (n->Opcode() == Op_CastII && region->is_CountedLoop() &&
      n->in(1) == region->as_CountedLoop()->phi()) {
    return NULL;
  }

  // Inline types should not be split through Phis because they cannot be merged
  // through Phi nodes but each value input needs to be merged individually.
  if (n->is_InlineType()) {
    return NULL;
  }

  // Bail out if 'n' is a Div or Mod node whose zero check was removed earlier (i.e. control is NULL) and its divisor is an induction variable
  // phi p of a trip-counted (integer) loop whose inputs could be zero (include zero in their type range). p could have a more precise type
  // range that does not necessarily include all values of its inputs. Since each of these inputs will be a divisor of the newly cloned nodes
  // of 'n', we need to bail out if one of these divisors could be zero (zero in its type range).
  if ((n->Opcode() == Op_DivI || n->Opcode() == Op_ModI) && n->in(0) == NULL
      && region->is_CountedLoop() && n->in(2) == region->as_CountedLoop()->phi()) {
    Node* phi = region->as_CountedLoop()->phi();
    for (uint i = 1; i < phi->req(); i++) {
      if (_igvn.type(phi->in(i))->filter_speculative(TypeInt::ZERO) != Type::TOP) {
        // Zero could be a possible value but we already removed the zero check. Bail out to avoid a possible division by zero at a later point.
        return NULL;
      }
    }
  }

  // Number of paths on which the cloned node simplified.
  int wins = 0;
  assert(!n->is_CFG(), "");
  assert(region->is_Region(), "");

  const Type* type = n->bottom_type();
  const TypeOopPtr* t_oop = _igvn.type(n)->isa_oopptr();
  Node* phi;
  if (t_oop != NULL && t_oop->is_known_instance_field()) {
    // Known-instance field: build the Phi with the instance id, alias
    // index and offset so memory slices stay precise.
    int iid    = t_oop->instance_id();
    int index  = C->get_alias_index(t_oop);
    int offset = t_oop->offset();
    phi = new PhiNode(region, type, NULL, iid, index, offset);
  } else {
    phi = PhiNode::make_blank(region, n);
  }
  // Remember the node-index watermark: nodes created from here on are
  // "new" and have no prior control/loop placement.
  uint old_unique = C->unique();
  for (uint i = 1; i < region->req(); i++) {
    Node* x;
    Node* the_clone = NULL;
    if (region->in(i) == C->top()) {
      x = C->top();             // Dead path?  Use a dead data op
    } else {
      x = n->clone();           // Else clone up the data op
      the_clone = x;            // Remember for possible deletion.
      // Alter data node to use pre-phi inputs
      if (n->in(0) == region)
        x->set_req( 0, region->in(i) );
      for (uint j = 1; j < n->req(); j++) {
        Node* in = n->in(j);
        if (in->is_Phi() && in->in(0) == region)
          x->set_req(j, in->in(i)); // Use pre-Phi input for the clone
      }
    }
    // Check for a 'win' on some paths
    const Type* t = x->Value(&_igvn);

    bool singleton = t->singleton();

    // A TOP singleton indicates that there are no possible values incoming
    // along a particular edge. In most cases, this is OK, and the Phi will
    // be eliminated later in an Ideal call. However, we can't allow this to
    // happen if the singleton occurs on loop entry, as the elimination of
    // the PhiNode may cause the resulting node to migrate back to a previous
    // loop iteration.
    if (singleton && t == Type::TOP) {
      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
      // irreducible loop may not be indicated by an affirmative is_Loop());
      // therefore, the only top we can split thru a phi is on a backedge of
      // a loop.
      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
    }

    if (singleton) {
      // Clone folded to a constant on this path.
      wins++;
      x = ((PhaseGVN&)_igvn).makecon(t);
    } else {
      // We now call Identity to try to simplify the cloned node.
      // Note that some Identity methods call phase->type(this).
      // Make sure that the type array is big enough for
      // our new node, even though we may throw the node away.
      // (Note: This tweaking with igvn only works because x is a new node.)
      _igvn.set_type(x, t);
      // If x is a TypeNode, capture any more-precise type permanently into Node
      // otherwise it will be not updated during igvn->transform since
      // igvn->type(x) is set to x->Value() already.
      x->raise_bottom_type(t);
      Node* y = x->Identity(&_igvn);
      if (y != x) {
        // Clone simplified to an existing node.
        wins++;
        x = y;
      } else {
        // Clone hash-commoned with an existing node.
        y = _igvn.hash_find(x);
        if (y) {
          wins++;
          x = y;
        } else {
          // Else x is a new node we are keeping
          // We do not need register_new_node_with_optimizer
          // because set_type has already been called.
          _igvn._worklist.push(x);
        }
      }
    }
    // The clone was replaced by a constant or an existing node; discard it.
    if (x != the_clone && the_clone != NULL)
      _igvn.remove_dead_node(the_clone);
    phi->set_req( i, x );
  }
  // Too few wins?
  if (wins <= policy) {
    _igvn.remove_dead_node(phi);
    return NULL;
  }

  // Record Phi
  register_new_node( phi, region );

  // Place each Phi input at a control point that dominates its new use.
  for (uint i2 = 1; i2 < phi->req(); i2++) {
    Node *x = phi->in(i2);
    // If we commoned up the cloned 'x' with another existing Node,
    // the existing Node picks up a new use.  We need to make the
    // existing Node occur higher up so it dominates its uses.
    Node *old_ctrl;
    IdealLoopTree *old_loop;

    if (x->is_Con()) {
      // Constant's control is always root.
      set_ctrl(x, C->root());
      continue;
    }
    // The occasional new node
    if (x->_idx >= old_unique) {     // Found a new, unplaced node?
      old_ctrl = NULL;
      old_loop = NULL;               // Not in any prior loop
    } else {
      old_ctrl = get_ctrl(x);
      old_loop = get_loop(old_ctrl); // Get prior loop
    }
    // New late point must dominate new use
    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
    if (new_ctrl == old_ctrl) // Nothing is changed
      continue;

    IdealLoopTree *new_loop = get_loop(new_ctrl);

    // Don't move x into a loop if its uses are
    // outside of loop. Otherwise x will be cloned
    // for each use outside of this loop.
    IdealLoopTree *use_loop = get_loop(region);
    if (!new_loop->is_member(use_loop) &&
        (old_loop == NULL || !new_loop->is_member(old_loop))) {
      // Take early control, later control will be recalculated
      // during next iteration of loop optimizations.
      new_ctrl = get_early_ctrl(x);
      new_loop = get_loop(new_ctrl);
    }
    // Set new location
    set_ctrl(x, new_ctrl);
    // If changing loop bodies, see if we need to collect into new body
    if (old_loop != new_loop) {
      if (old_loop && !old_loop->_child)
        old_loop->_body.yank(x);
      if (!new_loop->_child)
        new_loop->_body.push(x);  // Collect body info
    }
  }

  return phi;
}
 234 
//------------------------------dominated_by------------------------------------
// Replace the dominated test with an obvious true or false.  Place it on the
// IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
// live path up to the dominating control.
void PhaseIdealLoop::dominated_by(IfProjNode* prevdom, IfNode* iff, bool flip, bool exclude_loop_predicate) {
  if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }

  // prevdom is the dominating projection of the dominating test.
  assert(iff->Opcode() == Op_If ||
         iff->Opcode() == Op_CountedLoopEnd ||
         iff->Opcode() == Op_LongCountedLoopEnd ||
         iff->Opcode() == Op_RangeCheck,
        "Check this code when new subtype is added");

  // Determine which projection of the dominated test stays live; 'flip'
  // means the dominating and dominated tests have opposite senses.
  int pop = prevdom->Opcode();
  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
  if (flip) {
    if (pop == Op_IfTrue)
      pop = Op_IfFalse;
    else
      pop = Op_IfTrue;
  }
  // 'con' is set to true or false to kill the dominated test.
  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
  set_ctrl(con, C->root()); // Constant gets a new use
  // Hack the dominated test
  _igvn.replace_input_of(iff, 1, con);

  // If I don't have a reachable TRUE and FALSE path following the IfNode then
  // I can assume this path reaches an infinite loop.  In this case it's not
  // important to optimize the data Nodes - either the whole compilation will
  // be tossed or this path (and all data Nodes) will go dead.
  if (iff->outcnt() != 2) return;

  // Make control-dependent data Nodes on the live path (path that will remain
  // once the dominated IF is removed) become control-dependent on the
  // dominating projection.
  Node* dp = iff->proj_out_or_null(pop == Op_IfTrue);

  // Loop predicates may have depending checks which should not
  // be skipped. For example, range check predicate has two checks
  // for lower and upper bounds.
  if (dp == NULL)
    return;

  ProjNode* dp_proj  = dp->as_Proj();
  ProjNode* unc_proj = iff->proj_out(1 - dp_proj->_con)->as_Proj();
  if (exclude_loop_predicate &&
      (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
       unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_profile_predicate) != NULL ||
       unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
    // If this is a range check (IfNode::is_range_check), do not
    // reorder because Compile::allow_range_check_smearing might have
    // changed the check.
    return; // Let IGVN transformation change control dependence.
  }

  IdealLoopTree* old_loop = get_loop(dp);

  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
    Node* cd = dp->fast_out(i); // Control-dependent node
    // Do not rewire Div and Mod nodes which could have a zero divisor to avoid skipping their zero check.
    if (cd->depends_only_on_test() && _igvn.no_dependent_zero_check(cd)) {
      assert(cd->in(0) == dp, "");
      _igvn.replace_input_of(cd, 0, prevdom);
      set_early_ctrl(cd, false);
      IdealLoopTree* new_loop = get_loop(get_ctrl(cd));
      // Keep the per-loop body lists in sync with the new control placement.
      if (old_loop != new_loop) {
        if (!old_loop->_child) {
          old_loop->_body.yank(cd);
        }
        if (!new_loop->_child) {
          new_loop->_body.push(cd);
        }
      }
      // Rewiring removed 'cd' from dp's output list; back up the iterator.
      --i;
      --imax;
    }
  }
}
 315 
 316 //------------------------------has_local_phi_input----------------------------
 317 // Return TRUE if 'n' has Phi inputs from its local block and no other
 318 // block-local inputs (all non-local-phi inputs come from earlier blocks)
 319 Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
 320   Node *n_ctrl = get_ctrl(n);
 321   // See if some inputs come from a Phi in this block, or from before
 322   // this block.
 323   uint i;
 324   for( i = 1; i < n->req(); i++ ) {
 325     Node *phi = n->in(i);
 326     if( phi->is_Phi() && phi->in(0) == n_ctrl )
 327       break;
 328   }
 329   if( i >= n->req() )
 330     return NULL;                // No Phi inputs; nowhere to clone thru
 331 
 332   // Check for inputs created between 'n' and the Phi input.  These
 333   // must split as well; they have already been given the chance
 334   // (courtesy of a post-order visit) and since they did not we must
 335   // recover the 'cost' of splitting them by being very profitable
 336   // when splitting 'n'.  Since this is unlikely we simply give up.
 337   for( i = 1; i < n->req(); i++ ) {
 338     Node *m = n->in(i);
 339     if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
 340       // We allow the special case of AddP's with no local inputs.
 341       // This allows us to split-up address expressions.
 342       if (m->is_AddP() &&
 343           get_ctrl(m->in(AddPNode::Base)) != n_ctrl &&
 344           get_ctrl(m->in(AddPNode::Address)) != n_ctrl &&
 345           get_ctrl(m->in(AddPNode::Offset)) != n_ctrl) {
 346         // Move the AddP up to the dominating point. That's fine because control of m's inputs
 347         // must dominate get_ctrl(m) == n_ctrl and we just checked that the input controls are != n_ctrl.
 348         Node* c = find_non_split_ctrl(idom(n_ctrl));
 349         if (c->is_OuterStripMinedLoop()) {
 350           c->as_Loop()->verify_strip_mined(1);
 351           c = c->in(LoopNode::EntryControl);
 352         }
 353         set_ctrl_and_loop(m, c);
 354         continue;
 355       }
 356       return NULL;
 357     }
 358     assert(n->is_Phi() || m->is_Phi() || is_dominator(get_ctrl(m), n_ctrl), "m has strange control");
 359   }
 360 
 361   return n_ctrl;
 362 }
 363 
// Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
// V is loop-varying and I loop-invariant: distributing the shift exposes
// the invariant sub-expression (I<<2) so it can be hoisted out of the loop.
// 'bt' selects the int or long flavor of the shift/add/sub opcodes.
// Returns the replacement node, or NULL if the shape does not match.
Node* PhaseIdealLoop::remix_address_expressions_add_left_shift(Node* n, IdealLoopTree* n_loop, Node* n_ctrl, BasicType bt) {
  assert(bt == T_INT || bt == T_LONG, "only for integers");
  int n_op = n->Opcode();

  if (n_op == Op_LShift(bt)) {
    // Scale is loop invariant
    Node* scale = n->in(2);
    Node* scale_ctrl = get_ctrl(scale);
    IdealLoopTree* scale_loop = get_loop(scale_ctrl);
    if (n_loop == scale_loop || !scale_loop->is_member(n_loop)) {
      return NULL;
    }
    const TypeInt* scale_t = scale->bottom_type()->isa_int();
    if (scale_t != NULL && scale_t->is_con() && scale_t->get_con() >= 16) {
      return NULL;              // Dont bother with byte/short masking
    }
    // Add must vary with loop (else shift would be loop-invariant)
    Node* add = n->in(1);
    Node* add_ctrl = get_ctrl(add);
    IdealLoopTree* add_loop = get_loop(add_ctrl);
    if (n_loop != add_loop) {
      return NULL;  // happens w/ evil ZKM loops
    }

    // Convert I-V into I+ (0-V); same for V-I
    if (add->Opcode() == Op_Sub(bt) &&
        _igvn.type(add->in(1)) != TypeInteger::zero(bt)) {
      assert(add->Opcode() == Op_SubI || add->Opcode() == Op_SubL, "");
      Node* zero = _igvn.integercon(0, bt);
      set_ctrl(zero, C->root());
      Node* neg = SubNode::make(zero, add->in(2), bt);
      register_new_node(neg, get_ctrl(add->in(2)));
      add = AddNode::make(add->in(1), neg, bt);
      register_new_node(add, add_ctrl);
    }
    if (add->Opcode() != Op_Add(bt)) return NULL;
    assert(add->Opcode() == Op_AddI || add->Opcode() == Op_AddL, "");
    // See if one add input is loop invariant
    Node* add_var = add->in(1);
    Node* add_var_ctrl = get_ctrl(add_var);
    IdealLoopTree* add_var_loop = get_loop(add_var_ctrl);
    Node* add_invar = add->in(2);
    Node* add_invar_ctrl = get_ctrl(add_invar);
    IdealLoopTree* add_invar_loop = get_loop(add_invar_ctrl);
    if (add_invar_loop == n_loop) {
      // Swap to find the invariant part
      add_invar = add_var;
      add_invar_ctrl = add_var_ctrl;
      add_invar_loop = add_var_loop;
      add_var = add->in(2);
    } else if (add_var_loop != n_loop) { // Else neither input is loop invariant
      return NULL;
    }
    if (n_loop == add_invar_loop || !add_invar_loop->is_member(n_loop)) {
      return NULL;              // No invariant part of the add?
    }

    // Yes!  Reshape address expression!
    Node* inv_scale = LShiftNode::make(add_invar, scale, bt);
    // Place the invariant shift at the deeper of its two input controls
    // so both inputs dominate it.
    Node* inv_scale_ctrl =
            dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
            add_invar_ctrl : scale_ctrl;
    register_new_node(inv_scale, inv_scale_ctrl);
    Node* var_scale = LShiftNode::make(add_var, scale, bt);
    register_new_node(var_scale, n_ctrl);
    Node* var_add = AddNode::make(var_scale, inv_scale, bt);
    register_new_node(var_add, n_ctrl);
    _igvn.replace_node(n, var_add);
    return var_add;
  }
  return NULL;
}
 437 
//------------------------------remix_address_expressions----------------------
// Rework addressing expressions to get the most loop-invariant stuff
// moved out.  We'd like to do all associative operators, but it's especially
// important (common) to do address expressions.
// Returns the replacement node if 'n' was rewritten, otherwise NULL
// (NULL is also returned when only edges were swapped in place).
Node* PhaseIdealLoop::remix_address_expressions(Node* n) {
  if (!has_ctrl(n))  return NULL;
  Node* n_ctrl = get_ctrl(n);
  IdealLoopTree* n_loop = get_loop(n_ctrl);

  // See if 'n' mixes loop-varying and loop-invariant inputs and
  // itself is loop-varying.

  // Only interested in binary ops (and AddP)
  if (n->req() < 3 || n->req() > 4) return NULL;

  // For binary ops input 3 aliases input 2 so the checks below work uniformly.
  Node* n1_ctrl = get_ctrl(n->in(                    1));
  Node* n2_ctrl = get_ctrl(n->in(                    2));
  Node* n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
  IdealLoopTree* n1_loop = get_loop(n1_ctrl);
  IdealLoopTree* n2_loop = get_loop(n2_ctrl);
  IdealLoopTree* n3_loop = get_loop(n3_ctrl);

  // Does one of my inputs spin in a tighter loop than self?
  if ((n_loop->is_member(n1_loop) && n_loop != n1_loop) ||
      (n_loop->is_member(n2_loop) && n_loop != n2_loop) ||
      (n_loop->is_member(n3_loop) && n_loop != n3_loop)) {
    return NULL;                // Leave well enough alone
  }

  // Is at least one of my inputs loop-invariant?
  if (n1_loop == n_loop &&
      n2_loop == n_loop &&
      n3_loop == n_loop) {
    return NULL;                // No loop-invariant inputs
  }

  // Try the shift-distribution reshape for both int and long shifts.
  Node* res = remix_address_expressions_add_left_shift(n, n_loop, n_ctrl, T_INT);
  if (res != NULL) {
    return res;
  }
  res = remix_address_expressions_add_left_shift(n, n_loop, n_ctrl, T_LONG);
  if (res != NULL) {
    return res;
  }

  int n_op = n->Opcode();
  // Replace (I+V) with (V+I)
  // Canonicalize commutative ops so the loop-varying input comes first.
  if (n_op == Op_AddI ||
      n_op == Op_AddL ||
      n_op == Op_AddF ||
      n_op == Op_AddD ||
      n_op == Op_MulI ||
      n_op == Op_MulL ||
      n_op == Op_MulF ||
      n_op == Op_MulD) {
    if (n2_loop == n_loop) {
      assert(n1_loop != n_loop, "");
      n->swap_edges(1, 2);
    }
  }

  // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
  // but not if I2 is a constant.
  if (n_op == Op_AddP) {
    if (n2_loop == n_loop && n3_loop != n_loop) {
      if (n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con()) {
        // Inner AddP: check whether its address is invariant and its
        // offset is the loop-varying part.
        Node* n22_ctrl = get_ctrl(n->in(2)->in(2));
        Node* n23_ctrl = get_ctrl(n->in(2)->in(3));
        IdealLoopTree* n22loop = get_loop(n22_ctrl);
        IdealLoopTree* n23_loop = get_loop(n23_ctrl);
        if (n22loop != n_loop && n22loop->is_member(n_loop) &&
            n23_loop == n_loop) {
          Node* add1 = new AddPNode(n->in(1), n->in(2)->in(2), n->in(3));
          // Stuff new AddP in the loop preheader
          register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
          Node* add2 = new AddPNode(n->in(1), add1, n->in(2)->in(3));
          register_new_node(add2, n_ctrl);
          _igvn.replace_node(n, add2);
          return add2;
        }
      }
    }

    // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
    if (n2_loop != n_loop && n3_loop == n_loop) {
      if (n->in(3)->Opcode() == Op_AddX) {
        Node* V = n->in(3)->in(1);
        Node* I = n->in(3)->in(2);
        // Make V the loop-varying summand and I the candidate invariant.
        if (is_member(n_loop,get_ctrl(V))) {
        } else {
          Node *tmp = V; V = I; I = tmp;
        }
        if (!is_member(n_loop,get_ctrl(I))) {
          Node* add1 = new AddPNode(n->in(1), n->in(2), I);
          // Stuff new AddP in the loop preheader
          register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
          Node* add2 = new AddPNode(n->in(1), add1, V);
          register_new_node(add2, n_ctrl);
          _igvn.replace_node(n, add2);
          return add2;
        }
      }
    }
  }

  return NULL;
}
 545 
 546 // Optimize ((in1[2*i] * in2[2*i]) + (in1[2*i+1] * in2[2*i+1]))
 547 Node *PhaseIdealLoop::convert_add_to_muladd(Node* n) {
 548   assert(n->Opcode() == Op_AddI, "sanity");
 549   Node * nn = NULL;
 550   Node * in1 = n->in(1);
 551   Node * in2 = n->in(2);
 552   if (in1->Opcode() == Op_MulI && in2->Opcode() == Op_MulI) {
 553     IdealLoopTree* loop_n = get_loop(get_ctrl(n));
 554     if (loop_n->is_counted() &&
 555         loop_n->_head->as_Loop()->is_valid_counted_loop(T_INT) &&
 556         Matcher::match_rule_supported(Op_MulAddVS2VI) &&
 557         Matcher::match_rule_supported(Op_MulAddS2I)) {
 558       Node* mul_in1 = in1->in(1);
 559       Node* mul_in2 = in1->in(2);
 560       Node* mul_in3 = in2->in(1);
 561       Node* mul_in4 = in2->in(2);
 562       if (mul_in1->Opcode() == Op_LoadS &&
 563           mul_in2->Opcode() == Op_LoadS &&
 564           mul_in3->Opcode() == Op_LoadS &&
 565           mul_in4->Opcode() == Op_LoadS) {
 566         IdealLoopTree* loop1 = get_loop(get_ctrl(mul_in1));
 567         IdealLoopTree* loop2 = get_loop(get_ctrl(mul_in2));
 568         IdealLoopTree* loop3 = get_loop(get_ctrl(mul_in3));
 569         IdealLoopTree* loop4 = get_loop(get_ctrl(mul_in4));
 570         IdealLoopTree* loop5 = get_loop(get_ctrl(in1));
 571         IdealLoopTree* loop6 = get_loop(get_ctrl(in2));
 572         // All nodes should be in the same counted loop.
 573         if (loop_n == loop1 && loop_n == loop2 && loop_n == loop3 &&
 574             loop_n == loop4 && loop_n == loop5 && loop_n == loop6) {
 575           Node* adr1 = mul_in1->in(MemNode::Address);
 576           Node* adr2 = mul_in2->in(MemNode::Address);
 577           Node* adr3 = mul_in3->in(MemNode::Address);
 578           Node* adr4 = mul_in4->in(MemNode::Address);
 579           if (adr1->is_AddP() && adr2->is_AddP() && adr3->is_AddP() && adr4->is_AddP()) {
 580             if ((adr1->in(AddPNode::Base) == adr3->in(AddPNode::Base)) &&
 581                 (adr2->in(AddPNode::Base) == adr4->in(AddPNode::Base))) {
 582               nn = new MulAddS2INode(mul_in1, mul_in2, mul_in3, mul_in4);
 583               register_new_node(nn, get_ctrl(n));
 584               _igvn.replace_node(n, nn);
 585               return nn;
 586             } else if ((adr1->in(AddPNode::Base) == adr4->in(AddPNode::Base)) &&
 587                        (adr2->in(AddPNode::Base) == adr3->in(AddPNode::Base))) {
 588               nn = new MulAddS2INode(mul_in1, mul_in2, mul_in4, mul_in3);
 589               register_new_node(nn, get_ctrl(n));
 590               _igvn.replace_node(n, nn);
 591               return nn;
 592             }
 593           }
 594         }
 595       }
 596     }
 597   }
 598   return nn;
 599 }
 600 
 601 //------------------------------conditional_move-------------------------------
 602 // Attempt to replace a Phi with a conditional move.  We have some pretty
 603 // strict profitability requirements.  All Phis at the merge point must
 604 // be converted, so we can remove the control flow.  We need to limit the
 605 // number of c-moves to a small handful.  All code that was in the side-arms
 606 // of the CFG diamond is now speculatively executed.  This code has to be
 607 // "cheap enough".  We are pretty much limited to CFG diamonds that merge
 608 // 1 or 2 items with a total of 1 or 2 ops executed speculatively.
 609 Node *PhaseIdealLoop::conditional_move( Node *region ) {
 610 
 611   assert(region->is_Region(), "sanity check");
 612   if (region->req() != 3) return NULL;
 613 
 614   // Check for CFG diamond
 615   Node *lp = region->in(1);
 616   Node *rp = region->in(2);
 617   if (!lp || !rp) return NULL;
 618   Node *lp_c = lp->in(0);
 619   if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
 620   IfNode *iff = lp_c->as_If();
 621 
 622   // Check for ops pinned in an arm of the diamond.
 623   // Can't remove the control flow in this case
 624   if (lp->outcnt() > 1) return NULL;
 625   if (rp->outcnt() > 1) return NULL;
 626 
 627   IdealLoopTree* r_loop = get_loop(region);
 628   assert(r_loop == get_loop(iff), "sanity");
 629   // Always convert to CMOVE if all results are used only outside this loop.
 630   bool used_inside_loop = (r_loop == _ltree_root);
 631 
 632   // Check profitability
 633   int cost = 0;
 634   int phis = 0;
 635   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 636     Node *out = region->fast_out(i);
 637     if (!out->is_Phi()) continue; // Ignore other control edges, etc
 638     phis++;
 639     PhiNode* phi = out->as_Phi();
 640     BasicType bt = phi->type()->basic_type();
 641     switch (bt) {
 642     case T_DOUBLE:
 643     case T_FLOAT:
 644       if (C->use_cmove()) {
 645         continue; //TODO: maybe we want to add some cost
 646       }
 647       cost += Matcher::float_cmove_cost(); // Could be very expensive
 648       break;
 649     case T_LONG: {
 650       cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
 651     }
 652     case T_INT:                 // These all CMOV fine
 653     case T_ADDRESS: {           // (RawPtr)
 654       cost++;
 655       break;
 656     }
 657     case T_NARROWOOP: // Fall through
 658     case T_OBJECT: {            // Base oops are OK, but not derived oops
 659       const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
 660       // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
 661       // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
 662       // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
 663       // have a Phi for the base here that we convert to a CMOVE all is well
 664       // and good.  But if the base is dead, we'll not make a CMOVE.  Later
 665       // the allocator will have to produce a base by creating a CMOVE of the
 666       // relevant bases.  This puts the allocator in the business of
 667       // manufacturing expensive instructions, generally a bad plan.
 668       // Just Say No to Conditionally-Moved Derived Pointers.
 669       if (tp && tp->offset() != 0)
 670         return NULL;
 671       cost++;
 672       break;
 673     }
 674     default:
 675       return NULL;              // In particular, can't do memory or I/O
 676     }
 677     // Add in cost any speculative ops
 678     for (uint j = 1; j < region->req(); j++) {
 679       Node *proj = region->in(j);
 680       Node *inp = phi->in(j);
 681       if (get_ctrl(inp) == proj) { // Found local op
 682         cost++;
 683         // Check for a chain of dependent ops; these will all become
 684         // speculative in a CMOV.
 685         for (uint k = 1; k < inp->req(); k++)
 686           if (get_ctrl(inp->in(k)) == proj)
 687             cost += ConditionalMoveLimit; // Too much speculative goo
 688       }
 689     }
 690     // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
 691     // This will likely Split-If, a higher-payoff operation.
 692     for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
 693       Node* use = phi->fast_out(k);
 694       if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
 695         cost += ConditionalMoveLimit;
 696       // Is there a use inside the loop?
 697       // Note: check only basic types since CMoveP is pinned.
 698       if (!used_inside_loop && is_java_primitive(bt)) {
 699         IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
 700         if (r_loop == u_loop || r_loop->is_member(u_loop)) {
 701           used_inside_loop = true;
 702         }
 703       }
 704     }
 705   }//for
 706   Node* bol = iff->in(1);
 707   if (bol->Opcode() == Op_Opaque4) {
 708     return NULL; // Ignore loop predicate checks (the Opaque4 ensures they will go away)
 709   }
 710   assert(bol->Opcode() == Op_Bool, "Unexpected node");
 711   int cmp_op = bol->in(1)->Opcode();
 712   if (cmp_op == Op_SubTypeCheck) { // SubTypeCheck expansion expects an IfNode
 713     return NULL;
 714   }
 715   // It is expensive to generate flags from a float compare.
 716   // Avoid duplicated float compare.
 717   if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
 718 
 719   float infrequent_prob = PROB_UNLIKELY_MAG(3);
 720   // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
 721   if (used_inside_loop) {
 722     if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
 723 
 724     // BlockLayoutByFrequency optimization moves infrequent branch
 725     // from hot path. No point in CMOV'ing in such case (110 is used
 726     // instead of 100 to take into account not exactness of float value).
 727     if (BlockLayoutByFrequency) {
 728       infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
 729     }
 730   }
 731   // Check for highly predictable branch.  No point in CMOV'ing if
 732   // we are going to predict accurately all the time.
 733   if (C->use_cmove() && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) {
 734     //keep going
 735   } else if (iff->_prob < infrequent_prob ||
 736       iff->_prob > (1.0f - infrequent_prob))
 737     return NULL;
 738 
 739   // --------------
 740   // Now replace all Phis with CMOV's
 741   Node *cmov_ctrl = iff->in(0);
 742   uint flip = (lp->Opcode() == Op_IfTrue);
 743   Node_List wq;
 744   while (1) {
 745     PhiNode* phi = NULL;
 746     for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 747       Node *out = region->fast_out(i);
 748       if (out->is_Phi()) {
 749         phi = out->as_Phi();
 750         break;
 751       }
 752     }
 753     if (phi == NULL || _igvn.type(phi) == Type::TOP) {
 754       break;
 755     }
 756     if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
 757     // Move speculative ops
 758     wq.push(phi);
 759     while (wq.size() > 0) {
 760       Node *n = wq.pop();
 761       for (uint j = 1; j < n->req(); j++) {
 762         Node* m = n->in(j);
 763         if (m != NULL && !is_dominator(get_ctrl(m), cmov_ctrl)) {
 764 #ifndef PRODUCT
 765           if (PrintOpto && VerifyLoopOptimizations) {
 766             tty->print("  speculate: ");
 767             m->dump();
 768           }
 769 #endif
 770           set_ctrl(m, cmov_ctrl);
 771           wq.push(m);
 772         }
 773       }
 774     }
 775     Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
 776     register_new_node( cmov, cmov_ctrl );
 777     _igvn.replace_node( phi, cmov );
 778 #ifndef PRODUCT
 779     if (TraceLoopOpts) {
 780       tty->print("CMOV  ");
 781       r_loop->dump_head();
 782       if (Verbose) {
 783         bol->in(1)->dump(1);
 784         cmov->dump(1);
 785       }
 786     }
 787     if (VerifyLoopOptimizations) verify();
 788 #endif
 789   }
 790 
 791   // The useless CFG diamond will fold up later; see the optimization in
 792   // RegionNode::Ideal.
 793   _igvn._worklist.push(region);
 794 
 795   return iff->in(1);
 796 }
 797 
 798 static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
 799   for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
 800     Node* u = m->fast_out(i);
 801     if (u->is_CFG()) {
 802       if (u->Opcode() == Op_NeverBranch) {
 803         u = ((NeverBranchNode*)u)->proj_out(0);
 804         enqueue_cfg_uses(u, wq);
 805       } else {
 806         wq.push(u);
 807       }
 808     }
 809   }
 810 }
 811 
// Try moving a store out of a loop, right before the loop.
// Returns the store if it was moved, NULL otherwise.
// n is the candidate node, n_ctrl its control.
Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
  // Store has to be first in the loop body
  IdealLoopTree *n_loop = get_loop(n_ctrl);
  // Only consider pinned stores inside a proper (non-root) Loop.
  if (n->is_Store() && n_loop != _ltree_root &&
      n_loop->is_loop() && n_loop->_head->is_Loop() &&
      n->in(0) != NULL) {
    Node* address = n->in(MemNode::Address);
    Node* value = n->in(MemNode::ValueIn);
    Node* mem = n->in(MemNode::Memory);
    IdealLoopTree* address_loop = get_loop(get_ctrl(address));
    IdealLoopTree* value_loop = get_loop(get_ctrl(value));

    // - address and value must be loop invariant
    // - memory must be a memory Phi for the loop
    // - Store must be the only store on this memory slice in the
    // loop: if there's another store following this one then value
    // written at iteration i by the second store could be overwritten
    // at iteration i+n by the first store: it's not safe to move the
    // first store out of the loop
    // - nothing must observe the memory Phi: it guarantees no read
    // before the store, we are also guaranteed the store post
    // dominates the loop head (ignoring a possible early
    // exit). Otherwise there would be extra Phi involved between the
    // loop's Phi and the store.
    // - there must be no early exit from the loop before the Store
    // (such an exit most of the time would be an extra use of the
    // memory Phi but sometimes is a bottom memory Phi that takes the
    // store as input).

    if (!n_loop->is_member(address_loop) &&
        !n_loop->is_member(value_loop) &&
        mem->is_Phi() && mem->in(0) == n_loop->_head &&
        mem->outcnt() == 1 &&
        mem->in(LoopNode::LoopBackControl) == n) {

      assert(n_loop->_tail != NULL, "need a tail");
      assert(is_dominator(n_ctrl, n_loop->_tail), "store control must not be in a branch in the loop");

      // Verify that there's no early exit of the loop before the store.
      bool ctrl_ok = false;
      {
        // Follow control from loop head until n, we exit the loop or
        // we reach the tail
        ResourceMark rm;
        Unique_Node_List wq;
        wq.push(n_loop->_head);

        for (uint next = 0; next < wq.size(); ++next) {
          Node *m = wq.at(next);
          if (m == n->in(0)) {
            // Reached the store's control without leaving the loop: this path is fine.
            ctrl_ok = true;
            continue;
          }
          assert(!has_ctrl(m), "should be CFG");
          // Leaving the loop (early exit) or reaching the tail before the
          // store means the store does not dominate every iteration's exit.
          if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
            ctrl_ok = false;
            break;
          }
          enqueue_cfg_uses(m, wq);
          // Cap the walk: give up on large control flow rather than spend time.
          if (wq.size() > 10) {
            ctrl_ok = false;
            break;
          }
        }
      }
      if (ctrl_ok) {
        // move the Store
        // Make the phi's backedge refer to itself so the store is no longer
        // a use of the phi; the phi is then dead except for its entry input.
        _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
        // Re-pin the store right before the loop (skipping any strip-mined wrapper).
        _igvn.replace_input_of(n, 0, n_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl));
        // The store now consumes the memory state from before the loop.
        _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
        // Disconnect the phi now. An empty phi can confuse other
        // optimizations in this pass of loop opts.
        _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
        n_loop->_body.yank(mem);

        set_ctrl_and_loop(n, n->in(0));

        return n;
      }
    }
  }
  return NULL;
}
 896 
// Try moving a store out of a loop, right after the loop.
// Succeeds only if the store is the last write on its memory slice in the
// loop and nothing in the loop observes the stored value.
void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
  if (n->is_Store() && n->in(0) != NULL) {
    Node *n_ctrl = get_ctrl(n);
    IdealLoopTree *n_loop = get_loop(n_ctrl);
    // Store must be in a loop
    if (n_loop != _ltree_root && !n_loop->_irreducible) {
      Node* address = n->in(MemNode::Address);
      Node* value = n->in(MemNode::ValueIn);
      IdealLoopTree* address_loop = get_loop(get_ctrl(address));
      // address must be loop invariant
      if (!n_loop->is_member(address_loop)) {
        // Store must be last on this memory slice in the loop and
        // nothing in the loop must observe it
        Node* phi = NULL;
        for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
          Node* u = n->fast_out(i);
          if (has_ctrl(u)) { // control use?
            IdealLoopTree *u_loop = get_loop(get_ctrl(u));
            if (!n_loop->is_member(u_loop)) {
              // Uses outside the loop don't block the transformation.
              continue;
            }
            if (u->is_Phi() && u->in(0) == n_loop->_head) {
              assert(_igvn.type(u) == Type::MEMORY, "bad phi");
              // multiple phis on the same slice are possible
              if (phi != NULL) {
                return;
              }
              phi = u;
              continue;
            }
          }
          // Any other in-loop use observes the store: bail out.
          return;
        }
        if (phi != NULL) {
          // Nothing in the loop before the store (next iteration)
          // must observe the stored value
          bool mem_ok = true;
          {
            // Walk forward through memory users of the phi: only other
            // stores/phis (which overwrite or merge) are acceptable.
            ResourceMark rm;
            Unique_Node_List wq;
            wq.push(phi);
            for (uint next = 0; next < wq.size() && mem_ok; ++next) {
              Node *m = wq.at(next);
              for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
                Node* u = m->fast_out(i);
                if (u->is_Store() || u->is_Phi()) {
                  if (u != n) {
                    wq.push(u);
                    // Cap the walk at 10 visited nodes.
                    mem_ok = (wq.size() <= 10);
                  }
                } else {
                  mem_ok = false;
                  break;
                }
              }
            }
          }
          if (mem_ok) {
            // Move the store out of the loop if the LCA of all
            // users (except for the phi) is outside the loop.
            Node* hook = new Node(1);
            hook->init_req(0, n_ctrl); // Add an input to prevent hook from being dead
            // Temporarily detach the store from the phi via the hook so the
            // late-control computation ignores the phi use.
            _igvn.rehash_node_delayed(phi);
            int count = phi->replace_edge(n, hook, &_igvn);
            assert(count > 0, "inconsistent phi");

            // Compute latest point this store can go
            Node* lca = get_late_ctrl(n, get_ctrl(n));
            if (lca->is_OuterStripMinedLoop()) {
              lca = lca->in(LoopNode::EntryControl);
            }
            if (n_loop->is_member(get_loop(lca))) {
              // LCA is in the loop - bail out
              // Undo the hook substitution before giving up.
              _igvn.replace_node(hook, n);
              return;
            }
#ifdef ASSERT
            if (n_loop->_head->is_Loop() && n_loop->_head->as_Loop()->is_strip_mined()) {
              assert(n_loop->_head->Opcode() == Op_CountedLoop, "outer loop is a strip mined");
              n_loop->_head->as_Loop()->verify_strip_mined(1);
              Node* outer = n_loop->_head->as_CountedLoop()->outer_loop();
              IdealLoopTree* outer_loop = get_loop(outer);
              assert(n_loop->_parent == outer_loop, "broken loop tree");
              assert(get_loop(lca) == outer_loop, "safepoint in outer loop consume all memory state");
            }
#endif
            lca = place_outside_loop(lca, n_loop);
            assert(!n_loop->is_member(get_loop(lca)), "control must not be back in the loop");
            assert(get_loop(lca)->_nest < n_loop->_nest || lca->in(0)->Opcode() == Op_NeverBranch, "must not be moved into inner loop");

            // Move store out of the loop
            // The phi (through the hook) now sees the store's old memory input.
            _igvn.replace_node(hook, n->in(MemNode::Memory));
            _igvn.replace_input_of(n, 0, lca);
            set_ctrl_and_loop(n, lca);

            // Disconnect the phi now. An empty phi can confuse other
            // optimizations in this pass of loop opts..
            if (phi->in(LoopNode::LoopBackControl) == phi) {
              _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
              n_loop->_body.yank(phi);
            }
          }
        }
      }
    }
  }
}
1005 
// If UseArrayMarkWordCheck is enabled, we can't use immutable memory for the flat array check
// because we are loading the mark word which is mutable. Although the bits we are interested in
// are immutable (we check for markWord::unlocked_value), we need to use raw memory to not break
// anti dependency analysis. Below code will attempt to still move flat array checks out of loops,
// mainly to enable loop unswitching.
void PhaseIdealLoop::move_flat_array_check_out_of_loop(Node* n) {
  // Skip checks for more than one array
  if (n->req() > 3) {
    return;
  }
  Node* mem = n->in(FlatArrayCheckNode::Memory);
  Node* array = n->in(FlatArrayCheckNode::ArrayOrKlass)->uncast();
  IdealLoopTree* check_loop = get_loop(get_ctrl(n));
  IdealLoopTree* ary_loop = get_loop(get_ctrl(array));

  // Check if array is loop invariant
  if (!check_loop->is_member(ary_loop)) {
    // Walk up memory graph from the check until we leave the loop
    VectorSet wq;
    wq.set(mem->_idx);
    while (check_loop->is_member(get_loop(ctrl_or_self(mem)))) {
      // Step to the node's memory producer, depending on its kind.
      if (mem->is_Phi()) {
        mem = mem->in(1);
      } else if (mem->is_MergeMem()) {
        mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
      } else if (mem->is_Proj()) {
        mem = mem->in(0);
      } else if (mem->is_MemBar() || mem->is_SafePoint()) {
        mem = mem->in(TypeFunc::Memory);
      } else if (mem->is_Store() || mem->is_LoadStore() || mem->is_ClearArray()) {
        mem = mem->in(MemNode::Memory);
      } else {
#ifdef ASSERT
        mem->dump();
#endif
        ShouldNotReachHere();
      }
      // Give up if we revisit a node (cycle on the memory graph).
      if (wq.test_set(mem->_idx)) {
        return;
      }
    }
    // Replace memory input and re-compute ctrl to move the check out of the loop
    _igvn.replace_input_of(n, 1, mem);
    set_ctrl_and_loop(n, get_early_ctrl(n));
    // The check's single use (its Bool) moves along with it.
    Node* bol = n->unique_out();
    set_ctrl_and_loop(bol, get_early_ctrl(bol));
  }
}
1054 
//------------------------------split_if_with_blocks_pre-----------------------
// Do the real work in a non-recursive function.  Data nodes want to be
// cloned in the pre-order so they can feed each other nicely.
// Returns the node that replaces n (or n itself when nothing changed).
Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
  // Cloning these guys is unlikely to win
  int n_op = n->Opcode();
  if (n_op == Op_MergeMem) {
    return n;
  }
  if (n->is_Proj()) {
    return n;
  }

  // Flat array checks load from mutable (raw) memory; give them special
  // treatment to hoist them out of loops (see move_flat_array_check_out_of_loop).
  if (UseArrayMarkWordCheck && n->isa_FlatArrayCheck()) {
    move_flat_array_check_out_of_loop(n);
    return n;
  }

  // Do not clone-up CmpFXXX variations, as these are always
  // followed by a CmpI
  if (n->is_Cmp()) {
    return n;
  }
  // Attempt to use a conditional move instead of a phi/branch
  if (ConditionalMoveLimit > 0 && n_op == Op_Region) {
    Node *cmov = conditional_move( n );
    if (cmov) {
      return cmov;
    }
  }
  if (n->is_CFG() || n->is_LoadStore()) {
    return n;
  }
  if (n->is_Opaque1() ||     // Opaque nodes cannot be mod'd
      n_op == Op_Opaque2) {
    if (!C->major_progress()) {   // If chance of no more loop opts...
      _igvn._worklist.push(n);  // maybe we'll remove them
    }
    return n;
  }

  if (n->is_Con()) {
    return n;   // No cloning for Con nodes
  }

  Node *n_ctrl = get_ctrl(n);
  if (!n_ctrl) {
    return n;       // Dead node
  }

  // A store whose value/address are loop invariant may be hoisted before the loop.
  Node* res = try_move_store_before_loop(n, n_ctrl);
  if (res != NULL) {
    return n;
  }

  // Attempt to remix address expressions for loop invariants
  Node *m = remix_address_expressions( n );
  if( m ) return m;

  if (n_op == Op_AddI) {
    // Try to recognize a mul-add pattern and use a MulAdd node instead.
    Node *nn = convert_add_to_muladd( n );
    if ( nn ) return nn;
  }

  if (n->is_ConstraintCast()) {
    Node* dom_cast = n->as_ConstraintCast()->dominating_cast(&_igvn, this);
    // ConstraintCastNode::dominating_cast() uses node control input to determine domination.
    // Node control inputs don't necessarily agree with loop control info (due to
    // transformations happened in between), thus additional dominance check is needed
    // to keep loop info valid.
    if (dom_cast != NULL && is_dominator(get_ctrl(dom_cast), get_ctrl(n))) {
      _igvn.replace_node(n, dom_cast);
      return dom_cast;
    }
  }

  // Determine if the Node has inputs from some local Phi.
  // Returns the block to clone thru.
  Node *n_blk = has_local_phi_input( n );
  if( !n_blk ) return n;

  // Do not clone the trip counter through on a CountedLoop
  // (messes up the canonical shape).
  if (((n_blk->is_CountedLoop() || (n_blk->is_Loop() && n_blk->as_Loop()->is_loop_nest_inner_loop())) && n->Opcode() == Op_AddI) ||
      (n_blk->is_LongCountedLoop() && n->Opcode() == Op_AddL)) {
    return n;
  }
  // Pushing a shift through the iv Phi can get in the way of addressing optimizations or range check elimination
  if (n_blk->is_BaseCountedLoop() && n->Opcode() == Op_LShift(n_blk->as_BaseCountedLoop()->bt()) &&
      n->in(1) == n_blk->as_BaseCountedLoop()->phi()) {
    return n;
  }

  // Check for having no control input; not pinned.  Allow
  // dominating control.
  if (n->in(0)) {
    Node *dom = idom(n_blk);
    if (dom_lca(n->in(0), dom) != n->in(0)) {
      return n;
    }
  }
  // Policy: when is it profitable.  You must get more wins than
  // policy before it is considered profitable.  Policy is usually 0,
  // so 1 win is considered profitable.  Big merges will require big
  // cloning, so get a larger policy.
  int policy = n_blk->req() >> 2;

  // If the loop is a candidate for range check elimination,
  // delay splitting through it's phi until a later loop optimization
  if (n_blk->is_BaseCountedLoop()) {
    IdealLoopTree *lp = get_loop(n_blk);
    if (lp && lp->_rce_candidate) {
      return n;
    }
  }

  if (must_throttle_split_if()) return n;

  // Split 'n' through the merge point if it is profitable
  Node *phi = split_thru_phi( n, n_blk, policy );
  if (!phi) return n;

  // Found a Phi to split thru!
  // Replace 'n' with the new phi
  _igvn.replace_node( n, phi );
  // Moved a load around the loop, 'en-registering' something.
  if (n_blk->is_Loop() && n->is_Load() &&
      !phi->in(LoopNode::LoopBackControl)->is_Load())
    C->set_major_progress();

  return phi;
}
1187 
1188 static bool merge_point_too_heavy(Compile* C, Node* region) {
1189   // Bail out if the region and its phis have too many users.
1190   int weight = 0;
1191   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1192     weight += region->fast_out(i)->outcnt();
1193   }
1194   int nodes_left = C->max_node_limit() - C->live_nodes();
1195   if (weight * 8 > nodes_left) {
1196     if (PrintOpto) {
1197       tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
1198     }
1199     return true;
1200   } else {
1201     return false;
1202   }
1203 }
1204 
1205 static bool merge_point_safe(Node* region) {
1206   // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
1207   // having a PhiNode input. This sidesteps the dangerous case where the split
1208   // ConvI2LNode may become TOP if the input Value() does not
1209   // overlap the ConvI2L range, leaving a node which may not dominate its
1210   // uses.
1211   // A better fix for this problem can be found in the BugTraq entry, but
1212   // expediency for Mantis demands this hack.
1213 #ifdef _LP64
1214   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1215     Node* n = region->fast_out(i);
1216     if (n->is_Phi()) {
1217       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1218         Node* m = n->fast_out(j);
1219         if (m->Opcode() == Op_ConvI2L)
1220           return false;
1221         if (m->is_CastII()) {
1222           return false;
1223         }
1224       }
1225     }
1226   }
1227 #endif
1228   return true;
1229 }
1230 
1231 
1232 //------------------------------place_outside_loop---------------------------------
1233 // Place some computation outside of this loop on the path to the use passed as argument
1234 Node* PhaseIdealLoop::place_outside_loop(Node* useblock, IdealLoopTree* loop) const {
1235   Node* head = loop->_head;
1236   assert(!loop->is_member(get_loop(useblock)), "must be outside loop");
1237   if (head->is_Loop() && head->as_Loop()->is_strip_mined()) {
1238     loop = loop->_parent;
1239     assert(loop->_head->is_OuterStripMinedLoop(), "malformed strip mined loop");
1240   }
1241 
1242   // Pick control right outside the loop
1243   for (;;) {
1244     Node* dom = idom(useblock);
1245     if (loop->is_member(get_loop(dom)) ||
1246         // NeverBranch nodes are not assigned to the loop when constructed
1247         (dom->Opcode() == Op_NeverBranch && loop->is_member(get_loop(dom->in(0))))) {
1248       break;
1249     }
1250     useblock = dom;
1251   }
1252   assert(find_non_split_ctrl(useblock) == useblock, "should be non split control");
1253   return useblock;
1254 }
1255 
1256 
1257 bool PhaseIdealLoop::identical_backtoback_ifs(Node *n) {
1258   if (!n->is_If() || n->is_BaseCountedLoopEnd()) {
1259     return false;
1260   }
1261   if (!n->in(0)->is_Region()) {
1262     return false;
1263   }
1264 
1265   Node* region = n->in(0);
1266   Node* dom = idom(region);
1267   if (!dom->is_If() || dom->in(1) != n->in(1)) {
1268     return false;
1269   }
1270   IfNode* dom_if = dom->as_If();
1271   Node* proj_true = dom_if->proj_out(1);
1272   Node* proj_false = dom_if->proj_out(0);
1273 
1274   for (uint i = 1; i < region->req(); i++) {
1275     if (is_dominator(proj_true, region->in(i))) {
1276       continue;
1277     }
1278     if (is_dominator(proj_false, region->in(i))) {
1279       continue;
1280     }
1281     return false;
1282   }
1283 
1284   return true;
1285 }
1286 
1287 
1288 bool PhaseIdealLoop::can_split_if(Node* n_ctrl) {
1289   if (must_throttle_split_if()) {
1290     return false;
1291   }
1292 
1293   // Do not do 'split-if' if irreducible loops are present.
1294   if (_has_irreducible_loops) {
1295     return false;
1296   }
1297 
1298   if (merge_point_too_heavy(C, n_ctrl)) {
1299     return false;
1300   }
1301 
1302   // Do not do 'split-if' if some paths are dead.  First do dead code
1303   // elimination and then see if its still profitable.
1304   for (uint i = 1; i < n_ctrl->req(); i++) {
1305     if (n_ctrl->in(i) == C->top()) {
1306       return false;
1307     }
1308   }
1309 
1310   // If trying to do a 'Split-If' at the loop head, it is only
1311   // profitable if the cmp folds up on BOTH paths.  Otherwise we
1312   // risk peeling a loop forever.
1313 
1314   // CNC - Disabled for now.  Requires careful handling of loop
1315   // body selection for the cloned code.  Also, make sure we check
1316   // for any input path not being in the same loop as n_ctrl.  For
1317   // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
1318   // because the alternative loop entry points won't be converted
1319   // into LoopNodes.
1320   IdealLoopTree *n_loop = get_loop(n_ctrl);
1321   for (uint j = 1; j < n_ctrl->req(); j++) {
1322     if (get_loop(n_ctrl->in(j)) != n_loop) {
1323       return false;
1324     }
1325   }
1326 
1327   // Check for safety of the merge point.
1328   if (!merge_point_safe(n_ctrl)) {
1329     return false;
1330   }
1331 
1332   return true;
1333 }
1334 
1335 // Detect if the node is the inner strip-mined loop
1336 // Return: NULL if it's not the case, or the exit of outer strip-mined loop
1337 static Node* is_inner_of_stripmined_loop(const Node* out) {
1338   Node* out_le = NULL;
1339 
1340   if (out->is_CountedLoopEnd()) {
1341       const CountedLoopNode* loop = out->as_CountedLoopEnd()->loopnode();
1342 
1343       if (loop != NULL && loop->is_strip_mined()) {
1344         out_le = loop->in(LoopNode::EntryControl)->as_OuterStripMinedLoop()->outer_loop_exit();
1345       }
1346   }
1347 
1348   return out_le;
1349 }
1350 
// Push a subtype-check CmpP through the phi of a flattened array check.
// Returns true if n was handled (even if the transformation was undone).
bool PhaseIdealLoop::flatten_array_element_type_check(Node *n) {
  // If the CmpP is a subtype check for a value that has just been
  // loaded from an array, the subtype check guarantees the value
  // can't be stored in a flattened array and the load of the value
  // happens with a flattened array check then: push the type check
  // through the phi of the flattened array check. This needs special
  // logic because the subtype check's input is not a phi but a
  // LoadKlass that must first be cloned through the phi.
  if (n->Opcode() != Op_CmpP) {
    return false;
  }

  Node* klassptr = n->in(1);
  Node* klasscon = n->in(2);

  // Look through a narrow-oop decode to the klass load itself.
  if (klassptr->is_DecodeNarrowPtr()) {
    klassptr = klassptr->in(1);
  }

  if (klassptr->Opcode() != Op_LoadKlass && klassptr->Opcode() != Op_LoadNKlass) {
    return false;
  }

  if (!klasscon->is_Con()) {
    return false;
  }

  Node* addr = klassptr->in(MemNode::Address);

  if (!addr->is_AddP()) {
    return false;
  }

  intptr_t offset;
  Node* obj = AddPNode::Ideal_base_and_offset(addr, &_igvn, offset);

  if (obj == NULL) {
    return false;
  }

  assert(obj != NULL && addr->in(AddPNode::Base) == addr->in(AddPNode::Address), "malformed AddP?");
  if (obj->Opcode() == Op_CastPP) {
    obj = obj->in(1);
  }

  // The base of the klass load must be the phi of the flattened array check.
  if (!obj->is_Phi()) {
    return false;
  }

  Node* region = obj->in(0);

  // Build a phi of per-path klass loads: for each region input, clone the
  // CastPP (if any), the AddP, the klass load, and the narrow decode (if any).
  Node* phi = PhiNode::make_blank(region, n->in(1));
  for (uint i = 1; i < region->req(); i++) {
    Node* in = obj->in(i);
    Node* ctrl = region->in(i);
    if (addr->in(AddPNode::Base) != obj) {
      // The address is based on a CastPP of the phi: clone it per path.
      Node* cast = addr->in(AddPNode::Base);
      assert(cast->Opcode() == Op_CastPP && cast->in(0) != NULL, "inconsistent subgraph");
      Node* cast_clone = cast->clone();
      cast_clone->set_req(0, ctrl);
      cast_clone->set_req(1, in);
      register_new_node(cast_clone, ctrl);
      _igvn.set_type(cast_clone, cast_clone->Value(&_igvn));
      in = cast_clone;
    }
    Node* addr_clone = addr->clone();
    addr_clone->set_req(AddPNode::Base, in);
    addr_clone->set_req(AddPNode::Address, in);
    register_new_node(addr_clone, ctrl);
    _igvn.set_type(addr_clone, addr_clone->Value(&_igvn));
    Node* klassptr_clone = klassptr->clone();
    klassptr_clone->set_req(2, addr_clone);
    register_new_node(klassptr_clone, ctrl);
    _igvn.set_type(klassptr_clone, klassptr_clone->Value(&_igvn));
    if (klassptr != n->in(1)) {
      // Original CmpP input was a narrow decode: clone it on this path too.
      Node* decode = n->in(1);
      assert(decode->is_DecodeNarrowPtr(), "inconsistent subgraph");
      Node* decode_clone = decode->clone();
      decode_clone->set_req(1, klassptr_clone);
      register_new_node(decode_clone, ctrl);
      _igvn.set_type(decode_clone, decode_clone->Value(&_igvn));
      klassptr_clone = decode_clone;
    }
    phi->set_req(i, klassptr_clone);
  }
  register_new_node(phi, region);
  Node* orig = n->in(1);
  // Trial: hook the new phi into the CmpP and retry the split-if transform.
  _igvn.replace_input_of(n, 1, phi);
  split_if_with_blocks_post(n);
  if (n->outcnt() != 0) {
    // The split did not consume n: roll back to the original input.
    _igvn.replace_input_of(n, 1, orig);
    _igvn.remove_dead_node(phi);
  }
  return true;
}
1446 
1447 //------------------------------split_if_with_blocks_post----------------------
1448 // Do the real work in a non-recursive function.  CFG hackery wants to be
1449 // in the post-order, so it can dirty the I-DOM info and not use the dirtied
1450 // info.
1451 void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
1452 
1453   if (flatten_array_element_type_check(n)) {
1454     return;
1455   }
1456 
1457   // Cloning Cmp through Phi's involves the split-if transform.
1458   // FastLock is not used by an If
1459   if (n->is_Cmp() && !n->is_FastLock()) {
1460     Node *n_ctrl = get_ctrl(n);
1461     // Determine if the Node has inputs from some local Phi.
1462     // Returns the block to clone thru.
1463     Node *n_blk = has_local_phi_input(n);
1464     if (n_blk != n_ctrl) {
1465       return;
1466     }
1467 
1468     if (!can_split_if(n_ctrl)) {
1469       return;
1470     }
1471 
1472     if (n->outcnt() != 1) {
1473       return; // Multiple bool's from 1 compare?
1474     }
1475     Node *bol = n->unique_out();
1476     assert(bol->is_Bool(), "expect a bool here");
1477     if (bol->outcnt() != 1) {
1478       return;// Multiple branches from 1 compare?
1479     }
1480     Node *iff = bol->unique_out();
1481 
1482     // Check some safety conditions
1483     if (iff->is_If()) {        // Classic split-if?
1484       if (iff->in(0) != n_ctrl) {
1485         return; // Compare must be in same blk as if
1486       }
1487     } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
1488       // Can't split CMove with different control edge.
1489       if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) {
1490         return;
1491       }
1492       if (get_ctrl(iff->in(2)) == n_ctrl ||
1493           get_ctrl(iff->in(3)) == n_ctrl) {
1494         return;                 // Inputs not yet split-up
1495       }
1496       if (get_loop(n_ctrl) != get_loop(get_ctrl(iff))) {
1497         return;                 // Loop-invar test gates loop-varying CMOVE
1498       }
1499     } else {
1500       return;  // some other kind of node, such as an Allocate
1501     }
1502 
1503     // When is split-if profitable?  Every 'win' on means some control flow
1504     // goes dead, so it's almost always a win.
1505     int policy = 0;
1506     // Split compare 'n' through the merge point if it is profitable
1507     Node *phi = split_thru_phi( n, n_ctrl, policy);
1508     if (!phi) {
1509       return;
1510     }
1511 
1512     // Found a Phi to split thru!
1513     // Replace 'n' with the new phi
1514     _igvn.replace_node(n, phi);
1515 
1516     // Now split the bool up thru the phi
1517     Node *bolphi = split_thru_phi(bol, n_ctrl, -1);
1518     guarantee(bolphi != NULL, "null boolean phi node");
1519 
1520     _igvn.replace_node(bol, bolphi);
1521     assert(iff->in(1) == bolphi, "");
1522 
1523     if (bolphi->Value(&_igvn)->singleton()) {
1524       return;
1525     }
1526 
1527     // Conditional-move?  Must split up now
1528     if (!iff->is_If()) {
1529       Node *cmovphi = split_thru_phi(iff, n_ctrl, -1);
1530       _igvn.replace_node(iff, cmovphi);
1531       return;
1532     }
1533 
1534     // Now split the IF
1535     do_split_if(iff);
1536     return;
1537   }
1538 
1539   // Two identical ifs back to back can be merged
1540   if (try_merge_identical_ifs(n)) {
1541     return;
1542   }
1543 
1544   // Check for an IF ready to split; one that has its
1545   // condition codes input coming from a Phi at the block start.
1546   int n_op = n->Opcode();
1547 
1548   // Check for an IF being dominated by another IF same test
1549   if (n_op == Op_If ||
1550       n_op == Op_RangeCheck) {
1551     Node *bol = n->in(1);
1552     uint max = bol->outcnt();
1553     // Check for same test used more than once?
1554     if (max > 1 && bol->is_Bool()) {
1555       // Search up IDOMs to see if this IF is dominated.
1556       Node *cutoff = get_ctrl(bol);
1557 
1558       // Now search up IDOMs till cutoff, looking for a dominating test
1559       Node *prevdom = n;
1560       Node *dom = idom(prevdom);
1561       while (dom != cutoff) {
1562         if (dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom &&
1563             safe_for_if_replacement(dom)) {
1564           // It's invalid to move control dependent data nodes in the inner
1565           // strip-mined loop, because:
1566           //  1) break validation of LoopNode::verify_strip_mined()
1567           //  2) move code with side-effect in strip-mined loop
1568           // Move to the exit of outer strip-mined loop in that case.
1569           Node* out_le = is_inner_of_stripmined_loop(dom);
1570           if (out_le != NULL) {
1571             prevdom = out_le;
1572           }
1573           // Replace the dominated test with an obvious true or false.
1574           // Place it on the IGVN worklist for later cleanup.
1575           C->set_major_progress();
1576           dominated_by(prevdom->as_IfProj(), n->as_If(), false, true);
1577 #ifndef PRODUCT
1578           if( VerifyLoopOptimizations ) verify();
1579 #endif
1580           return;
1581         }
1582         prevdom = dom;
1583         dom = idom(prevdom);
1584       }
1585     }
1586   }
1587 
1588   try_sink_out_of_loop(n);
1589 
1590   try_move_store_after_loop(n);
1591 
1592   // Remove multiple allocations of the same inline type
1593   if (n->is_InlineType()) {
1594     n->as_InlineType()->remove_redundant_allocations(&_igvn, this);
1595   }
1596 
1597   // Check for Opaque2's who's loop has disappeared - who's input is in the
1598   // same loop nest as their output.  Remove 'em, they are no longer useful.
1599   if( n_op == Op_Opaque2 &&
1600       n->in(1) != NULL &&
1601       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1602     _igvn.replace_node( n, n->in(1) );
1603   }
1604 }
1605 
1606 // Transform:
1607 //
1608 // if (some_condition) {
1609 //   // body 1
1610 // } else {
1611 //   // body 2
1612 // }
1613 // if (some_condition) {
1614 //   // body 3
1615 // } else {
1616 //   // body 4
1617 // }
1618 //
1619 // into:
1620 //
1621 //
1622 // if (some_condition) {
1623 //   // body 1
1624 //   // body 3
1625 // } else {
1626 //   // body 2
1627 //   // body 4
1628 // }
// Merge the dominated if into the dominating identical if by splitting the
// dominated if through its merge-point region (see diagram above). Returns
// true if the transformation was performed.
bool PhaseIdealLoop::try_merge_identical_ifs(Node* n) {
  if (identical_backtoback_ifs(n) && can_split_if(n->in(0))) {
    Node *n_ctrl = n->in(0);
    // The immediate dominator of n's region is the first of the two
    // back-to-back ifs; cache its two projections before splitting.
    IfNode* dom_if = idom(n_ctrl)->as_If();
    ProjNode* dom_proj_true = dom_if->proj_out(1);
    ProjNode* dom_proj_false = dom_if->proj_out(0);

    // Now split the IF
    RegionNode* new_false_region;
    RegionNode* new_true_region;
    do_split_if(n, &new_false_region, &new_true_region);
    assert(new_false_region->req() == new_true_region->req(), "");
#ifdef ASSERT
    // Each pair of region inputs must come from projections of the same
    // cloned if, and all clones must share the same test as their neighbor.
    for (uint i = 1; i < new_false_region->req(); ++i) {
      assert(new_false_region->in(i)->in(0) == new_true_region->in(i)->in(0), "unexpected shape following split if");
      assert(i == new_false_region->req() - 1 || new_false_region->in(i)->in(0)->in(1) == new_false_region->in(i + 1)->in(0)->in(1), "unexpected shape following split if");
    }
#endif
    assert(new_false_region->in(1)->in(0)->in(1) == dom_if->in(1), "dominating if and dominated if after split must share test");

    // We now have:
    // if (some_condition) {
    //   // body 1
    //   if (some_condition) {
    //     body3: // new_true_region
    //     // body3
    //   } else {
    //     goto body4;
    //   }
    // } else {
    //   // body 2
    //  if (some_condition) {
    //     goto body3;
    //   } else {
    //     body4:   // new_false_region
    //     // body4;
    //   }
    // }
    //

    // clone pinned nodes thru the resulting regions
    push_pinned_nodes_thru_region(dom_if, new_true_region);
    push_pinned_nodes_thru_region(dom_if, new_false_region);

    // Optimize out the cloned ifs. Because pinned nodes were cloned, this also allows a CastPP that would be dependent
    // on a projection of n to have the dom_if as a control dependency. We don't want the CastPP to end up with an
    // unrelated control dependency.
    for (uint i = 1; i < new_false_region->req(); i++) {
      if (is_dominator(dom_proj_true, new_false_region->in(i))) {
        dominated_by(dom_proj_true->as_IfProj(), new_false_region->in(i)->in(0)->as_If(), false, false);
      } else {
        assert(is_dominator(dom_proj_false, new_false_region->in(i)), "bad if");
        dominated_by(dom_proj_false->as_IfProj(), new_false_region->in(i)->in(0)->as_If(), false, false);
      }
    }
    return true;
  }
  return false;
}
1688 
// Clone control-dependent data nodes hanging off 'region' through it: each
// such node whose other inputs all dominate dom_if is replaced by a Phi of
// per-path clones, so the region's ifs can later be optimized away.
void PhaseIdealLoop::push_pinned_nodes_thru_region(IfNode* dom_if, Node* region) {
  for (DUIterator i = region->outs(); region->has_out(i); i++) {
    Node* u = region->out(i);
    // Only non-CFG, non-Phi nodes that merely depend on the test (and have
    // no zero-check users that would be broken by moving them) are eligible.
    if (!has_ctrl(u) || u->is_Phi() || !u->depends_only_on_test() || !_igvn.no_dependent_zero_check(u)) {
      continue;
    }
    assert(u->in(0) == region, "not a control dependent node?");
    uint j = 1;
    // All data inputs must be computed before dom_if, otherwise a clone
    // pinned on a region input could use a value not yet available there.
    for (; j < u->req(); ++j) {
      Node* in = u->in(j);
      if (!is_dominator(ctrl_or_self(in), dom_if)) {
        break;
      }
    }
    if (j == u->req()) {
      // Clone u on each region path and merge the clones with a new Phi.
      Node *phi = PhiNode::make_blank(region, u);
      for (uint k = 1; k < region->req(); ++k) {
        Node* clone = u->clone();
        clone->set_req(0, region->in(k));
        register_new_node(clone, region->in(k));
        phi->init_req(k, clone);
      }
      register_new_node(phi, region);
      _igvn.replace_node(u, phi);
      --i; // replace_node removed a use of region; re-sync the iterator
    }
  }
}
1717 
1718 bool PhaseIdealLoop::safe_for_if_replacement(const Node* dom) const {
1719   if (!dom->is_CountedLoopEnd()) {
1720     return true;
1721   }
1722   CountedLoopEndNode* le = dom->as_CountedLoopEnd();
1723   CountedLoopNode* cl = le->loopnode();
1724   if (cl == NULL) {
1725     return true;
1726   }
1727   if (!cl->is_main_loop()) {
1728     return true;
1729   }
1730   if (cl->is_canonical_loop_entry() == NULL) {
1731     return true;
1732   }
1733   // Further unrolling is possible so loop exit condition might change
1734   return false;
1735 }
1736 
1737 // See if a shared loop-varying computation has no loop-varying uses.
1738 // Happens if something is only used for JVM state in uncommon trap exits,
1739 // like various versions of induction variable+offset.  Clone the
1740 // computation per usage to allow it to sink out of the loop.
// See if a shared loop-varying computation has no loop-varying uses.
// Clone the computation per use so each copy can sink out of the loop.
void PhaseIdealLoop::try_sink_out_of_loop(Node* n) {
  // Casting a raw pointer to an oop would extend the live range of the raw
  // value past a safepoint; never sink such casts.
  bool is_raw_to_oop_cast = n->is_ConstraintCast() &&
                            n->in(1)->bottom_type()->isa_rawptr() &&
                            !n->bottom_type()->isa_rawptr();
  // Filter out node kinds that must stay put: CFG-ish nodes, phis, bools
  // feeding tests, memory merges, conditional moves, opaque guards, types.
  if (has_ctrl(n) &&
      !n->is_Phi() &&
      !n->is_Bool() &&
      !n->is_Proj() &&
      !n->is_MergeMem() &&
      !n->is_CMove() &&
      !is_raw_to_oop_cast && // don't extend live ranges of raw oops
      n->Opcode() != Op_Opaque4 &&
      !n->is_Type()) {
    Node *n_ctrl = get_ctrl(n);
    IdealLoopTree *n_loop = get_loop(n_ctrl);

    if (n->in(0) != NULL) {
      IdealLoopTree* loop_ctrl = get_loop(n->in(0));
      if (n_loop != loop_ctrl && n_loop->is_member(loop_ctrl)) {
        // n has a control input inside a loop but get_ctrl() is member of an outer loop. This could happen, for example,
        // for Div nodes inside a loop (control input inside loop) without a use except for an UCT (outside the loop).
        // Rewire control of n to right outside of the loop, regardless if its input(s) are later sunk or not.
        _igvn.replace_input_of(n, 0, place_outside_loop(n_ctrl, loop_ctrl));
      }
    }
    if (n_loop != _ltree_root && n->outcnt() > 1) {
      // Compute early control: needed for anti-dependence analysis. It's also possible that as a result of
      // previous transformations in this loop opts round, the node can be hoisted now: early control will tell us.
      Node* early_ctrl = compute_early_ctrl(n, n_ctrl);
      if (n_loop->is_member(get_loop(early_ctrl)) && // check that this one can't be hoisted now
          ctrl_of_all_uses_out_of_loop(n, early_ctrl, n_loop)) { // All uses in outer loops!
        assert(!n->is_Store() && !n->is_LoadStore(), "no node with a side effect");
        Node* outer_loop_clone = NULL;
        // Walk uses from the back so index adjustments after replacements
        // stay valid.
        for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin;) {
          Node* u = n->last_out(j); // Clone private computation per use
          _igvn.rehash_node_delayed(u);
          Node* x = n->clone(); // Clone computation
          Node* x_ctrl = NULL;
          if (u->is_Phi()) {
            // Replace all uses of normal nodes.  Replace Phi uses
            // individually, so the separate Nodes can sink down
            // different paths.
            uint k = 1;
            while (u->in(k) != n) k++;
            u->set_req(k, x);
            // x goes next to Phi input path
            x_ctrl = u->in(0)->in(k);
            // Find control for 'x' next to use but not inside inner loops.
            x_ctrl = place_outside_loop(x_ctrl, n_loop);
            --j;
          } else {              // Normal use
            if (has_ctrl(u)) {
              x_ctrl = get_ctrl(u);
            } else {
              x_ctrl = u->in(0);
            }
            // Find control for 'x' next to use but not inside inner loops.
            x_ctrl = place_outside_loop(x_ctrl, n_loop);
            // Replace all uses
            if (u->is_ConstraintCast() && u->bottom_type()->higher_equal(_igvn.type(n)) && u->in(0) == x_ctrl) {
              // If we're sinking a chain of data nodes, we might have inserted a cast to pin the use which is not necessary
              // anymore now that we're going to pin n as well
              _igvn.replace_node(u, x);
              --j;
            } else {
              int nb = u->replace_edge(n, x, &_igvn);
              j -= nb;
            }
          }

          if (n->is_Load()) {
            // For loads, add a control edge to a CFG node outside of the loop
            // to force them to not combine and return back inside the loop
            // during GVN optimization (4641526).
            assert(x_ctrl == get_late_ctrl_with_anti_dep(x->as_Load(), early_ctrl, x_ctrl), "anti-dependences were already checked");

            IdealLoopTree* x_loop = get_loop(x_ctrl);
            Node* x_head = x_loop->_head;
            if (x_head->is_Loop() && x_head->is_OuterStripMinedLoop()) {
              // Do not add duplicate LoadNodes to the outer strip mined loop
              if (outer_loop_clone != NULL) {
                _igvn.replace_node(x, outer_loop_clone);
                continue;
              }
              outer_loop_clone = x;
            }
            x->set_req(0, x_ctrl);
          } else if (n->in(0) != NULL){
            // Non-loads keep a control edge only if the original had one.
            x->set_req(0, x_ctrl);
          }
          assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
          assert(!n_loop->is_member(get_loop(x_ctrl)), "should have moved out of loop");
          register_new_node(x, x_ctrl);

          // Chain of AddP: (AddP base (AddP base )) must keep the same base after sinking so:
          // 1- We don't add a CastPP here when the first one is sunk so if the second one is not, their bases remain
          // the same.
          // (see 2- below)
          assert(!x->is_AddP() || !x->in(AddPNode::Address)->is_AddP() ||
                 x->in(AddPNode::Address)->in(AddPNode::Base) == x->in(AddPNode::Base) ||
                 !x->in(AddPNode::Address)->in(AddPNode::Base)->eqv_uncast(x->in(AddPNode::Base)), "unexpected AddP shape");
          if (x->in(0) == NULL && !x->is_DecodeNarrowPtr() &&
              !(x->is_AddP() && x->in(AddPNode::Address)->is_AddP() && x->in(AddPNode::Address)->in(AddPNode::Base) == x->in(AddPNode::Base))) {
            assert(!x->is_Load(), "load should be pinned");
            // Use a cast node to pin clone out of loop
            Node* cast = NULL;
            // Pin through the first loop-varying input found.
            for (uint k = 0; k < x->req(); k++) {
              Node* in = x->in(k);
              if (in != NULL && n_loop->is_member(get_loop(get_ctrl(in)))) {
                const Type* in_t = _igvn.type(in);
                cast = ConstraintCastNode::make_cast_for_type(x_ctrl, in, in_t, ConstraintCastNode::UnconditionalDependency);
              }
              if (cast != NULL) {
                register_new_node(cast, x_ctrl);
                x->replace_edge(in, cast);
                // Chain of AddP:
                // 2- A CastPP of the base is only added now that both AddP nodes are sunk
                if (x->is_AddP() && k == AddPNode::Base) {
                  for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
                    Node* u = x->fast_out(i);
                    if (u->is_AddP() && u->in(AddPNode::Base) == n->in(AddPNode::Base)) {
                      _igvn.replace_input_of(u, AddPNode::Base, cast);
                      assert(u->find_out_with(Op_AddP) == NULL, "more than 2 chained AddP nodes?");
                    }
                  }
                }
                break;
              }
            }
            assert(cast != NULL, "must have added a cast to pin the node");
          }
        }
        _igvn.remove_dead_node(n);
      }
      _dom_lca_tags_round = 0;
    }
  }
}
1879 
1880 // Compute the early control of a node by following its inputs until we reach
1881 // nodes that are pinned. Then compute the LCA of the control of all pinned nodes.
1882 Node* PhaseIdealLoop::compute_early_ctrl(Node* n, Node* n_ctrl) {
1883   Node* early_ctrl = NULL;
1884   ResourceMark rm;
1885   Unique_Node_List wq;
1886   wq.push(n);
1887   for (uint i = 0; i < wq.size(); i++) {
1888     Node* m = wq.at(i);
1889     Node* c = NULL;
1890     if (m->is_CFG()) {
1891       c = m;
1892     } else if (m->pinned()) {
1893       c = m->in(0);
1894     } else {
1895       for (uint j = 0; j < m->req(); j++) {
1896         Node* in = m->in(j);
1897         if (in != NULL) {
1898           wq.push(in);
1899         }
1900       }
1901     }
1902     if (c != NULL) {
1903       assert(is_dominator(c, n_ctrl), "control input must dominate current control");
1904       if (early_ctrl == NULL || is_dominator(early_ctrl, c)) {
1905         early_ctrl = c;
1906       }
1907     }
1908   }
1909   assert(is_dominator(early_ctrl, n_ctrl), "early control must dominate current control");
1910   return early_ctrl;
1911 }
1912 
// Return true if every use of 'n' has its control strictly outside of
// n's loop nest, i.e. 'n' could legally be sunk out of the loop.
bool PhaseIdealLoop::ctrl_of_all_uses_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop) {
  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node* u = n->fast_out(i);
    if (u->Opcode() == Op_Opaque1) {
      return false;  // Found loop limit, bugfix for 4677003
    }
    // We can't reuse tags in PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal() so make sure calls to
    // get_late_ctrl_with_anti_dep() use their own tag
    _dom_lca_tags_round++;
    assert(_dom_lca_tags_round != 0, "shouldn't wrap around");

    if (u->is_Phi()) {
      // A Phi use is really a use on the corresponding region input path:
      // check the control of each input that is 'n' individually.
      for (uint j = 1; j < u->req(); ++j) {
        if (u->in(j) == n && !ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, u->in(0)->in(j))) {
          return false;
        }
      }
    } else {
      Node* ctrl = has_ctrl(u) ? get_ctrl(u) : u->in(0);
      if (!ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, ctrl)) {
        return false;
      }
    }
  }
  return true;
}
1939 
1940 bool PhaseIdealLoop::ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl) {
1941   if (n->is_Load()) {
1942     ctrl = get_late_ctrl_with_anti_dep(n->as_Load(), n_ctrl, ctrl);
1943   }
1944   IdealLoopTree *u_loop = get_loop(ctrl);
1945   if (u_loop == n_loop) {
1946     return false; // Found loop-varying use
1947   }
1948   if (n_loop->is_member(u_loop)) {
1949     return false; // Found use in inner loop
1950   }
1951   return true;
1952 }
1953 
1954 //------------------------------split_if_with_blocks---------------------------
1955 // Check for aggressive application of 'split-if' optimization,
1956 // using basic block level info.
//------------------------------split_if_with_blocks---------------------------
// Check for aggressive application of 'split-if' optimization,
// using basic block level info.
// Performs an iterative depth-first traversal of the ideal graph from the
// root: pre-visit work on the way down, post-visit (the actual split-if
// transformations) once all of a node's uses have been processed.
void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack) {
  Node* root = C->root();
  visited.set(root->_idx); // first, mark root as visited
  // Do pre-visit work for root
  Node* n   = split_if_with_blocks_pre(root);
  uint  cnt = n->outcnt();
  uint  i   = 0;

  while (true) {
    // Visit all children
    if (i < cnt) {
      Node* use = n->raw_out(i);
      ++i;
      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
        // Now do pre-visit work for this use
        use = split_if_with_blocks_pre(use);
        nstack.push(n, i); // Save parent and next use's index.
        n   = use;         // Process all children of current use.
        cnt = use->outcnt();
        i   = 0;
      }
    }
    else {
      // All of n's children have been processed, complete post-processing.
      if (cnt != 0 && !n->is_Con()) {
        assert(has_node(n), "no dead nodes");
        split_if_with_blocks_post(n);
      }
      // Abandon the remaining traversal if the split-if budget is exhausted.
      if (must_throttle_split_if()) {
        nstack.clear();
      }
      if (nstack.is_empty()) {
        // Finished all nodes on stack.
        break;
      }
      // Get saved parent node and next use's index. Visit the rest of uses.
      n   = nstack.node();
      cnt = n->outcnt();
      i   = nstack.index();
      nstack.pop();
    }
  }
}
2000 
2001 
2002 //=============================================================================
2003 //
2004 //                   C L O N E   A   L O O P   B O D Y
2005 //
2006 
2007 //------------------------------clone_iff--------------------------------------
2008 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
2009 // "Nearly" because all Nodes have been cloned from the original in the loop,
2010 // but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
2011 // through the Phi recursively, and return a Bool.
// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
// through the Phi recursively, and return a Bool (or Opaque4 wrapping one).
Node* PhaseIdealLoop::clone_iff(PhiNode* phi) {

  // Convert this Phi into a Phi merging Bools
  uint i;
  for (i = 1; i < phi->req(); i++) {
    Node *b = phi->in(i);
    if (b->is_Phi()) {
      // Recurse: nested Phi inputs become Bools first.
      _igvn.replace_input_of(phi, i, clone_iff(b->as_Phi()));
    } else {
      assert(b->is_Bool() || b->Opcode() == Op_Opaque4, "");
    }
  }

  // Use the first input as the sample shape; if it is an Opaque4 guard, the
  // real Bool is underneath it and the guard is re-created at the end.
  Node* n = phi->in(1);
  Node* sample_opaque = NULL;
  Node *sample_bool = NULL;
  if (n->Opcode() == Op_Opaque4) {
    sample_opaque = n;
    sample_bool = n->in(1);
    assert(sample_bool->is_Bool(), "wrong type");
  } else {
    sample_bool = n;
  }
  Node* sample_cmp = sample_bool->in(1);
  const Type* t = Type::TOP;
  const TypePtr* at = NULL;
  if (sample_cmp->is_FlatArrayCheck()) {
    // Left input of a FlatArrayCheckNode is memory, set the (adr) type of the phi accordingly
    assert(sample_cmp->in(1)->bottom_type() == Type::MEMORY, "unexpected input type");
    t = Type::MEMORY;
    at = TypeRawPtr::BOTTOM;
  }

  // Make Phis to merge the Cmp's inputs.
  PhiNode *phi1 = new PhiNode(phi->in(0), t, at);
  PhiNode *phi2 = new PhiNode(phi->in(0), Type::TOP);
  for (i = 1; i < phi->req(); i++) {
    // Dig out each input's Cmp operands (one level deeper under Opaque4).
    Node *n1 = sample_opaque == NULL ? phi->in(i)->in(1)->in(1) : phi->in(i)->in(1)->in(1)->in(1);
    Node *n2 = sample_opaque == NULL ? phi->in(i)->in(1)->in(2) : phi->in(i)->in(1)->in(1)->in(2);
    phi1->set_req(i, n1);
    phi2->set_req(i, n2);
    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
  }
  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if (hit1) {                   // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1); // Remove new phi
    assert(hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;      // Use existing phi
  } else {                      // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if (hit2) {                   // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2); // Remove new phi
    assert(hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;      // Use existing phi
  } else {                      // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp
  Node *cmp = sample_cmp->clone();
  cmp->set_req(1, phi1);
  cmp->set_req(2, phi2);
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  // Make a new Bool
  Node *b = sample_bool->clone();
  b->set_req(1,cmp);
  _igvn.register_new_node_with_optimizer(b);
  set_ctrl(b, phi->in(0));

  if (sample_opaque != NULL) {
    // Re-wrap the new Bool in a clone of the original Opaque4 guard.
    Node* opaque = sample_opaque->clone();
    opaque->set_req(1, b);
    _igvn.register_new_node_with_optimizer(opaque);
    set_ctrl(opaque, phi->in(0));
    return opaque;
  }

  assert(b->is_Bool(), "");
  return b;
}
2101 
2102 //------------------------------clone_bool-------------------------------------
// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
// through the Phi recursively, and return a Cmp.
// Clone Cmp nodes through a Phi (recursively): phi inputs are Cmps (or TOP);
// build Phis for the Cmp operands, then a single new Cmp over those Phis.
CmpNode*PhaseIdealLoop::clone_bool(PhiNode* phi) {
  uint i;
  // Convert this Phi into a Phi merging Bools
  for( i = 1; i < phi->req(); i++ ) {
    Node *b = phi->in(i);
    if( b->is_Phi() ) {
      // Recurse on nested Phis first.
      _igvn.replace_input_of(phi, i, clone_bool(b->as_Phi()));
    } else {
      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
    }
  }

  Node *sample_cmp = phi->in(1);

  // Make Phis to merge the Cmp's inputs.
  PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
  PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
  for( uint j = 1; j < phi->req(); j++ ) {
    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
    Node *n1, *n2;
    if( cmp_top->is_Cmp() ) {
      n1 = cmp_top->in(1);
      n2 = cmp_top->in(2);
    } else {
      // TOP input: propagate TOP through both operand Phis on this path.
      n1 = n2 = cmp_top;
    }
    phi1->set_req( j, n1 );
    phi2->set_req( j, n2 );
    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
  }

  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if( hit1 ) {                  // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1); // Remove new phi
    assert( hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;      // Use existing phi
  } else {                      // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if( hit2 ) {                  // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2); // Remove new phi
    assert( hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;      // Use existing phi
  } else {                      // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp
  Node *cmp = sample_cmp->clone();
  cmp->set_req( 1, phi1 );
  cmp->set_req( 2, phi2 );
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  assert( cmp->is_Cmp(), "" );
  return (CmpNode*)cmp;
}
2170 
2171 //------------------------------sink_use---------------------------------------
2172 // If 'use' was in the loop-exit block, it now needs to be sunk
2173 // below the post-loop merge point.
2174 void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
2175   if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
2176     set_ctrl(use, post_loop);
2177     for (DUIterator j = use->outs(); use->has_out(j); j++)
2178       sink_use(use->out(j), post_loop);
2179   }
2180 }
2181 
// Fix up uses of 'old' (a node cloned by clone_loop) that live outside the
// loop being cloned: such uses now see two definitions (old and its clone
// nnn) and must be fed through a Phi at the post-loop merge point. Nodes
// that need further splitting (If/Bool/CreateEx users) are collected into
// the lazily-allocated split_*_set worklists for the caller.
void PhaseIdealLoop::clone_loop_handle_data_uses(Node* old, Node_List &old_new,
                                                 IdealLoopTree* loop, IdealLoopTree* outer_loop,
                                                 Node_List*& split_if_set, Node_List*& split_bool_set,
                                                 Node_List*& split_cex_set, Node_List& worklist,
                                                 uint new_counter, CloneLoopMode mode) {
  Node* nnn = old_new[old->_idx];
  // Copy uses to a worklist, so I can munge the def-use info
  // with impunity.
  for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
    worklist.push(old->fast_out(j));

  while( worklist.size() ) {
    Node *use = worklist.pop();
    if (!has_node(use))  continue; // Ignore dead nodes
    if (use->in(0) == C->top())  continue;
    IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
    // Check for data-use outside of loop - at least one of OLD or USE
    // must not be a CFG node.
#ifdef ASSERT
    if (loop->_head->as_Loop()->is_strip_mined() && outer_loop->is_member(use_loop) && !loop->is_member(use_loop) && old_new[use->_idx] == NULL) {
      Node* sfpt = loop->_head->as_CountedLoop()->outer_safepoint();
      assert(mode != IgnoreStripMined, "incorrect cloning mode");
      assert((mode == ControlAroundStripMined && use == sfpt) || !use->is_reachable_from_root(), "missed a node");
    }
#endif
    if (!loop->is_member(use_loop) && !outer_loop->is_member(use_loop) && (!old->is_CFG() || !use->is_CFG())) {

      // If the Data use is an IF, that means we have an IF outside of the
      // loop that is switching on a condition that is set inside of the
      // loop.  Happens if people set a loop-exit flag; then test the flag
      // in the loop to break the loop, then test is again outside of the
      // loop to determine which way the loop exited.
      // Loop predicate If node connects to Bool node through Opaque1 node.
      if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4) {
        // Since this code is highly unlikely, we lazily build the worklist
        // of such Nodes to go split.
        if (!split_if_set) {
          split_if_set = new Node_List();
        }
        split_if_set->push(use);
      }
      if (use->is_Bool()) {
        if (!split_bool_set) {
          split_bool_set = new Node_List();
        }
        split_bool_set->push(use);
      }
      if (use->Opcode() == Op_CreateEx) {
        if (!split_cex_set) {
          split_cex_set = new Node_List();
        }
        split_cex_set->push(use);
      }


      // Get "block" use is in
      uint idx = 0;
      while( use->in(idx) != old ) idx++;
      Node *prev = use->is_CFG() ? use : get_ctrl(use);
      assert(!loop->is_member(get_loop(prev)) && !outer_loop->is_member(get_loop(prev)), "" );
      // Nodes with _idx >= new_counter were created by this cloning pass:
      // for those, input 2 leads back toward the loop exits.
      Node *cfg = prev->_idx >= new_counter
        ? prev->in(2)
        : idom(prev);
      if( use->is_Phi() )     // Phi use is in prior block
        cfg = prev->in(idx);  // NOT in block of Phi itself
      if (cfg->is_top()) {    // Use is dead?
        _igvn.replace_input_of(use, idx, C->top());
        continue;
      }

      // If use is referenced through control edge... (idx == 0)
      if (mode == IgnoreStripMined && idx == 0) {
        LoopNode *head = loop->_head->as_Loop();
        if (head->is_strip_mined() && is_dominator(head->outer_loop_exit(), prev)) {
          // That node is outside the inner loop, leave it outside the
          // outer loop as well to not confuse verification code.
          assert(!loop->_parent->is_member(use_loop), "should be out of the outer loop");
          _igvn.replace_input_of(use, 0, head->outer_loop_exit());
          continue;
        }
      }

      // Walk up the CFG until we reach a block inside the outer loop:
      // that is where the merge with the cloned loop body happens.
      while(!outer_loop->is_member(get_loop(cfg))) {
        prev = cfg;
        cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
      }
      // If the use occurs after merging several exits from the loop, then
      // old value must have dominated all those exits.  Since the same old
      // value was used on all those exits we did not need a Phi at this
      // merge point.  NOW we do need a Phi here.  Each loop exit value
      // is now merged with the peeled body exit; each exit gets its own
      // private Phi and those Phis need to be merged here.
      Node *phi;
      if( prev->is_Region() ) {
        if( idx == 0 ) {      // Updating control edge?
          phi = prev;         // Just use existing control
        } else {              // Else need a new Phi
          phi = PhiNode::make( prev, old );
          // Now recursively fix up the new uses of old!
          for( uint i = 1; i < prev->req(); i++ ) {
            worklist.push(phi); // Onto worklist once for each 'old' input
          }
        }
      } else {
        // Get new RegionNode merging old and new loop exits
        prev = old_new[prev->_idx];
        assert( prev, "just made this in step 7" );
        if( idx == 0) {      // Updating control edge?
          phi = prev;         // Just use existing control
        } else {              // Else need a new Phi
          // Make a new Phi merging data values properly
          phi = PhiNode::make( prev, old );
          phi->set_req( 1, nnn );
        }
      }
      // If inserting a new Phi, check for prior hits
      if( idx != 0 ) {
        Node *hit = _igvn.hash_find_insert(phi);
        if( hit == NULL ) {
          _igvn.register_new_node_with_optimizer(phi); // Register new phi
        } else {                                      // or
          // Remove the new phi from the graph and use the hit
          _igvn.remove_dead_node(phi);
          phi = hit;                                  // Use existing phi
        }
        set_ctrl(phi, prev);
      }
      // Make 'use' use the Phi instead of the old loop body exit value
      _igvn.replace_input_of(use, idx, phi);
      if( use->_idx >= new_counter ) { // If updating new phis
        // Not needed for correctness, but prevents a weak assert
        // in AddPNode from tripping (when we end up with different
        // base & derived Phis that will become the same after
        // IGVN does CSE).
        Node *hit = _igvn.hash_find_insert(use);
        if( hit )             // Go ahead and re-hash for hits.
          _igvn.replace_node( use, hit );
      }

      // If 'use' was in the loop-exit block, it now needs to be sunk
      // below the post-loop merge point.
      sink_use( use, prev );
    }
  }
}
2327 
2328 static void clone_outer_loop_helper(Node* n, const IdealLoopTree *loop, const IdealLoopTree* outer_loop,
2329                                     const Node_List &old_new, Unique_Node_List& wq, PhaseIdealLoop* phase,
2330                                     bool check_old_new) {
2331   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2332     Node* u = n->fast_out(j);
2333     assert(check_old_new || old_new[u->_idx] == NULL, "shouldn't have been cloned");
2334     if (!u->is_CFG() && (!check_old_new || old_new[u->_idx] == NULL)) {
2335       Node* c = phase->get_ctrl(u);
2336       IdealLoopTree* u_loop = phase->get_loop(c);
2337       assert(!loop->is_member(u_loop), "can be in outer loop or out of both loops only");
2338       if (outer_loop->is_member(u_loop)) {
2339         wq.push(u);
2340       } else {
2341         // nodes pinned with control in the outer loop but not referenced from the safepoint must be moved out of
2342         // the outer loop too
2343         Node* u_c = u->in(0);
2344         if (u_c != NULL) {
2345           IdealLoopTree* u_c_loop = phase->get_loop(u_c);
2346           if (outer_loop->is_member(u_c_loop) && !loop->is_member(u_c_loop)) {
2347             wq.push(u);
2348           }
2349         }
2350       }
2351     }
2352   }
2353 }
2354 
// Handle the outer strip mined loop when cloning a strip mined counted loop.
// Depending on 'mode':
// - CloneIncludesStripMined: the outer loop skeleton (OuterStripMinedLoop,
//   tail, loop-end If and SafePoint) is cloned along with the inner loop.
// - other modes (except IgnoreStripMined, handled by the caller's guard): the
//   cloned inner loop is not strip mined; it is wired directly to the control
//   above the old outer loop head.
// Data nodes kept in the outer strip mined loop by the safepoint are cloned
// and pushed on 'extra_data_nodes'; data nodes that were pessimistically
// assigned to the outer loop are moved out of the loop nest entirely.
void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
                                      IdealLoopTree* outer_loop, int dd, Node_List &old_new,
                                      Node_List& extra_data_nodes) {
  if (head->is_strip_mined() && mode != IgnoreStripMined) {
    // Pieces of the outer strip mined loop skeleton around the inner loop.
    CountedLoopNode* cl = head->as_CountedLoop();
    Node* l = cl->outer_loop();
    Node* tail = cl->outer_loop_tail();
    IfNode* le = cl->outer_loop_end();
    Node* sfpt = cl->outer_safepoint();
    CountedLoopEndNode* cle = cl->loopexit();
    CountedLoopNode* new_cl = old_new[cl->_idx]->as_CountedLoop();
    CountedLoopEndNode* new_cle = new_cl->as_CountedLoop()->loopexit_or_null();
    Node* cle_out = cle->proj_out(false); // exit projection of the inner loop

    Node* new_sfpt = NULL;
    Node* new_cle_out = cle_out->clone();
    old_new.map(cle_out->_idx, new_cle_out);
    if (mode == CloneIncludesStripMined) {
      // clone outer loop body
      Node* new_l = l->clone();
      Node* new_tail = tail->clone();
      IfNode* new_le = le->clone()->as_If();
      new_sfpt = sfpt->clone();

      // Record loop membership and dominator info for the cloned skeleton:
      // the clone sits in the parent of the outer loop for now.
      set_loop(new_l, outer_loop->_parent);
      set_idom(new_l, new_l->in(LoopNode::EntryControl), dd);
      set_loop(new_cle_out, outer_loop->_parent);
      set_idom(new_cle_out, new_cle, dd);
      set_loop(new_sfpt, outer_loop->_parent);
      set_idom(new_sfpt, new_cle_out, dd);
      set_loop(new_le, outer_loop->_parent);
      set_idom(new_le, new_sfpt, dd);
      set_loop(new_tail, outer_loop->_parent);
      set_idom(new_tail, new_le, dd);
      set_idom(new_cl, new_l, dd);

      old_new.map(l->_idx, new_l);
      old_new.map(tail->_idx, new_tail);
      old_new.map(le->_idx, new_le);
      old_new.map(sfpt->_idx, new_sfpt);

      // Wire the cloned skeleton together, mirroring the original:
      // new_l -> new_cl ... new_cle -> new_cle_out -> new_sfpt -> new_le -> new_tail -> new_l
      new_l->set_req(LoopNode::LoopBackControl, new_tail);
      new_l->set_req(0, new_l);
      new_tail->set_req(0, new_le);
      new_le->set_req(0, new_sfpt);
      new_sfpt->set_req(0, new_cle_out);
      new_cle_out->set_req(0, new_cle);
      new_cl->set_req(LoopNode::EntryControl, new_l);

      _igvn.register_new_node_with_optimizer(new_l);
      _igvn.register_new_node_with_optimizer(new_tail);
      _igvn.register_new_node_with_optimizer(new_le);
    } else {
      // The cloned inner loop is not strip mined: bypass the (uncloned) outer
      // loop head so the clone's entry comes from above the outer loop.
      Node *newhead = old_new[loop->_head->_idx];
      newhead->as_Loop()->clear_strip_mined();
      _igvn.replace_input_of(newhead, LoopNode::EntryControl, newhead->in(LoopNode::EntryControl)->in(LoopNode::EntryControl));
      set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
    }
    // Look at data node that were assigned a control in the outer
    // loop: they are kept in the outer loop by the safepoint so start
    // from the safepoint node's inputs.
    // NOTE: this local shadows the 'outer_loop' parameter; for a strip mined
    // loop get_loop(l) is presumably the same tree the caller passed in.
    IdealLoopTree* outer_loop = get_loop(l);
    Node_Stack stack(2);
    stack.push(sfpt, 1);
    uint new_counter = C->unique();
    // Depth-first walk of the safepoint's input graph restricted to data
    // nodes controlled in the outer loop that have not been cloned yet.
    while (stack.size() > 0) {
      Node* n = stack.node();
      uint i = stack.index();
      // Skip inputs that are absent, are CFG/pinned, are outside the outer
      // loop, or already have a fresh clone (idx >= new_counter).
      while (i < n->req() &&
             (n->in(i) == NULL ||
              !has_ctrl(n->in(i)) ||
              get_loop(get_ctrl(n->in(i))) != outer_loop ||
              (old_new[n->in(i)->_idx] != NULL && old_new[n->in(i)->_idx]->_idx >= new_counter))) {
        i++;
      }
      if (i < n->req()) {
        // Found an input to process: descend into it first (post-order clone).
        stack.set_index(i+1);
        stack.push(n->in(i), 0);
      } else {
        // All inputs handled: clone this node (the safepoint itself maps to
        // new_sfpt, which is NULL unless mode == CloneIncludesStripMined).
        assert(old_new[n->_idx] == NULL || n == sfpt || old_new[n->_idx]->_idx < new_counter, "no clone yet");
        Node* m = n == sfpt ? new_sfpt : n->clone();
        if (m != NULL) {
          // Rewire the clone's inputs to the already-cloned inputs.
          for (uint i = 0; i < n->req(); i++) {
            if (m->in(i) != NULL && old_new[m->in(i)->_idx] != NULL) {
              m->set_req(i, old_new[m->in(i)->_idx]);
            }
          }
        } else {
          assert(n == sfpt && mode != CloneIncludesStripMined, "where's the safepoint clone?");
        }
        if (n != sfpt) {
          extra_data_nodes.push(n);
          _igvn.register_new_node_with_optimizer(m);
          assert(get_ctrl(n) == cle_out, "what other control?");
          set_ctrl(m, new_cle_out);
          old_new.map(n->_idx, m);
        }
        stack.pop();
      }
    }
    if (mode == CloneIncludesStripMined) {
      // Register the cloned safepoint and exit projection now that their
      // inputs have been fixed by the walk above.
      _igvn.register_new_node_with_optimizer(new_sfpt);
      _igvn.register_new_node_with_optimizer(new_cle_out);
    }
    // Some other transformation may have pessimistically assigned some
    // data nodes to the outer loop. Set their control so they are out
    // of the outer loop.
    ResourceMark rm;
    Unique_Node_List wq;
    for (uint i = 0; i < extra_data_nodes.size(); i++) {
      Node* old = extra_data_nodes.at(i);
      clone_outer_loop_helper(old, loop, outer_loop, old_new, wq, this, true);
    }

    Node* inner_out = sfpt->in(0);
    if (inner_out->outcnt() > 1) {
      clone_outer_loop_helper(inner_out, loop, outer_loop, old_new, wq, this, true);
    }

    // Move every collected node (and transitively its qualifying uses) out of
    // the loop nest by re-pinning it at the outer loop exit.
    Node* new_ctrl = cl->outer_loop_exit();
    assert(get_loop(new_ctrl) != outer_loop, "must be out of the loop nest");
    for (uint i = 0; i < wq.size(); i++) {
      Node* n = wq.at(i);
      set_ctrl(n, new_ctrl);
      if (n->in(0) != NULL) {
        _igvn.replace_input_of(n, 0, new_ctrl);
      }
      clone_outer_loop_helper(n, loop, outer_loop, old_new, wq, this, false);
    }
  } else {
    // Not strip mined (or strip mining ignored): only fix the clone's idom.
    Node *newhead = old_new[loop->_head->_idx];
    set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
  }
}
2489 
2490 //------------------------------clone_loop-------------------------------------
2491 //
2492 //                   C L O N E   A   L O O P   B O D Y
2493 //
2494 // This is the basic building block of the loop optimizations.  It clones an
2495 // entire loop body.  It makes an old_new loop body mapping; with this mapping
2496 // you can find the new-loop equivalent to an old-loop node.  All new-loop
2497 // nodes are exactly equal to their old-loop counterparts, all edges are the
2498 // same.  All exits from the old-loop now have a RegionNode that merges the
2499 // equivalent new-loop path.  This is true even for the normal "loop-exit"
2500 // condition.  All uses of loop-invariant old-loop values now come from (one
2501 // or more) Phis that merge their new-loop equivalents.
2502 //
2503 // This operation leaves the graph in an illegal state: there are two valid
2504 // control edges coming from the loop pre-header to both loop bodies.  I'll
2505 // definitely have to hack the graph after running this transform.
2506 //
2507 // From this building block I will further edit edges to perform loop peeling
2508 // or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
2509 //
// Parameter side_by_side_idom:
//   When side_by_side_idom is NULL, the dominator tree is constructed for
2512 //      the clone loop to dominate the original.  Used in construction of
2513 //      pre-main-post loop sequence.
2514 //   When nonnull, the clone and original are side-by-side, both are
2515 //      dominated by the side_by_side_idom node.  Used in construction of
2516 //      unswitched loops.
void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
                                CloneLoopMode mode, Node* side_by_side_idom) {

  LoopNode* head = loop->_head->as_Loop();
  head->verify_strip_mined(1);

  // Optional tracing for vectorization runs.
  if (C->do_vector_loop() && PrintOpto) {
    const char* mname = C->method()->name()->as_quoted_ascii();
    if (mname != NULL) {
      tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
    }
  }

  // The clone map records old->new node pairs per clone generation for the
  // vectorizer; bump the generation before cloning.
  CloneMap& cm = C->clone_map();
  if (C->do_vector_loop()) {
    cm.set_clone_idx(cm.max_gen()+1);
#ifndef PRODUCT
    if (PrintOpto) {
      tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
      loop->dump_head();
    }
#endif
  }

  // Step 1: Clone the loop body.  Make the old->new mapping.
  clone_loop_body(loop->_body, old_new, &cm);

  // For a strip mined loop (unless strip mining is ignored), exits are
  // relative to the enclosing OuterStripMinedLoop, not the inner loop.
  IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;

  // Step 2: Fix the edges in the new body.  If the old input is outside the
  // loop use it.  If the old input is INside the loop, use the corresponding
  // new node instead.
  fix_body_edges(loop->_body, loop, old_new, dd, outer_loop->_parent, false);

  Node_List extra_data_nodes; // data nodes in the outer strip mined loop
  clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);

  // Step 3: Now fix control uses.  Loop varying control uses have already
  // been fixed up (as part of all input edges in Step 2).  Loop invariant
  // control uses must be either an IfFalse or an IfTrue.  Make a merge
  // point to merge the old and new IfFalse/IfTrue nodes; make the use
  // refer to this.
  Node_List worklist;
  uint new_counter = C->unique();
  fix_ctrl_uses(loop->_body, loop, old_new, mode, side_by_side_idom, &cm, worklist);

  // Step 4: If loop-invariant use is not control, it must be dominated by a
  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
  // there if needed.  Make a Phi there merging old and new used values.
  Node_List *split_if_set = NULL;
  Node_List *split_bool_set = NULL;
  Node_List *split_cex_set = NULL;
  fix_data_uses(loop->_body, loop, mode, outer_loop, new_counter, old_new, worklist, split_if_set, split_bool_set, split_cex_set);

  // The data nodes cloned with the outer strip mined loop need the same
  // out-of-loop use fixup as the loop body itself.
  for (uint i = 0; i < extra_data_nodes.size(); i++) {
    Node* old = extra_data_nodes.at(i);
    clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
                                split_bool_set, split_cex_set, worklist, new_counter,
                                mode);
  }

  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
  // the loop uses a condition set in the loop.  The original IF probably
  // takes control from one or more OLD Regions (which in turn get from NEW
  // Regions).  In any case, there will be a set of Phis for each merge point
  // from the IF up to where the original BOOL def exits the loop.
  finish_clone_loop(split_if_set, split_bool_set, split_cex_set);

}
2586 
2587 void PhaseIdealLoop::finish_clone_loop(Node_List* split_if_set, Node_List* split_bool_set, Node_List* split_cex_set) {
2588   if (split_if_set) {
2589     while (split_if_set->size()) {
2590       Node *iff = split_if_set->pop();
2591       if (iff->in(1)->is_Phi()) {
2592         Node *b = clone_iff(iff->in(1)->as_Phi());
2593         _igvn.replace_input_of(iff, 1, b);
2594       }
2595     }
2596   }
2597   if (split_bool_set) {
2598     while (split_bool_set->size()) {
2599       Node *b = split_bool_set->pop();
2600       Node *phi = b->in(1);
2601       assert(phi->is_Phi(), "");
2602       CmpNode *cmp = clone_bool((PhiNode*) phi);
2603       _igvn.replace_input_of(b, 1, cmp);
2604     }
2605   }
2606   if (split_cex_set) {
2607     while (split_cex_set->size()) {
2608       Node *b = split_cex_set->pop();
2609       assert(b->in(0)->is_Region(), "");
2610       assert(b->in(1)->is_Phi(), "");
2611       assert(b->in(0)->in(0) == b->in(1)->in(0), "");
2612       split_up(b, b->in(0), NULL);
2613     }
2614   }
2615 }
2616 
2617 void PhaseIdealLoop::fix_data_uses(Node_List& body, IdealLoopTree* loop, CloneLoopMode mode, IdealLoopTree* outer_loop,
2618                                    uint new_counter, Node_List &old_new, Node_List &worklist, Node_List*& split_if_set,
2619                                    Node_List*& split_bool_set, Node_List*& split_cex_set) {
2620   for(uint i = 0; i < body.size(); i++ ) {
2621     Node* old = body.at(i);
2622     clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
2623                                 split_bool_set, split_cex_set, worklist, new_counter,
2624                                 mode);
2625   }
2626 }
2627 
// Step 3 of clone_loop: for every CFG node of the loop body, find its
// out-of-loop control uses (loop exit projections), clone the projection for
// the new loop, and insert a 3-input Region merging the old and new exits so
// downstream control flow sees a single exit point. Updates the dominator
// tree and maps the old projection to the new Region in old_new.
void PhaseIdealLoop::fix_ctrl_uses(const Node_List& body, const IdealLoopTree* loop, Node_List &old_new, CloneLoopMode mode,
                                   Node* side_by_side_idom, CloneMap* cm, Node_List &worklist) {
  LoopNode* head = loop->_head->as_Loop();
  for(uint i = 0; i < body.size(); i++ ) {
    Node* old = body.at(i);
    if( !old->is_CFG() ) continue;

    // Copy uses to a worklist, so I can munge the def-use info
    // with impunity.
    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++) {
      worklist.push(old->fast_out(j));
    }

    while (worklist.size()) {  // Visit all uses
      Node *use = worklist.pop();
      if (!has_node(use))  continue; // Ignore dead nodes
      IdealLoopTree *use_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use );
      if (!loop->is_member(use_loop) && use->is_CFG()) {
        // Both OLD and USE are CFG nodes here.
        assert(use->is_Proj(), "" );
        Node* nnn = old_new[old->_idx];

        Node* newuse = NULL;
        if (head->is_strip_mined() && mode != IgnoreStripMined) {
          // For a strip mined loop, the relevant exit is the outer loop's
          // exit projection, not the inner CountedLoopEnd's.
          CountedLoopNode* cl = head->as_CountedLoop();
          CountedLoopEndNode* cle = cl->loopexit();
          Node* cle_out = cle->proj_out_or_null(false);
          if (use == cle_out) {
            IfNode* le = cl->outer_loop_end();
            use = le->proj_out(false);
            use_loop = get_loop(use);
            if (mode == CloneIncludesStripMined) {
              nnn = old_new[le->_idx];
            } else {
              // Reuse the exit projection cloned by clone_outer_loop().
              newuse = old_new[cle_out->_idx];
            }
          }
        }
        if (newuse == NULL) {
          newuse = use->clone();
        }

        // Clone the loop exit control projection
        if (C->do_vector_loop() && cm != NULL) {
          cm->verify_insert_and_clone(use, newuse, cm->clone_idx());
        }
        newuse->set_req(0,nnn);
        _igvn.register_new_node_with_optimizer(newuse);
        set_loop(newuse, use_loop);
        set_idom(newuse, nnn, dom_depth(nnn) + 1 );

        // We need a Region to merge the exit from the peeled body and the
        // exit from the old loop body.
        RegionNode *r = new RegionNode(3);
        // Map the old use to the new merge point
        old_new.map( use->_idx, r );
        uint dd_r = MIN2(dom_depth(newuse), dom_depth(use));
        assert(dd_r >= dom_depth(dom_lca(newuse, use)), "" );

        // The original user of 'use' uses 'r' instead.
        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
          Node* useuse = use->last_out(l);
          _igvn.rehash_node_delayed(useuse);
          uint uses_found = 0;
          if (useuse->in(0) == use) {
            useuse->set_req(0, r);
            uses_found++;
            if (useuse->is_CFG()) {
              // This is not a dom_depth > dd_r because when new
              // control flow is constructed by a loop opt, a node and
              // its dominator can end up at the same dom_depth
              assert(dom_depth(useuse) >= dd_r, "");
              set_idom(useuse, r, dom_depth(useuse));
            }
          }
          // Also scan the data inputs for edges to 'use'.
          for (uint k = 1; k < useuse->req(); k++) {
            if( useuse->in(k) == use ) {
              useuse->set_req(k, r);
              uses_found++;
              if (useuse->is_Loop() && k == LoopNode::EntryControl) {
                // This is not a dom_depth > dd_r because when new
                // control flow is constructed by a loop opt, a node
                // and its dominator can end up at the same dom_depth
                assert(dom_depth(useuse) >= dd_r , "");
                set_idom(useuse, r, dom_depth(useuse));
              }
            }
          }
          l -= uses_found;    // we deleted 1 or more copies of this edge
        }

        // Now finish up 'r': input 1 is the new loop's exit, input 2 the old one.
        r->set_req(1, newuse);
        r->set_req(2,    use);
        _igvn.register_new_node_with_optimizer(r);
        set_loop(r, use_loop);
        set_idom(r, (side_by_side_idom == NULL) ? newuse->in(0) : side_by_side_idom, dd_r);
      } // End of if a loop-exit test
    }
  }
}
2729 
2730 void PhaseIdealLoop::fix_body_edges(const Node_List &body, IdealLoopTree* loop, const Node_List &old_new, int dd,
2731                                     IdealLoopTree* parent, bool partial) {
2732   for(uint i = 0; i < body.size(); i++ ) {
2733     Node *old = body.at(i);
2734     Node *nnn = old_new[old->_idx];
2735     // Fix CFG/Loop controlling the new node
2736     if (has_ctrl(old)) {
2737       set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
2738     } else {
2739       set_loop(nnn, parent);
2740       if (old->outcnt() > 0) {
2741         Node* dom = idom(old);
2742         if (old_new[dom->_idx] != NULL) {
2743           dom = old_new[dom->_idx];
2744           set_idom(nnn, dom, dd );
2745         }
2746       }
2747     }
2748     // Correct edges to the new node
2749     for (uint j = 0; j < nnn->req(); j++) {
2750         Node *n = nnn->in(j);
2751         if (n != NULL) {
2752           IdealLoopTree *old_in_loop = get_loop(has_ctrl(n) ? get_ctrl(n) : n);
2753           if (loop->is_member(old_in_loop)) {
2754             if (old_new[n->_idx] != NULL) {
2755               nnn->set_req(j, old_new[n->_idx]);
2756             } else {
2757               assert(!body.contains(n), "");
2758               assert(partial, "node not cloned");
2759             }
2760           }
2761         }
2762     }
2763     _igvn.hash_find_insert(nnn);
2764   }
2765 }
2766 
2767 void PhaseIdealLoop::clone_loop_body(const Node_List& body, Node_List &old_new, CloneMap* cm) {
2768   for (uint i = 0; i < body.size(); i++) {
2769     Node* old = body.at(i);
2770     Node* nnn = old->clone();
2771     old_new.map(old->_idx, nnn);
2772     if (old->is_reduction()) {
2773       // Reduction flag is not copied by default. Copy it here when cloning the entire loop body.
2774       nnn->add_flag(Node::Flag_is_reduction);
2775     }
2776     if (C->do_vector_loop() && cm != NULL) {
2777       cm->verify_insert_and_clone(old, nnn, cm->clone_idx());
2778     }
2779     _igvn.register_new_node_with_optimizer(nnn);
2780   }
2781 }
2782 
2783 
2784 //---------------------- stride_of_possible_iv -------------------------------------
2785 // Looks for an iff/bool/comp with one operand of the compare
2786 // being a cycle involving an add and a phi,
2787 // with an optional truncation (left-shift followed by a right-shift)
2788 // of the add. Returns zero if not an iv.
// Returns the constant stride of the induction-variable-like cycle tested by
// 'iff', or 0 when 'iff' does not test a recognizable iv. Two shapes are
// matched: the compare operand is the Phi of the cycle, or it is the
// (optionally truncated) increment feeding the Phi.
int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
  Node* trunc1 = NULL;
  Node* trunc2 = NULL;
  const TypeInteger* ttype = NULL;
  // Must be an If over a Bool node.
  if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
    return 0;
  }
  BoolNode* bl = iff->in(1)->as_Bool();
  Node* cmp = bl->in(1);
  // Only int compares (signed or unsigned) are considered.
  if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) {
    return 0;
  }
  // Must have an invariant operand
  if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
    return 0;
  }
  Node* add2 = NULL;   // the candidate stride value
  Node* cmp1 = cmp->in(1);
  if (cmp1->is_Phi()) {
    // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
    Node* phi = cmp1;
    // Look for a Phi input that is an increment of the Phi itself.
    for (uint i = 1; i < phi->req(); i++) {
      Node* in = phi->in(i);
      Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
                                &trunc1, &trunc2, &ttype, T_INT);
      if (add && add->in(1) == phi) {
        add2 = add->in(2);
        break;
      }
    }
  } else {
    // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
    Node* addtrunc = cmp1;
    Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
                                &trunc1, &trunc2, &ttype, T_INT);
    if (add && add->in(1)->is_Phi()) {
      // Confirm the increment feeds back into the Phi, closing the cycle.
      Node* phi = add->in(1);
      for (uint i = 1; i < phi->req(); i++) {
        if (phi->in(i) == addtrunc) {
          add2 = add->in(2);
          break;
        }
      }
    }
  }
  // Only a compile-time constant stride qualifies.
  if (add2 != NULL) {
    const TypeInt* add2t = _igvn.type(add2)->is_int();
    if (add2t->is_con()) {
      return add2t->get_con();
    }
  }
  return 0;
}
2842 
2843 
2844 //---------------------- stay_in_loop -------------------------------------
2845 // Return the (unique) control output node that's in the loop (if it exists.)
2846 Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
2847   Node* unique = NULL;
2848   if (!n) return NULL;
2849   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2850     Node* use = n->fast_out(i);
2851     if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
2852       if (unique != NULL) {
2853         return NULL;
2854       }
2855       unique = use;
2856     }
2857   }
2858   return unique;
2859 }
2860 
2861 //------------------------------ register_node -------------------------------------
2862 // Utility to register node "n" with PhaseIdealLoop
2863 void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
2864   _igvn.register_new_node_with_optimizer(n);
2865   loop->_body.push(n);
2866   if (n->is_CFG()) {
2867     set_loop(n, loop);
2868     set_idom(n, pred, ddepth);
2869   } else {
2870     set_ctrl(n, pred);
2871   }
2872 }
2873 
2874 //------------------------------ proj_clone -------------------------------------
2875 // Utility to create an if-projection
2876 ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
2877   ProjNode* c = p->clone()->as_Proj();
2878   c->set_req(0, iff);
2879   return c;
2880 }
2881 
2882 //------------------------------ short_circuit_if -------------------------------------
2883 // Force the iff control output to be the live_proj
2884 Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
2885   guarantee(live_proj != NULL, "null projection");
2886   int proj_con = live_proj->_con;
2887   assert(proj_con == 0 || proj_con == 1, "false or true projection");
2888   Node *con = _igvn.intcon(proj_con);
2889   set_ctrl(con, C->root());
2890   if (iff) {
2891     iff->set_req(1, con);
2892   }
2893   return con;
2894 }
2895 
2896 //------------------------------ insert_if_before_proj -------------------------------------
2897 // Insert a new if before an if projection (* - new node)
2898 //
2899 // before
2900 //           if(test)
2901 //           /     \
2902 //          v       v
2903 //    other-proj   proj (arg)
2904 //
2905 // after
2906 //           if(test)
2907 //           /     \
2908 //          /       v
2909 //         |      * proj-clone
2910 //         v          |
2911 //    other-proj      v
2912 //                * new_if(relop(cmp[IU](left,right)))
2913 //                  /  \
2914 //                 v    v
2915 //         * new-proj  proj
2916 //         (returned)
2917 //
// Insert a new If (testing relop(cmp[IU](left, right))) between 'proj's If
// and 'proj' itself, per the diagram above. Returns the new If's exit
// projection (the clone of the original If's other projection). All new
// nodes are registered with the dominator depth of the original projection.
ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
  IfNode* iff = proj->in(0)->as_If();
  IdealLoopTree *loop = get_loop(proj);
  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  int ddepth = dom_depth(proj);

  _igvn.rehash_node_delayed(iff);
  _igvn.rehash_node_delayed(proj);

  proj->set_req(0, NULL);  // temporary disconnect
  // Clone of 'proj' stays attached to the original If.
  ProjNode* proj2 = proj_clone(proj, iff);
  register_node(proj2, loop, iff, ddepth);

  // Signed or unsigned integer compare, as requested by the caller.
  Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right);
  register_node(cmp, loop, proj2, ddepth);

  BoolNode* bol = new BoolNode(cmp, relop);
  register_node(bol, loop, proj2, ddepth);

  // Preserve the original If's kind (plain If vs RangeCheck) and profile data.
  int opcode = iff->Opcode();
  assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode");
  IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt):
    new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt);
  register_node(new_if, loop, proj2, ddepth);

  proj->set_req(0, new_if); // reattach
  set_idom(proj, new_if, ddepth);

  // The new exit projection mirrors the original If's other projection and
  // lives in that projection's loop.
  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
  guarantee(new_exit != NULL, "null exit node");
  register_node(new_exit, get_loop(other_proj), new_if, ddepth);

  return new_exit;
}
2952 
2953 //------------------------------ insert_region_before_proj -------------------------------------
2954 // Insert a region before an if projection (* - new node)
2955 //
2956 // before
2957 //           if(test)
2958 //          /      |
2959 //         v       |
2960 //       proj      v
2961 //               other-proj
2962 //
2963 // after
2964 //           if(test)
2965 //          /      |
2966 //         v       |
2967 // * proj-clone    v
2968 //         |     other-proj
2969 //         v
2970 // * new-region
2971 //         |
2972 //         v
2973 // *      dum_if
2974 //       /     \
2975 //      v       \
2976 // * dum-proj    v
2977 //              proj
2978 //
// Insert a Region (followed by a dummy If forced to always take 'proj's
// path) between 'proj's If and 'proj', per the diagram above. The Region is
// returned so callers can add more entry paths to it. Dominator depths of
// the original projection are reused for the inserted nodes.
RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
  IfNode* iff = proj->in(0)->as_If();
  IdealLoopTree *loop = get_loop(proj);
  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  int ddepth = dom_depth(proj);

  _igvn.rehash_node_delayed(iff);
  _igvn.rehash_node_delayed(proj);

  proj->set_req(0, NULL);  // temporary disconnect
  // Clone of 'proj' stays attached to the original If and feeds the Region.
  ProjNode* proj2 = proj_clone(proj, iff);
  register_node(proj2, loop, iff, ddepth);

  RegionNode* reg = new RegionNode(2);
  reg->set_req(1, proj2);
  register_node(reg, loop, iff, ddepth);

  // Dummy If whose condition is a constant forcing the 'proj' path
  // (see short_circuit_if); keeps the CFG shape valid below the Region.
  IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
  register_node(dum_if, loop, reg, ddepth);

  proj->set_req(0, dum_if); // reattach
  set_idom(proj, dum_if, ddepth);

  // The dummy If's other (dead) projection.
  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
  register_node(dum_proj, loop, dum_if, ddepth);

  return reg;
}
3007 
//------------------------------ insert_cmpi_loop_exit -------------------------------------
// Clone a signed compare loop exit from an unsigned compare and
// insert it before the unsigned cmp on the stay-in-loop path.
// All new nodes inserted in the dominator tree between the original
// if and its projections.  The original if test is replaced with
// a constant to force the stay-in-loop path.
//
// This is done to make sure that the original if and its projections
// still dominate the same set of control nodes, that the ctrl() relation
// from data nodes to them is preserved, and that their loop nesting is
// preserved.
//
// before
//          if(i <u limit)    unsigned compare loop exit
//         /       |
//        v        v
//   exit-proj   stay-in-loop-proj
//
// after
//          if(stay-in-loop-const)  original if
//         /       |
//        /        v
//       /  if(i <  limit)    new signed test
//      /  /       |
//     /  /        v
//    /  /  if(i <u limit)    new cloned unsigned test
//   /  /   /      |
//   v  v  v       |
//    region       |
//        |        |
//      dum-if     |
//     /  |        |
// ether  |        |
//        v        v
//   exit-proj   stay-in-loop-proj
//
// Returns the new signed-compare IfNode, or NULL when the pattern does not
// match (non-lt test, not a CmpU, no recognizable induction variable, or an
// exit shape that cannot be split into a single signed condition).
IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
  const bool Signed   = true;
  const bool Unsigned = false;

  BoolNode* bol = if_cmpu->in(1)->as_Bool();
  if (bol->_test._test != BoolTest::lt) return NULL;  // only the canonical (i <u limit) shape is handled
  CmpNode* cmpu = bol->in(1)->as_Cmp();
  if (cmpu->Opcode() != Op_CmpU) return NULL;         // must be an unsigned compare
  int stride = stride_of_possible_iv(if_cmpu);
  if (stride == 0) return NULL;                       // no probable induction variable found

  // Projection that keeps control inside the loop.
  Node* lp_proj = stay_in_loop(if_cmpu, loop);
  guarantee(lp_proj != NULL, "null loop node");

  ProjNode* lp_continue = lp_proj->as_Proj();
  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
  if (!lp_exit->is_IfFalse()) {
    // The loop exit condition is (i <u limit) ==> (i >= 0 && i < limit).
    // We therefore can't add a single exit condition.
    return NULL;
  }
  // The loop exit condition is !(i <u limit) ==> (i < 0 || i >= limit).
  // Split out the exit condition (i < 0) for stride < 0 or (i >= limit) for stride > 0.
  Node* limit = NULL;
  if (stride > 0) {
    limit = cmpu->in(2);
  } else {
    limit = _igvn.makecon(TypeInt::ZERO);
    set_ctrl(limit, C->root());
  }
  // Create a new region on the exit path
  RegionNode* reg = insert_region_before_proj(lp_exit);
  guarantee(reg != NULL, "null region node");

  // Clone the if-cmpu-true-false using a signed compare
  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
  reg->add_req(cmpi_exit);

  // Clone the if-cmpu-true-false
  BoolTest::mask rel_u = bol->_test._test;
  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
  reg->add_req(cmpu_exit);

  // Force original if to stay in loop.
  short_circuit_if(if_cmpu, lp_continue);

  return cmpi_exit->in(0)->as_If();  // the newly inserted signed-compare If
}
3093 
3094 //------------------------------ remove_cmpi_loop_exit -------------------------------------
3095 // Remove a previously inserted signed compare loop exit.
3096 void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
3097   Node* lp_proj = stay_in_loop(if_cmp, loop);
3098   assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
3099          stay_in_loop(lp_proj, loop)->is_If() &&
3100          stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
3101   Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
3102   set_ctrl(con, C->root());
3103   if_cmp->set_req(1, con);
3104 }
3105 
//------------------------------ scheduled_nodelist -------------------------------------
// Create a post order schedule of nodes that are in the
// "member" set.  The list is returned in "sched".
// The first node in "sched" is the loop head, followed by
// nodes which have no inputs in the "member" set, and then
// followed by the nodes that have an immediate input dependence
// on a node in "sched".
void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {

  assert(member.test(loop->_head->_idx), "loop head must be in member set");
  VectorSet visited;
  Node_Stack nstack(loop->_body.size());

  Node* n  = loop->_head;  // top of stack is cached in "n"
  uint idx = 0;            // next output index of "n" to visit
  visited.set(n->_idx);

  // Initially push all with no inputs from within member set
  for(uint i = 0; i < loop->_body.size(); i++ ) {
    Node *elt = loop->_body.at(i);
    if (member.test(elt->_idx)) {
      bool found = false;
      // Does "elt" have any (non-self) input that is also a member?
      for (uint j = 0; j < elt->req(); j++) {
        Node* def = elt->in(j);
        if (def && member.test(def->_idx) && def != elt) {
          found = true;
          break;
        }
      }
      if (!found && elt != loop->_head) {
        // "elt" is a member-set root: spill the current cached top
        // onto the stack and make "elt" the new cached top.
        nstack.push(n, idx);
        n = elt;
        assert(!visited.test(n->_idx), "not seen yet");
        visited.set(n->_idx);
      }
    }
  }

  // traverse out's that are in the member set
  while (true) {
    if (idx < n->outcnt()) {
      Node* use = n->raw_out(idx);
      idx++;
      if (!visited.test_set(use->_idx)) {
        if (member.test(use->_idx)) {
          // Descend into an unvisited member use; remember where to
          // resume the scan of "n"'s outputs.
          nstack.push(n, idx);
          n = use;
          idx = 0;
        }
      }
    } else {
      // All outputs processed
      sched.push(n);  // emit "n" once every member reachable through its outputs is done
      if (nstack.is_empty()) break;
      n   = nstack.node();
      idx = nstack.index();
      nstack.pop();
    }
  }
}
3166 
3167 
3168 //------------------------------ has_use_in_set -------------------------------------
3169 // Has a use in the vector set
3170 bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
3171   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
3172     Node* use = n->fast_out(j);
3173     if (vset.test(use->_idx)) {
3174       return true;
3175     }
3176   }
3177   return false;
3178 }
3179 
3180 
3181 //------------------------------ has_use_internal_to_set -------------------------------------
3182 // Has use internal to the vector set (ie. not in a phi at the loop head)
3183 bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
3184   Node* head  = loop->_head;
3185   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
3186     Node* use = n->fast_out(j);
3187     if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
3188       return true;
3189     }
3190   }
3191   return false;
3192 }
3193 
3194 
//------------------------------ clone_for_use_outside_loop -------------------------------------
// clone "n" for uses that are outside of loop
// Returns the number of clones made, or -1 when the required number of
// clones would exceed the compile-time node budget (caller must bail out).
int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
  int cloned = 0;
  assert(worklist.size() == 0, "should be empty");
  // Collect all uses of "n" whose controlling block lies outside the loop.
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
      worklist.push(use);
    }
  }

  // One clone per collected use; reject if that would blow the node budget.
  if (C->check_node_count(worklist.size() + NodeLimitFudgeFactor,
                          "Too many clones required in clone_for_use_outside_loop in partial peeling")) {
    return -1;
  }

  while( worklist.size() ) {
    Node *use = worklist.pop();
    if (!has_node(use) || use->in(0) == C->top()) continue;  // skip dead uses
    // Locate the input slot of "use" that refers to "n".
    uint j;
    for (j = 0; j < use->req(); j++) {
      if (use->in(j) == n) break;
    }
    assert(j < use->req(), "must be there");

    // clone "n" and insert it between the inputs of "n" and the use outside the loop
    Node* n_clone = n->clone();
    _igvn.replace_input_of(use, j, n_clone);
    cloned++;
    // Compute the control under which the clone lives.
    Node* use_c;
    if (!use->is_Phi()) {
      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
    } else {
      // Use in a phi is considered a use in the associated predecessor block
      use_c = use->in(0)->in(j);
    }
    set_ctrl(n_clone, use_c);
    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
    get_loop(use_c)->_body.push(n_clone);
    _igvn.register_new_node_with_optimizer(n_clone);
#ifndef PRODUCT
    if (TracePartialPeeling) {
      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
    }
#endif
  }
  return cloned;
}
3244 
3245 
//------------------------------ clone_for_special_use_inside_loop -------------------------------------
// clone "n" for special uses that are in the not_peeled region.
// If these def-uses occur in separate blocks, the code generator
// marks the method as not compilable.  For example, if a "BoolNode"
// is in a different basic block than the "IfNode" that uses it, then
// the compilation is aborted in the code generator.
void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
  // Phis and Loads are never cloned here — presumably because they are pinned
  // to control / anti-dependences; confirm against callers if changing this.
  if (n->is_Phi() || n->is_Load()) {
    return;
  }
  assert(worklist.size() == 0, "should be empty");
  // Collect the "special" users: tests and conditional moves in the
  // not_peeled region that take "n" directly as their condition input.
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if ( not_peel.test(use->_idx) &&
         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
         use->in(1) == n)  {
      worklist.push(use);
    }
  }
  if (worklist.size() > 0) {
    // clone "n" and insert it between inputs of "n" and the use
    Node* n_clone = n->clone();
    loop->_body.push(n_clone);
    _igvn.register_new_node_with_optimizer(n_clone);
    set_ctrl(n_clone, get_ctrl(n));
    // The clone is sunk into the not_peeled region alongside its uses.
    sink_list.push(n_clone);
    not_peel.set(n_clone->_idx);
#ifndef PRODUCT
    if (TracePartialPeeling) {
      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
    }
#endif
    // Retarget every matching input edge of each special user to the clone.
    while( worklist.size() ) {
      Node *use = worklist.pop();
      _igvn.rehash_node_delayed(use);
      for (uint j = 1; j < use->req(); j++) {
        if (use->in(j) == n) {
          use->set_req(j, n_clone);
        }
      }
    }
  }
}
3290 
3291 
3292 //------------------------------ insert_phi_for_loop -------------------------------------
3293 // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
3294 void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
3295   Node *phi = PhiNode::make(lp, back_edge_val);
3296   phi->set_req(LoopNode::EntryControl, lp_entry_val);
3297   // Use existing phi if it already exists
3298   Node *hit = _igvn.hash_find_insert(phi);
3299   if( hit == NULL ) {
3300     _igvn.register_new_node_with_optimizer(phi);
3301     set_ctrl(phi, lp);
3302   } else {
3303     // Remove the new phi from the graph and use the hit
3304     _igvn.remove_dead_node(phi);
3305     phi = hit;
3306   }
3307   _igvn.replace_input_of(use, idx, phi);
3308 }
3309 
3310 #ifdef ASSERT
3311 //------------------------------ is_valid_loop_partition -------------------------------------
3312 // Validate the loop partition sets: peel and not_peel
3313 bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
3314                                               VectorSet& not_peel ) {
3315   uint i;
3316   // Check that peel_list entries are in the peel set
3317   for (i = 0; i < peel_list.size(); i++) {
3318     if (!peel.test(peel_list.at(i)->_idx)) {
3319       return false;
3320     }
3321   }
3322   // Check at loop members are in one of peel set or not_peel set
3323   for (i = 0; i < loop->_body.size(); i++ ) {
3324     Node *def  = loop->_body.at(i);
3325     uint di = def->_idx;
3326     // Check that peel set elements are in peel_list
3327     if (peel.test(di)) {
3328       if (not_peel.test(di)) {
3329         return false;
3330       }
3331       // Must be in peel_list also
3332       bool found = false;
3333       for (uint j = 0; j < peel_list.size(); j++) {
3334         if (peel_list.at(j)->_idx == di) {
3335           found = true;
3336           break;
3337         }
3338       }
3339       if (!found) {
3340         return false;
3341       }
3342     } else if (not_peel.test(di)) {
3343       if (peel.test(di)) {
3344         return false;
3345       }
3346     } else {
3347       return false;
3348     }
3349   }
3350   return true;
3351 }
3352 
3353 //------------------------------ is_valid_clone_loop_exit_use -------------------------------------
3354 // Ensure a use outside of loop is of the right form
3355 bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
3356   Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
3357   return (use->is_Phi() &&
3358           use_c->is_Region() && use_c->req() == 3 &&
3359           (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
3360            use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
3361            use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
3362           loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
3363 }
3364 
3365 //------------------------------ is_valid_clone_loop_form -------------------------------------
3366 // Ensure that all uses outside of loop are of the right form
3367 bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
3368                                                uint orig_exit_idx, uint clone_exit_idx) {
3369   uint len = peel_list.size();
3370   for (uint i = 0; i < len; i++) {
3371     Node *def = peel_list.at(i);
3372 
3373     for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
3374       Node *use = def->fast_out(j);
3375       Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
3376       if (!loop->is_member(get_loop(use_c))) {
3377         // use is not in the loop, check for correct structure
3378         if (use->in(0) == def) {
3379           // Okay
3380         } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
3381           return false;
3382         }
3383       }
3384     }
3385   }
3386   return true;
3387 }
3388 #endif
3389 
3390 //------------------------------ partial_peel -------------------------------------
3391 // Partially peel (aka loop rotation) the top portion of a loop (called
3392 // the peel section below) by cloning it and placing one copy just before
3393 // the new loop head and the other copy at the bottom of the new loop.
3394 //
3395 //    before                       after                where it came from
3396 //
3397 //    stmt1                        stmt1
3398 //  loop:                          stmt2                     clone
3399 //    stmt2                        if condA goto exitA       clone
3400 //    if condA goto exitA        new_loop:                   new
3401 //    stmt3                        stmt3                     clone
3402 //    if !condB goto loop          if condB goto exitB       clone
3403 //  exitB:                         stmt2                     orig
3404 //    stmt4                        if !condA goto new_loop   orig
3405 //  exitA:                         goto exitA
3406 //                               exitB:
3407 //                                 stmt4
3408 //                               exitA:
3409 //
3410 // Step 1: find the cut point: an exit test on probable
3411 //         induction variable.
3412 // Step 2: schedule (with cloning) operations in the peel
3413 //         section that can be executed after the cut into
3414 //         the section that is not peeled.  This may need
3415 //         to clone operations into exit blocks.  For
3416 //         instance, a reference to A[i] in the not-peel
3417 //         section and a reference to B[i] in an exit block
3418 //         may cause a left-shift of i by 2 to be placed
3419 //         in the peel block.  This step will clone the left
3420 //         shift into the exit block and sink the left shift
3421 //         from the peel to the not-peel section.
3422 // Step 3: clone the loop, retarget the control, and insert
3423 //         phis for values that are live across the new loop
3424 //         head.  This is very dependent on the graph structure
3425 //         from clone_loop.  It creates region nodes for
3426 //         exit control and associated phi nodes for values
3427 //         flow out of the loop through that exit.  The region
3428 //         node is dominated by the clone's control projection.
3429 //         So the clone's peel section is placed before the
3430 //         new loop head, and the clone's not-peel section is
3431 //         forms the top part of the new loop.  The original
3432 //         peel section forms the tail of the new loop.
3433 // Step 4: update the dominator tree and recompute the
3434 //         dominator depth.
3435 //
3436 //                   orig
3437 //
3438 //                   stmt1
3439 //                     |
3440 //                     v
3441 //               loop predicate
3442 //                     |
3443 //                     v
3444 //                   loop<----+
3445 //                     |      |
3446 //                   stmt2    |
3447 //                     |      |
3448 //                     v      |
3449 //                    ifA     |
3450 //                   / |      |
3451 //                  v  v      |
3452 //               false true   ^  <-- last_peel
3453 //               /     |      |
3454 //              /   ===|==cut |
3455 //             /     stmt3    |  <-- first_not_peel
3456 //            /        |      |
3457 //            |        v      |
3458 //            v       ifB     |
3459 //          exitA:   / \      |
3460 //                  /   \     |
3461 //                 v     v    |
3462 //               false true   |
3463 //               /       \    |
3464 //              /         ----+
3465 //             |
3466 //             v
3467 //           exitB:
3468 //           stmt4
3469 //
3470 //
3471 //            after clone loop
3472 //
3473 //                   stmt1
3474 //                     |
3475 //                     v
3476 //               loop predicate
3477 //                 /       \
3478 //        clone   /         \   orig
3479 //               /           \
3480 //              /             \
3481 //             v               v
3482 //   +---->loop                loop<----+
3483 //   |      |                    |      |
3484 //   |    stmt2                stmt2    |
3485 //   |      |                    |      |
3486 //   |      v                    v      |
3487 //   |      ifA                 ifA     |
3488 //   |      | \                / |      |
3489 //   |      v  v              v  v      |
3490 //   ^    true  false      false true   ^  <-- last_peel
3491 //   |      |   ^   \       /    |      |
3492 //   | cut==|==  \   \     /  ===|==cut |
3493 //   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
3494 //   |      |    dom   | |       |      |
3495 //   |      v      \  1v v2      v      |
3496 //   |      ifB     regionA     ifB     |
3497 //   |      / \        |       / \      |
3498 //   |     /   \       v      /   \     |
3499 //   |    v     v    exitA:  v     v    |
3500 //   |    true  false      false true   |
3501 //   |    /     ^   \      /       \    |
3502 //   +----       \   \    /         ----+
3503 //               dom  \  /
3504 //                 \  1v v2
3505 //                  regionB
3506 //                     |
3507 //                     v
3508 //                   exitB:
3509 //                   stmt4
3510 //
3511 //
3512 //           after partial peel
3513 //
3514 //                  stmt1
3515 //                     |
3516 //                     v
3517 //               loop predicate
3518 //                 /
3519 //        clone   /             orig
3520 //               /          TOP
3521 //              /             \
3522 //             v               v
3523 //    TOP->loop                loop----+
3524 //          |                    |      |
3525 //        stmt2                stmt2    |
3526 //          |                    |      |
3527 //          v                    v      |
3528 //          ifA                 ifA     |
3529 //          | \                / |      |
3530 //          v  v              v  v      |
3531 //        true  false      false true   |     <-- last_peel
3532 //          |   ^   \       /    +------|---+
3533 //  +->newloop   \   \     /  === ==cut |   |
3534 //  |     stmt3   \   \   /     TOP     |   |
3535 //  |       |    dom   | |      stmt3   |   | <-- first_not_peel
3536 //  |       v      \  1v v2      v      |   |
3537 //  |       ifB     regionA     ifB     ^   v
3538 //  |       / \        |       / \      |   |
3539 //  |      /   \       v      /   \     |   |
3540 //  |     v     v    exitA:  v     v    |   |
3541 //  |     true  false      false true   |   |
3542 //  |     /     ^   \      /       \    |   |
3543 //  |    |       \   \    /         v   |   |
3544 //  |    |       dom  \  /         TOP  |   |
3545 //  |    |         \  1v v2             |   |
3546 //  ^    v          regionB             |   |
3547 //  |    |             |                |   |
3548 //  |    |             v                ^   v
3549 //  |    |           exitB:             |   |
3550 //  |    |           stmt4              |   |
3551 //  |    +------------>-----------------+   |
3552 //  |                                       |
3553 //  +-----------------<---------------------+
3554 //
3555 //
3556 //              final graph
3557 //
3558 //                  stmt1
3559 //                    |
3560 //                    v
3561 //               loop predicate
3562 //                    |
3563 //                    v
3564 //                  stmt2 clone
3565 //                    |
3566 //                    v
3567 //         ........> ifA clone
3568 //         :        / |
3569 //        dom      /  |
3570 //         :      v   v
3571 //         :  false   true
3572 //         :  |       |
3573 //         :  |       v
3574 //         :  |    newloop<-----+
3575 //         :  |        |        |
3576 //         :  |     stmt3 clone |
3577 //         :  |        |        |
3578 //         :  |        v        |
3579 //         :  |       ifB       |
3580 //         :  |      / \        |
3581 //         :  |     v   v       |
3582 //         :  |  false true     |
3583 //         :  |   |     |       |
3584 //         :  |   v    stmt2    |
3585 //         :  | exitB:  |       |
3586 //         :  | stmt4   v       |
3587 //         :  |       ifA orig  |
3588 //         :  |      /  \       |
3589 //         :  |     /    \      |
3590 //         :  |    v     v      |
3591 //         :  |  false  true    |
3592 //         :  |  /        \     |
3593 //         :  v  v         -----+
3594 //          RegionA
3595 //             |
3596 //             v
3597 //           exitA
3598 //
3599 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
3600 
3601   assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
3602   if (!loop->_head->is_Loop()) {
3603     return false;
3604   }
3605   LoopNode *head = loop->_head->as_Loop();
3606 
3607   if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
3608     return false;
3609   }
3610 
3611   // Check for complex exit control
3612   for (uint ii = 0; ii < loop->_body.size(); ii++) {
3613     Node *n = loop->_body.at(ii);
3614     int opc = n->Opcode();
3615     if (n->is_Call()        ||
3616         opc == Op_Catch     ||
3617         opc == Op_CatchProj ||
3618         opc == Op_Jump      ||
3619         opc == Op_JumpProj) {
3620 #ifndef PRODUCT
3621       if (TracePartialPeeling) {
3622         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
3623       }
3624 #endif
3625       return false;
3626     }
3627   }
3628 
3629   int dd = dom_depth(head);
3630 
3631   // Step 1: find cut point
3632 
3633   // Walk up dominators to loop head looking for first loop exit
3634   // which is executed on every path thru loop.
3635   IfNode *peel_if = NULL;
3636   IfNode *peel_if_cmpu = NULL;
3637 
3638   Node *iff = loop->tail();
3639   while (iff != head) {
3640     if (iff->is_If()) {
3641       Node *ctrl = get_ctrl(iff->in(1));
3642       if (ctrl->is_top()) return false; // Dead test on live IF.
3643       // If loop-varying exit-test, check for induction variable
3644       if (loop->is_member(get_loop(ctrl)) &&
3645           loop->is_loop_exit(iff) &&
3646           is_possible_iv_test(iff)) {
3647         Node* cmp = iff->in(1)->in(1);
3648         if (cmp->Opcode() == Op_CmpI) {
3649           peel_if = iff->as_If();
3650         } else {
3651           assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
3652           peel_if_cmpu = iff->as_If();
3653         }
3654       }
3655     }
3656     iff = idom(iff);
3657   }
3658 
3659   // Prefer signed compare over unsigned compare.
3660   IfNode* new_peel_if = NULL;
3661   if (peel_if == NULL) {
3662     if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
3663       return false;   // No peel point found
3664     }
3665     new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
3666     if (new_peel_if == NULL) {
3667       return false;   // No peel point found
3668     }
3669     peel_if = new_peel_if;
3670   }
3671   Node* last_peel        = stay_in_loop(peel_if, loop);
3672   Node* first_not_peeled = stay_in_loop(last_peel, loop);
3673   if (first_not_peeled == NULL || first_not_peeled == head) {
3674     return false;
3675   }
3676 
3677 #ifndef PRODUCT
3678   if (TraceLoopOpts) {
3679     tty->print("PartialPeel  ");
3680     loop->dump_head();
3681   }
3682 
3683   if (TracePartialPeeling) {
3684     tty->print_cr("before partial peel one iteration");
3685     Node_List wl;
3686     Node* t = head->in(2);
3687     while (true) {
3688       wl.push(t);
3689       if (t == head) break;
3690       t = idom(t);
3691     }
3692     while (wl.size() > 0) {
3693       Node* tt = wl.pop();
3694       tt->dump();
3695       if (tt == last_peel) tty->print_cr("-- cut --");
3696     }
3697   }
3698 #endif
3699   VectorSet peel;
3700   VectorSet not_peel;
3701   Node_List peel_list;
3702   Node_List worklist;
3703   Node_List sink_list;
3704 
3705   uint estimate = loop->est_loop_clone_sz(1);
3706   if (exceeding_node_budget(estimate)) {
3707     return false;
3708   }
3709 
3710   // Set of cfg nodes to peel are those that are executable from
3711   // the head through last_peel.
3712   assert(worklist.size() == 0, "should be empty");
3713   worklist.push(head);
3714   peel.set(head->_idx);
3715   while (worklist.size() > 0) {
3716     Node *n = worklist.pop();
3717     if (n != last_peel) {
3718       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
3719         Node* use = n->fast_out(j);
3720         if (use->is_CFG() &&
3721             loop->is_member(get_loop(use)) &&
3722             !peel.test_set(use->_idx)) {
3723           worklist.push(use);
3724         }
3725       }
3726     }
3727   }
3728 
3729   // Set of non-cfg nodes to peel are those that are control
3730   // dependent on the cfg nodes.
3731   for (uint i = 0; i < loop->_body.size(); i++) {
3732     Node *n = loop->_body.at(i);
3733     Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
3734     if (peel.test(n_c->_idx)) {
3735       peel.set(n->_idx);
3736     } else {
3737       not_peel.set(n->_idx);
3738     }
3739   }
3740 
3741   // Step 2: move operations from the peeled section down into the
3742   //         not-peeled section
3743 
3744   // Get a post order schedule of nodes in the peel region
3745   // Result in right-most operand.
3746   scheduled_nodelist(loop, peel, peel_list);
3747 
3748   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3749 
3750   // For future check for too many new phis
3751   uint old_phi_cnt = 0;
3752   for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
3753     Node* use = head->fast_out(j);
3754     if (use->is_Phi()) old_phi_cnt++;
3755   }
3756 
3757 #ifndef PRODUCT
3758   if (TracePartialPeeling) {
3759     tty->print_cr("\npeeled list");
3760   }
3761 #endif
3762 
3763   // Evacuate nodes in peel region into the not_peeled region if possible
3764   bool too_many_clones = false;
3765   uint new_phi_cnt = 0;
3766   uint cloned_for_outside_use = 0;
3767   for (uint i = 0; i < peel_list.size();) {
3768     Node* n = peel_list.at(i);
3769 #ifndef PRODUCT
3770   if (TracePartialPeeling) n->dump();
3771 #endif
3772     bool incr = true;
3773     if (!n->is_CFG()) {
3774       if (has_use_in_set(n, not_peel)) {
3775         // If not used internal to the peeled region,
3776         // move "n" from peeled to not_peeled region.
3777         if (!has_use_internal_to_set(n, peel, loop)) {
3778           // if not pinned and not a load (which maybe anti-dependent on a store)
3779           // and not a CMove (Matcher expects only bool->cmove).
3780           if (n->in(0) == NULL && !n->is_Load() && !n->is_CMove()) {
3781             int new_clones = clone_for_use_outside_loop(loop, n, worklist);
3782             if (new_clones == -1) {
3783               too_many_clones = true;
3784               break;
3785             }
3786             cloned_for_outside_use += new_clones;
3787             sink_list.push(n);
3788             peel.remove(n->_idx);
3789             not_peel.set(n->_idx);
3790             peel_list.remove(i);
3791             incr = false;
3792 #ifndef PRODUCT
3793             if (TracePartialPeeling) {
3794               tty->print_cr("sink to not_peeled region: %d newbb: %d",
3795                             n->_idx, get_ctrl(n)->_idx);
3796             }
3797 #endif
3798           }
3799         } else {
3800           // Otherwise check for special def-use cases that span
3801           // the peel/not_peel boundary such as bool->if
3802           clone_for_special_use_inside_loop(loop, n, not_peel, sink_list, worklist);
3803           new_phi_cnt++;
3804         }
3805       }
3806     }
3807     if (incr) i++;
3808   }
3809 
3810   estimate += cloned_for_outside_use + new_phi_cnt;
3811   bool exceed_node_budget = !may_require_nodes(estimate);
3812   bool exceed_phi_limit = new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta;
3813 
3814   if (too_many_clones || exceed_node_budget || exceed_phi_limit) {
3815 #ifndef PRODUCT
3816     if (TracePartialPeeling && exceed_phi_limit) {
3817       tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
3818                     new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
3819     }
3820 #endif
3821     if (new_peel_if != NULL) {
3822       remove_cmpi_loop_exit(new_peel_if, loop);
3823     }
3824     // Inhibit more partial peeling on this loop
3825     assert(!head->is_partial_peel_loop(), "not partial peeled");
3826     head->mark_partial_peel_failed();
3827     if (cloned_for_outside_use > 0) {
3828       // Terminate this round of loop opts because
3829       // the graph outside this loop was changed.
3830       C->set_major_progress();
3831       return true;
3832     }
3833     return false;
3834   }
3835 
3836   // Step 3: clone loop, retarget control, and insert new phis
3837 
3838   // Create new loop head for new phis and to hang
3839   // the nodes being moved (sinked) from the peel region.
3840   LoopNode* new_head = new LoopNode(last_peel, last_peel);
3841   new_head->set_unswitch_count(head->unswitch_count()); // Preserve
3842   _igvn.register_new_node_with_optimizer(new_head);
3843   assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
3844   _igvn.replace_input_of(first_not_peeled, 0, new_head);
3845   set_loop(new_head, loop);
3846   loop->_body.push(new_head);
3847   not_peel.set(new_head->_idx);
3848   set_idom(new_head, last_peel, dom_depth(first_not_peeled));
3849   set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
3850 
3851   while (sink_list.size() > 0) {
3852     Node* n = sink_list.pop();
3853     set_ctrl(n, new_head);
3854   }
3855 
3856   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3857 
3858   clone_loop(loop, old_new, dd, IgnoreStripMined);
3859 
3860   const uint clone_exit_idx = 1;
3861   const uint orig_exit_idx  = 2;
3862   assert(is_valid_clone_loop_form(loop, peel_list, orig_exit_idx, clone_exit_idx), "bad clone loop");
3863 
3864   Node* head_clone             = old_new[head->_idx];
3865   LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
3866   Node* orig_tail_clone        = head_clone->in(2);
3867 
3868   // Add phi if "def" node is in peel set and "use" is not
3869 
3870   for (uint i = 0; i < peel_list.size(); i++) {
3871     Node *def  = peel_list.at(i);
3872     if (!def->is_CFG()) {
3873       for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
3874         Node *use = def->fast_out(j);
3875         if (has_node(use) && use->in(0) != C->top() &&
3876             (!peel.test(use->_idx) ||
3877              (use->is_Phi() && use->in(0) == head)) ) {
3878           worklist.push(use);
3879         }
3880       }
3881       while( worklist.size() ) {
3882         Node *use = worklist.pop();
3883         for (uint j = 1; j < use->req(); j++) {
3884           Node* n = use->in(j);
3885           if (n == def) {
3886 
3887             // "def" is in peel set, "use" is not in peel set
3888             // or "use" is in the entry boundary (a phi) of the peel set
3889 
3890             Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
3891 
3892             if ( loop->is_member(get_loop( use_c )) ) {
3893               // use is in loop
3894               if (old_new[use->_idx] != NULL) { // null for dead code
3895                 Node* use_clone = old_new[use->_idx];
3896                 _igvn.replace_input_of(use, j, C->top());
3897                 insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
3898               }
3899             } else {
3900               assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
3901               // use is not in the loop, check if the live range includes the cut
3902               Node* lp_if = use_c->in(orig_exit_idx)->in(0);
3903               if (not_peel.test(lp_if->_idx)) {
3904                 assert(j == orig_exit_idx, "use from original loop");
3905                 insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
3906               }
3907             }
3908           }
3909         }
3910       }
3911     }
3912   }
3913 
3914   // Step 3b: retarget control
3915 
3916   // Redirect control to the new loop head if a cloned node in
3917   // the not_peeled region has control that points into the peeled region.
  // This is necessary because the cloned peeled region will be outside
3919   // the loop.
3920   //                            from    to
3921   //          cloned-peeled    <---+
3922   //    new_head_clone:            |    <--+
3923   //          cloned-not_peeled  in(0)    in(0)
3924   //          orig-peeled
3925 
3926   for (uint i = 0; i < loop->_body.size(); i++) {
3927     Node *n = loop->_body.at(i);
3928     if (!n->is_CFG()           && n->in(0) != NULL        &&
3929         not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
3930       Node* n_clone = old_new[n->_idx];
3931       _igvn.replace_input_of(n_clone, 0, new_head_clone);
3932     }
3933   }
3934 
3935   // Backedge of the surviving new_head (the clone) is original last_peel
3936   _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
3937 
3938   // Cut first node in original not_peel set
3939   _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
3940   new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
3941   new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
3942 
3943   // Copy head_clone back-branch info to original head
3944   // and remove original head's loop entry and
3945   // clone head's back-branch
3946   _igvn.rehash_node_delayed(head); // Multiple edge updates
3947   head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
3948   head->set_req(LoopNode::LoopBackControl, C->top());
3949   _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
3950 
3951   // Similarly modify the phis
3952   for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
3953     Node* use = head->fast_out(k);
3954     if (use->is_Phi() && use->outcnt() > 0) {
3955       Node* use_clone = old_new[use->_idx];
3956       _igvn.rehash_node_delayed(use); // Multiple edge updates
3957       use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
3958       use->set_req(LoopNode::LoopBackControl, C->top());
3959       _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
3960     }
3961   }
3962 
3963   // Step 4: update dominator tree and dominator depth
3964 
3965   set_idom(head, orig_tail_clone, dd);
3966   recompute_dom_depth();
3967 
3968   // Inhibit more partial peeling on this loop
3969   new_head_clone->set_partial_peel_loop();
3970   C->set_major_progress();
3971   loop->record_for_igvn();
3972 
3973 #ifndef PRODUCT
3974   if (TracePartialPeeling) {
3975     tty->print_cr("\nafter partial peel one iteration");
3976     Node_List wl;
3977     Node* t = last_peel;
3978     while (true) {
3979       wl.push(t);
3980       if (t == head_clone) break;
3981       t = idom(t);
3982     }
3983     while (wl.size() > 0) {
3984       Node* tt = wl.pop();
3985       if (tt == head) tty->print_cr("orig head");
3986       else if (tt == new_head_clone) tty->print_cr("new head");
3987       else if (tt == head_clone) tty->print_cr("clone head");
3988       tt->dump();
3989     }
3990   }
3991 #endif
3992   return true;
3993 }
3994 
3995 // Transform:
3996 //
3997 // loop<-----------------+
3998 //  |                    |
3999 // stmt1 stmt2 .. stmtn  |
4000 //  |     |        |     |
4001 //  \     |       /      |
4002 //    v   v     v        |
4003 //       region          |
4004 //         |             |
4005 //     shared_stmt       |
4006 //         |             |
4007 //         v             |
4008 //         if            |
4009 //         / \           |
4010 //        |   -----------+
4011 //        v
4012 //
4013 // into:
4014 //
4015 //    loop<-------------------+
4016 //     |                      |
4017 //     v                      |
4018 // +->loop                    |
4019 // |   |                      |
4020 // |  stmt1 stmt2 .. stmtn    |
4021 // |   |     |        |       |
4022 // |   |      \       /       |
4023 // |   |       v     v        |
4024 // |   |        region1       |
4025 // |   |           |          |
4026 // |  shared_stmt shared_stmt |
4027 // |   |           |          |
4028 // |   v           v          |
4029 // |   if          if         |
4030 // |   /\          / \        |
4031 // +--   |         |   -------+
4032 //       \         /
4033 //        v       v
4034 //         region2
4035 //
4036 // (region2 is shown to merge mirrored projections of the loop exit
4037 // ifs to make the diagram clearer but they really merge the same
4038 // projection)
4039 //
4040 // Conditions for this transformation to trigger:
4041 // - the path through stmt1 is frequent enough
4042 // - the inner loop will be turned into a counted loop after transformation
4043 bool PhaseIdealLoop::duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new) {
4044   if (!DuplicateBackedge) {
4045     return false;
4046   }
4047   assert(!loop->_head->is_CountedLoop() || StressDuplicateBackedge, "Non-counted loop only");
4048   if (!loop->_head->is_Loop()) {
4049     return false;
4050   }
4051 
4052   uint estimate = loop->est_loop_clone_sz(1);
4053   if (exceeding_node_budget(estimate)) {
4054     return false;
4055   }
4056 
4057   LoopNode *head = loop->_head->as_Loop();
4058 
4059   Node* region = NULL;
4060   IfNode* exit_test = NULL;
4061   uint inner;
4062   float f;
4063   if (StressDuplicateBackedge) {
4064     if (head->is_strip_mined()) {
4065       return false;
4066     }
4067     Node* c = head->in(LoopNode::LoopBackControl);
4068 
4069     while (c != head) {
4070       if (c->is_Region()) {
4071         region = c;
4072       }
4073       c = idom(c);
4074     }
4075 
4076     if (region == NULL) {
4077       return false;
4078     }
4079 
4080     inner = 1;
4081   } else {
4082     // Is the shape of the loop that of a counted loop...
4083     Node* back_control = loop_exit_control(head, loop);
4084     if (back_control == NULL) {
4085       return false;
4086     }
4087 
4088     BoolTest::mask bt = BoolTest::illegal;
4089     float cl_prob = 0;
4090     Node* incr = NULL;
4091     Node* limit = NULL;
4092     Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
4093     if (cmp == NULL || cmp->Opcode() != Op_CmpI) {
4094       return false;
4095     }
4096 
4097     // With an extra phi for the candidate iv?
4098     // Or the region node is the loop head
4099     if (!incr->is_Phi() || incr->in(0) == head) {
4100       return false;
4101     }
4102 
4103     PathFrequency pf(head, this);
4104     region = incr->in(0);
4105 
4106     // Go over all paths for the extra phi's region and see if that
4107     // path is frequent enough and would match the expected iv shape
4108     // if the extra phi is removed
4109     inner = 0;
4110     for (uint i = 1; i < incr->req(); ++i) {
4111       Node* in = incr->in(i);
4112       Node* trunc1 = NULL;
4113       Node* trunc2 = NULL;
4114       const TypeInteger* iv_trunc_t = NULL;
4115       Node* orig_in = in;
4116       if (!(in = CountedLoopNode::match_incr_with_optional_truncation(in, &trunc1, &trunc2, &iv_trunc_t, T_INT))) {
4117         continue;
4118       }
4119       assert(in->Opcode() == Op_AddI, "wrong increment code");
4120       Node* xphi = NULL;
4121       Node* stride = loop_iv_stride(in, loop, xphi);
4122 
4123       if (stride == NULL) {
4124         continue;
4125       }
4126 
4127       PhiNode* phi = loop_iv_phi(xphi, NULL, head, loop);
4128       if (phi == NULL ||
4129           (trunc1 == NULL && phi->in(LoopNode::LoopBackControl) != incr) ||
4130           (trunc1 != NULL && phi->in(LoopNode::LoopBackControl) != trunc1)) {
4131         return false;
4132       }
4133 
4134       f = pf.to(region->in(i));
4135       if (f > 0.5) {
4136         inner = i;
4137         break;
4138       }
4139     }
4140 
4141     if (inner == 0) {
4142       return false;
4143     }
4144 
4145     exit_test = back_control->in(0)->as_If();
4146   }
4147 
4148   if (idom(region)->is_Catch()) {
4149     return false;
4150   }
4151 
4152   // Collect all control nodes that need to be cloned (shared_stmt in the diagram)
4153   Unique_Node_List wq;
4154   wq.push(head->in(LoopNode::LoopBackControl));
4155   for (uint i = 0; i < wq.size(); i++) {
4156     Node* c = wq.at(i);
4157     assert(get_loop(c) == loop, "not in the right loop?");
4158     if (c->is_Region()) {
4159       if (c != region) {
4160         for (uint j = 1; j < c->req(); ++j) {
4161           wq.push(c->in(j));
4162         }
4163       }
4164     } else {
4165       wq.push(c->in(0));
4166     }
4167     assert(!is_dominator(c, region) || c == region, "shouldn't go above region");
4168   }
4169 
4170   Node* region_dom = idom(region);
4171 
4172   // Can't do the transformation if this would cause a membar pair to
4173   // be split
4174   for (uint i = 0; i < wq.size(); i++) {
4175     Node* c = wq.at(i);
4176     if (c->is_MemBar() && (c->as_MemBar()->trailing_store() || c->as_MemBar()->trailing_load_store())) {
4177       assert(c->as_MemBar()->leading_membar()->trailing_membar() == c, "bad membar pair");
4178       if (!wq.member(c->as_MemBar()->leading_membar())) {
4179         return false;
4180       }
4181     }
4182   }
4183 
4184   // Collect data nodes that need to be clones as well
4185   int dd = dom_depth(head);
4186 
4187   for (uint i = 0; i < loop->_body.size(); ++i) {
4188     Node* n = loop->_body.at(i);
4189     if (has_ctrl(n)) {
4190       Node* c = get_ctrl(n);
4191       if (wq.member(c)) {
4192         wq.push(n);
4193       }
4194     } else {
4195       set_idom(n, idom(n), dd);
4196     }
4197   }
4198 
4199   // clone shared_stmt
4200   clone_loop_body(wq, old_new, NULL);
4201 
4202   Node* region_clone = old_new[region->_idx];
4203   region_clone->set_req(inner, C->top());
4204   set_idom(region, region->in(inner), dd);
4205 
4206   // Prepare the outer loop
4207   Node* outer_head = new LoopNode(head->in(LoopNode::EntryControl), old_new[head->in(LoopNode::LoopBackControl)->_idx]);
4208   register_control(outer_head, loop->_parent, outer_head->in(LoopNode::EntryControl));
4209   _igvn.replace_input_of(head, LoopNode::EntryControl, outer_head);
4210   set_idom(head, outer_head, dd);
4211 
4212   fix_body_edges(wq, loop, old_new, dd, loop->_parent, true);
4213 
4214   // Make one of the shared_stmt copies only reachable from stmt1, the
4215   // other only from stmt2..stmtn.
4216   Node* dom = NULL;
4217   for (uint i = 1; i < region->req(); ++i) {
4218     if (i != inner) {
4219       _igvn.replace_input_of(region, i, C->top());
4220     }
4221     Node* in = region_clone->in(i);
4222     if (in->is_top()) {
4223       continue;
4224     }
4225     if (dom == NULL) {
4226       dom = in;
4227     } else {
4228       dom = dom_lca(dom, in);
4229     }
4230   }
4231 
4232   set_idom(region_clone, dom, dd);
4233 
4234   // Set up the outer loop
4235   for (uint i = 0; i < head->outcnt(); i++) {
4236     Node* u = head->raw_out(i);
4237     if (u->is_Phi()) {
4238       Node* outer_phi = u->clone();
4239       outer_phi->set_req(0, outer_head);
4240       Node* backedge = old_new[u->in(LoopNode::LoopBackControl)->_idx];
4241       if (backedge == NULL) {
4242         backedge = u->in(LoopNode::LoopBackControl);
4243       }
4244       outer_phi->set_req(LoopNode::LoopBackControl, backedge);
4245       register_new_node(outer_phi, outer_head);
4246       _igvn.replace_input_of(u, LoopNode::EntryControl, outer_phi);
4247     }
4248   }
4249 
4250   // create control and data nodes for out of loop uses (including region2)
4251   Node_List worklist;
4252   uint new_counter = C->unique();
4253   fix_ctrl_uses(wq, loop, old_new, ControlAroundStripMined, outer_head, NULL, worklist);
4254 
4255   Node_List *split_if_set = NULL;
4256   Node_List *split_bool_set = NULL;
4257   Node_List *split_cex_set = NULL;
4258   fix_data_uses(wq, loop, ControlAroundStripMined, head->is_strip_mined() ? loop->_parent : loop, new_counter, old_new, worklist, split_if_set, split_bool_set, split_cex_set);
4259 
4260   finish_clone_loop(split_if_set, split_bool_set, split_cex_set);
4261 
4262   if (exit_test != NULL) {
4263     float cnt = exit_test->_fcnt;
4264     if (cnt != COUNT_UNKNOWN) {
4265       exit_test->_fcnt = cnt * f;
4266       old_new[exit_test->_idx]->as_If()->_fcnt = cnt * (1 - f);
4267     }
4268   }
4269 
4270   C->set_major_progress();
4271 
4272   return true;
4273 }
4274 
4275 //------------------------------reorg_offsets----------------------------------
4276 // Reorganize offset computations to lower register pressure.  Mostly
4277 // prevent loop-fallout uses of the pre-incremented trip counter (which are
4278 // then alive with the post-incremented trip counter forcing an extra
4279 // register move):
4280 //
4281 //     iv Phi            iv Phi
4282 //       |                 |
4283 //       |                AddI (+stride)
4284 //       |                 |
4285 //       |              Opaque2  # Blocks IGVN from folding these nodes until loop opts are over.
4286 //       |     ====>       |
4287 //       |                AddI (-stride)
4288 //       |                 |
4289 //       |               CastII  # Preserve type of iv Phi
4290 //       |                 |
4291 //   Outside Use       Outside Use
4292 //
void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
  // Perform it only for canonical counted loops.
  // Loop's shape could be messed up by iteration_split_impl.
  if (!loop->_head->is_CountedLoop())
    return;
  if (!loop->_head->as_Loop()->is_valid_counted_loop(T_INT))
    return;

  CountedLoopNode *cl = loop->_head->as_CountedLoop();
  CountedLoopEndNode *cle = cl->loopexit();
  Node *exit = cle->proj_out(false);   // false projection of the exit test: the fall-out path
  Node *phi = cl->phi();               // iv phi: the pre-incremented trip counter

  // Check for the special case when using the pre-incremented trip-counter on
  // the fall-out  path (forces the pre-incremented  and post-incremented trip
  // counter to be live  at the same time).  Fix this by  adjusting to use the
  // post-increment trip counter.

  // Restart the scan after every rewrite: mutating the graph invalidates
  // the DU iterator over the phi's outputs.
  bool progress = true;
  while (progress) {
    progress = false;
    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
      Node* use = phi->fast_out(i);   // User of trip-counter
      if (!has_ctrl(use))  continue;
      Node *u_ctrl = get_ctrl(use);
      if (use->is_Phi()) {
        // For a phi use, the effective control is the lca of the region
        // inputs on every edge where this phi consumes the iv.
        u_ctrl = NULL;
        for (uint j = 1; j < use->req(); j++)
          if (use->in(j) == phi)
            u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
      }
      IdealLoopTree *u_loop = get_loop(u_ctrl);
      // Look for loop-invariant use
      if (u_loop == loop) continue;
      if (loop->is_member(u_loop)) continue;
      // Check that use is live out the bottom.  Assuming the trip-counter
      // update is right at the bottom, uses of the loop middle are ok.
      if (dom_lca(exit, u_ctrl) != exit) continue;
      // Hit!  Refactor use to use the post-incremented tripcounter.
      // Compute a post-increment tripcounter.
      Node* c = exit;
      if (cl->is_strip_mined()) {
        // For a strip-mined loop, a use outside the outer strip-mined loop
        // is anchored at the outer loop exit instead.
        IdealLoopTree* outer_loop = get_loop(cl->outer_loop());
        if (!outer_loop->is_member(u_loop)) {
          c = cl->outer_loop_exit();
        }
      }
      // Opaque2 blocks IGVN from folding the chain back into the phi
      // until loop opts are over (see the diagram above this function).
      Node *opaq = new Opaque2Node(C, cle->incr());
      register_new_node(opaq, c);
      Node *neg_stride = _igvn.intcon(-cle->stride_con());
      set_ctrl(neg_stride, C->root());
      // post = incr + (-stride): recomputes the pre-increment value from
      // the post-increment one, so only the latter stays live out of the loop.
      Node *post = new AddINode(opaq, neg_stride);
      register_new_node(post, c);
      post = new CastIINode(post, phi->bottom_type()); // preserve the iv phi's type
      register_new_node(post, c);
      // Rewrite every edge of "use" that consumed the iv phi.
      _igvn.rehash_node_delayed(use);
      for (uint j = 1; j < use->req(); j++) {
        if (use->in(j) == phi)
          use->set_req(j, post);
      }
      // Since DU info changed, rerun loop
      progress = true;
      break;
    }
  }

}